igb[v],ixgbe: don't use flush_scheduled_work()
[linux-2.6-block.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87         /* required last entry */
88         {0, }
89 };
90
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
92
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
100 static void __devexit igb_remove(struct pci_dev *pdev);
101 static int igb_sw_init(struct igb_adapter *);
102 static int igb_open(struct net_device *);
103 static int igb_close(struct net_device *);
104 static void igb_configure_tx(struct igb_adapter *);
105 static void igb_configure_rx(struct igb_adapter *);
106 static void igb_clean_all_tx_rings(struct igb_adapter *);
107 static void igb_clean_all_rx_rings(struct igb_adapter *);
108 static void igb_clean_tx_ring(struct igb_ring *);
109 static void igb_clean_rx_ring(struct igb_ring *);
110 static void igb_set_rx_mode(struct net_device *);
111 static void igb_update_phy_info(unsigned long);
112 static void igb_watchdog(unsigned long);
113 static void igb_watchdog_task(struct work_struct *);
114 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
115 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
116                                                  struct rtnl_link_stats64 *stats);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146                                int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149                                  struct ifla_vf_info *ivi);
150
151 #ifdef CONFIG_PM
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
154 #endif
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159         .notifier_call  = igb_notify_dca,
160         .next           = NULL,
161         .priority       = 0
162 };
163 #endif
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
167 #endif
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs = 0;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172                  "per physical function");
173 #endif /* CONFIG_PCI_IOV */
174
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176                      pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
179
180 static struct pci_error_handlers igb_err_handler = {
181         .error_detected = igb_io_error_detected,
182         .slot_reset = igb_io_slot_reset,
183         .resume = igb_io_resume,
184 };
185
186
187 static struct pci_driver igb_driver = {
188         .name     = igb_driver_name,
189         .id_table = igb_pci_tbl,
190         .probe    = igb_probe,
191         .remove   = __devexit_p(igb_remove),
192 #ifdef CONFIG_PM
193         /* Power Management Hooks */
194         .suspend  = igb_suspend,
195         .resume   = igb_resume,
196 #endif
197         .shutdown = igb_shutdown,
198         .err_handler = &igb_err_handler
199 };
200
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
205
206 struct igb_reg_info {
207         u32 ofs;
208         char *name;
209 };
210
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
212
213         /* General Registers */
214         {E1000_CTRL, "CTRL"},
215         {E1000_STATUS, "STATUS"},
216         {E1000_CTRL_EXT, "CTRL_EXT"},
217
218         /* Interrupt Registers */
219         {E1000_ICR, "ICR"},
220
221         /* RX Registers */
222         {E1000_RCTL, "RCTL"},
223         {E1000_RDLEN(0), "RDLEN"},
224         {E1000_RDH(0), "RDH"},
225         {E1000_RDT(0), "RDT"},
226         {E1000_RXDCTL(0), "RXDCTL"},
227         {E1000_RDBAL(0), "RDBAL"},
228         {E1000_RDBAH(0), "RDBAH"},
229
230         /* TX Registers */
231         {E1000_TCTL, "TCTL"},
232         {E1000_TDBAL(0), "TDBAL"},
233         {E1000_TDBAH(0), "TDBAH"},
234         {E1000_TDLEN(0), "TDLEN"},
235         {E1000_TDH(0), "TDH"},
236         {E1000_TDT(0), "TDT"},
237         {E1000_TXDCTL(0), "TXDCTL"},
238         {E1000_TDFH, "TDFH"},
239         {E1000_TDFT, "TDFT"},
240         {E1000_TDFHS, "TDFHS"},
241         {E1000_TDFPC, "TDFPC"},
242
243         /* List Terminator */
244         {}
245 };
246
247 /*
248  * igb_regdump - register printout routine
249  */
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
251 {
252         int n = 0;
253         char rname[16];
254         u32 regs[8];
255
256         switch (reginfo->ofs) {
257         case E1000_RDLEN(0):
258                 for (n = 0; n < 4; n++)
259                         regs[n] = rd32(E1000_RDLEN(n));
260                 break;
261         case E1000_RDH(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDH(n));
264                 break;
265         case E1000_RDT(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDT(n));
268                 break;
269         case E1000_RXDCTL(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RXDCTL(n));
272                 break;
273         case E1000_RDBAL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDBAL(n));
276                 break;
277         case E1000_RDBAH(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAH(n));
280                 break;
281         case E1000_TDBAL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_TDBAL(n));
284                 break;
285         case E1000_TDBAH(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAH(n));
288                 break;
289         case E1000_TDLEN(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDLEN(n));
292                 break;
293         case E1000_TDH(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDH(n));
296                 break;
297         case E1000_TDT(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDT(n));
300                 break;
301         case E1000_TXDCTL(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TXDCTL(n));
304                 break;
305         default:
306                 printk(KERN_INFO "%-15s %08x\n",
307                         reginfo->name, rd32(reginfo->ofs));
308                 return;
309         }
310
311         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312         printk(KERN_INFO "%-15s ", rname);
313         for (n = 0; n < 4; n++)
314                 printk(KERN_CONT "%08x ", regs[n]);
315         printk(KERN_CONT "\n");
316 }
317
318 /*
319  * igb_dump - Print registers, tx-rings and rx-rings
320  */
321 static void igb_dump(struct igb_adapter *adapter)
322 {
323         struct net_device *netdev = adapter->netdev;
324         struct e1000_hw *hw = &adapter->hw;
325         struct igb_reg_info *reginfo;
326         int n = 0;
327         struct igb_ring *tx_ring;
328         union e1000_adv_tx_desc *tx_desc;
329         struct my_u0 { u64 a; u64 b; } *u0;
330         struct igb_buffer *buffer_info;
331         struct igb_ring *rx_ring;
332         union e1000_adv_rx_desc *rx_desc;
333         u32 staterr;
334         int i = 0;
335
336         if (!netif_msg_hw(adapter))
337                 return;
338
339         /* Print netdevice Info */
340         if (netdev) {
341                 dev_info(&adapter->pdev->dev, "Net device Info\n");
342                 printk(KERN_INFO "Device Name     state            "
343                         "trans_start      last_rx\n");
344                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
345                        netdev->name,
346                        netdev->state,
347                        netdev->trans_start,
348                        netdev->last_rx);
349         }
350
351         /* Print Registers */
352         dev_info(&adapter->pdev->dev, "Register Dump\n");
353         printk(KERN_INFO " Register Name   Value\n");
354         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355              reginfo->name; reginfo++) {
356                 igb_regdump(hw, reginfo);
357         }
358
359         /* Print TX Ring Summary */
360         if (!netdev || !netif_running(netdev))
361                 goto exit;
362
363         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
365                 " leng ntw timestamp\n");
366         for (n = 0; n < adapter->num_tx_queues; n++) {
367                 tx_ring = adapter->tx_ring[n];
368                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
371                            (u64)buffer_info->dma,
372                            buffer_info->length,
373                            buffer_info->next_to_watch,
374                            (u64)buffer_info->time_stamp);
375         }
376
377         /* Print TX Rings */
378         if (!netif_msg_tx_done(adapter))
379                 goto rx_ring_summary;
380
381         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
382
383         /* Transmit Descriptor Formats
384          *
385          * Advanced Transmit Descriptor
386          *   +--------------------------------------------------------------+
387          * 0 |         Buffer Address [63:0]                                |
388          *   +--------------------------------------------------------------+
389          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
390          *   +--------------------------------------------------------------+
391          *   63      46 45    40 39 38 36 35 32 31   24             15       0
392          */
393
394         for (n = 0; n < adapter->num_tx_queues; n++) {
395                 tx_ring = adapter->tx_ring[n];
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
400                         "[PlPOCIStDDM Ln] [bi->dma       ] "
401                         "leng  ntw timestamp        bi->skb\n");
402
403                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405                         buffer_info = &tx_ring->buffer_info[i];
406                         u0 = (struct my_u0 *)tx_desc;
407                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
408                                 " %04X  %3X %016llX %p", i,
409                                 le64_to_cpu(u0->a),
410                                 le64_to_cpu(u0->b),
411                                 (u64)buffer_info->dma,
412                                 buffer_info->length,
413                                 buffer_info->next_to_watch,
414                                 (u64)buffer_info->time_stamp,
415                                 buffer_info->skb);
416                         if (i == tx_ring->next_to_use &&
417                                 i == tx_ring->next_to_clean)
418                                 printk(KERN_CONT " NTC/U\n");
419                         else if (i == tx_ring->next_to_use)
420                                 printk(KERN_CONT " NTU\n");
421                         else if (i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC\n");
423                         else
424                                 printk(KERN_CONT "\n");
425
426                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427                                 print_hex_dump(KERN_INFO, "",
428                                         DUMP_PREFIX_ADDRESS,
429                                         16, 1, phys_to_virt(buffer_info->dma),
430                                         buffer_info->length, true);
431                 }
432         }
433
434         /* Print RX Rings Summary */
435 rx_ring_summary:
436         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437         printk(KERN_INFO "Queue [NTU] [NTC]\n");
438         for (n = 0; n < adapter->num_rx_queues; n++) {
439                 rx_ring = adapter->rx_ring[n];
440                 printk(KERN_INFO " %5d %5X %5X\n", n,
441                            rx_ring->next_to_use, rx_ring->next_to_clean);
442         }
443
444         /* Print RX Rings */
445         if (!netif_msg_rx_status(adapter))
446                 goto exit;
447
448         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
449
450         /* Advanced Receive Descriptor (Read) Format
451          *    63                                           1        0
452          *    +-----------------------------------------------------+
453          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
454          *    +----------------------------------------------+------+
455          *  8 |       Header Buffer Address [63:1]           |  DD  |
456          *    +-----------------------------------------------------+
457          *
458          *
459          * Advanced Receive Descriptor (Write-Back) Format
460          *
461          *   63       48 47    32 31  30      21 20 17 16   4 3     0
462          *   +------------------------------------------------------+
463          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
464          *   | Checksum   Ident  |   |           |    | Type | Type |
465          *   +------------------------------------------------------+
466          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467          *   +------------------------------------------------------+
468          *   63       48 47    32 31            20 19               0
469          */
470
471         for (n = 0; n < adapter->num_rx_queues; n++) {
472                 rx_ring = adapter->rx_ring[n];
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
477                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
478                         "<-- Adv Rx Read format\n");
479                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
480                         "[vl er S cks ln] ---------------- [bi->skb] "
481                         "<-- Adv Rx Write-Back format\n");
482
483                 for (i = 0; i < rx_ring->count; i++) {
484                         buffer_info = &rx_ring->buffer_info[i];
485                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486                         u0 = (struct my_u0 *)rx_desc;
487                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488                         if (staterr & E1000_RXD_STAT_DD) {
489                                 /* Descriptor Done */
490                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
491                                         "%016llX ---------------- %p", i,
492                                         le64_to_cpu(u0->a),
493                                         le64_to_cpu(u0->b),
494                                         buffer_info->skb);
495                         } else {
496                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
497                                         "%016llX %016llX %p", i,
498                                         le64_to_cpu(u0->a),
499                                         le64_to_cpu(u0->b),
500                                         (u64)buffer_info->dma,
501                                         buffer_info->skb);
502
503                                 if (netif_msg_pktdata(adapter)) {
504                                         print_hex_dump(KERN_INFO, "",
505                                                 DUMP_PREFIX_ADDRESS,
506                                                 16, 1,
507                                                 phys_to_virt(buffer_info->dma),
508                                                 rx_ring->rx_buffer_len, true);
509                                         if (rx_ring->rx_buffer_len
510                                                 < IGB_RXBUFFER_1024)
511                                                 print_hex_dump(KERN_INFO, "",
512                                                   DUMP_PREFIX_ADDRESS,
513                                                   16, 1,
514                                                   phys_to_virt(
515                                                     buffer_info->page_dma +
516                                                     buffer_info->page_offset),
517                                                   PAGE_SIZE/2, true);
518                                 }
519                         }
520
521                         if (i == rx_ring->next_to_use)
522                                 printk(KERN_CONT " NTU\n");
523                         else if (i == rx_ring->next_to_clean)
524                                 printk(KERN_CONT " NTC\n");
525                         else
526                                 printk(KERN_CONT "\n");
527
528                 }
529         }
530
531 exit:
532         return;
533 }
534
535
536 /**
537  * igb_read_clock - read raw cycle counter (to be used by time counter)
538  */
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
540 {
541         struct igb_adapter *adapter =
542                 container_of(tc, struct igb_adapter, cycles);
543         struct e1000_hw *hw = &adapter->hw;
544         u64 stamp = 0;
545         int shift = 0;
546
547         /*
548          * The timestamp latches on lowest register read. For the 82580
549          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
550          * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
551          */
552         if (hw->mac.type == e1000_82580) {
553                 stamp = rd32(E1000_SYSTIMR) >> 8;
554                 shift = IGB_82580_TSYNC_SHIFT;
555         }
556
557         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
559         return stamp;
560 }
561
562 /**
563  * igb_get_hw_dev - return device
564  * used by hardware layer to print debugging information
565  **/
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
567 {
568         struct igb_adapter *adapter = hw->back;
569         return adapter->netdev;
570 }
571
572 /**
573  * igb_init_module - Driver Registration Routine
574  *
575  * igb_init_module is the first routine called when the driver is
576  * loaded. All it does is register with the PCI subsystem.
577  **/
578 static int __init igb_init_module(void)
579 {
580         int ret;
581         printk(KERN_INFO "%s - version %s\n",
582                igb_driver_string, igb_driver_version);
583
584         printk(KERN_INFO "%s\n", igb_copyright);
585
586 #ifdef CONFIG_IGB_DCA
587         dca_register_notify(&dca_notifier);
588 #endif
589         ret = pci_register_driver(&igb_driver);
590         return ret;
591 }
592
593 module_init(igb_init_module);
594
595 /**
596  * igb_exit_module - Driver Exit Cleanup Routine
597  *
598  * igb_exit_module is called just before the driver is removed
599  * from memory.
600  **/
601 static void __exit igb_exit_module(void)
602 {
603 #ifdef CONFIG_IGB_DCA
604         dca_unregister_notify(&dca_notifier);
605 #endif
606         pci_unregister_driver(&igb_driver);
607 }
608
609 module_exit(igb_exit_module);
610
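/*
 * Q_IDX_82576(i) maps the i-th PF queue into the 82576's interleaved queue
 * layout (0, 8, 1, 9, ...), continuing the "VF n owns queues n and n + 8"
 * pairing described in igb_cache_ring_register() below.
 */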
611 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
612 /**
613  * igb_cache_ring_register - Descriptor ring to register mapping
614  * @adapter: board private structure to initialize
615  *
616  * Once we know the feature-set enabled for the device, we'll cache
617  * the register offset the descriptor ring is assigned to.
618  **/
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
620 {
621         int i = 0, j = 0;
622         u32 rbase_offset = adapter->vfs_allocated_count;
623
624         switch (adapter->hw.mac.type) {
625         case e1000_82576:
626                 /* The queues are allocated for virtualization such that VF 0
627                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628                  * In order to avoid collision we start at the first free queue
629                  * and continue consuming queues in the same sequence
630                  */
631                 if (adapter->vfs_allocated_count) {
632                         for (; i < adapter->rss_queues; i++)
633                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
634                                                                Q_IDX_82576(i);
635                 }
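                /* Fall through - remaining queues get sequential reg_idx values */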
636         case e1000_82575:
637         case e1000_82580:
638         case e1000_i350:
639         default:
640                 for (; i < adapter->num_rx_queues; i++)
641                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642                 for (; j < adapter->num_tx_queues; j++)
643                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
644                 break;
645         }
646 }
647
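/**
 * igb_free_queues - Free memory for all rings
 * @adapter: board private structure to initialize
 *
 * Free the tx_ring and rx_ring structures allocated by igb_alloc_queues()
 * and reset the queue counts to zero.
 **/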
648 static void igb_free_queues(struct igb_adapter *adapter)
649 {
650         int i;
651
652         for (i = 0; i < adapter->num_tx_queues; i++) {
653                 kfree(adapter->tx_ring[i]);
654                 adapter->tx_ring[i] = NULL;
655         }
656         for (i = 0; i < adapter->num_rx_queues; i++) {
657                 kfree(adapter->rx_ring[i]);
658                 adapter->rx_ring[i] = NULL;
659         }
660         adapter->num_rx_queues = 0;
661         adapter->num_tx_queues = 0;
662 }
663
664 /**
665  * igb_alloc_queues - Allocate memory for all rings
666  * @adapter: board private structure to initialize
667  *
668  * We allocate one ring per queue at run-time since we don't know the
669  * number of queues at compile-time.
670  **/
671 static int igb_alloc_queues(struct igb_adapter *adapter)
672 {
673         struct igb_ring *ring;
674         int i;
675
676         for (i = 0; i < adapter->num_tx_queues; i++) {
677                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
678                 if (!ring)
679                         goto err;
680                 ring->count = adapter->tx_ring_count;
681                 ring->queue_index = i;
682                 ring->dev = &adapter->pdev->dev;
683                 ring->netdev = adapter->netdev;
684                 /* For 82575, context index must be unique per ring. */
685                 if (adapter->hw.mac.type == e1000_82575)
686                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687                 adapter->tx_ring[i] = ring;
688         }
689
690         for (i = 0; i < adapter->num_rx_queues; i++) {
691                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
692                 if (!ring)
693                         goto err;
694                 ring->count = adapter->rx_ring_count;
695                 ring->queue_index = i;
696                 ring->dev = &adapter->pdev->dev;
697                 ring->netdev = adapter->netdev;
698                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700                 /* set flag indicating ring supports SCTP checksum offload */
701                 if (adapter->hw.mac.type >= e1000_82576)
702                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703                 adapter->rx_ring[i] = ring;
704         }
705
706         igb_cache_ring_register(adapter);
707
708         return 0;
709
710 err:
711         igb_free_queues(adapter);
712
713         return -ENOMEM;
714 }
715
716 #define IGB_N0_QUEUE -1
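/**
 * igb_assign_vector - map a q_vector's rings to an MSI-X vector
 * @q_vector: pointer to the q_vector being mapped
 * @msix_vector: MSI-X vector number to assign
 *
 * Programs the hardware interrupt routing (the MSIXBM bitmask on 82575, the
 * IVAR table on 82576/82580/i350) so that the rings owned by this q_vector
 * raise the given vector, and adds the vector to eims_enable_mask.
 **/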
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
718 {
719         u32 msixbm = 0;
720         struct igb_adapter *adapter = q_vector->adapter;
721         struct e1000_hw *hw = &adapter->hw;
722         u32 ivar, index;
723         int rx_queue = IGB_N0_QUEUE;
724         int tx_queue = IGB_N0_QUEUE;
725
726         if (q_vector->rx_ring)
727                 rx_queue = q_vector->rx_ring->reg_idx;
728         if (q_vector->tx_ring)
729                 tx_queue = q_vector->tx_ring->reg_idx;
730
731         switch (hw->mac.type) {
732         case e1000_82575:
733                 /* The 82575 assigns vectors using a bitmask, which matches the
734                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
735                    or more queues to a vector, we write the appropriate bits
736                    into the MSIXBM register for that vector. */
737                 if (rx_queue > IGB_N0_QUEUE)
738                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739                 if (tx_queue > IGB_N0_QUEUE)
740                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741                 if (!adapter->msix_entries && msix_vector == 0)
742                         msixbm |= E1000_EIMS_OTHER;
743                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744                 q_vector->eims_value = msixbm;
745                 break;
746         case e1000_82576:
747                 /* 82576 uses a table-based method for assigning vectors.
748                    Each queue has a single entry in the table to which we write
749                    a vector number along with a "valid" bit.  Sadly, the layout
750                    of the table is somewhat counterintuitive. */
751                 if (rx_queue > IGB_N0_QUEUE) {
752                         index = (rx_queue & 0x7);
753                         ivar = array_rd32(E1000_IVAR0, index);
754                         if (rx_queue < 8) {
755                                 /* vector goes into low byte of register */
756                                 ivar = ivar & 0xFFFFFF00;
757                                 ivar |= msix_vector | E1000_IVAR_VALID;
758                         } else {
759                                 /* vector goes into third byte of register */
760                                 ivar = ivar & 0xFF00FFFF;
761                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
762                         }
763                         array_wr32(E1000_IVAR0, index, ivar);
764                 }
765                 if (tx_queue > IGB_N0_QUEUE) {
766                         index = (tx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (tx_queue < 8) {
769                                 /* vector goes into second byte of register */
770                                 ivar = ivar & 0xFFFF00FF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
772                         } else {
773                                 /* vector goes into high byte of register */
774                                 ivar = ivar & 0x00FFFFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 q_vector->eims_value = 1 << msix_vector;
780                 break;
781         case e1000_82580:
782         case e1000_i350:
783                 /* 82580 uses the same table-based approach as 82576 but has fewer
784                    entries, so each IVAR entry is shared by a pair of queues. */
785                 if (rx_queue > IGB_N0_QUEUE) {
786                         index = (rx_queue >> 1);
787                         ivar = array_rd32(E1000_IVAR0, index);
788                         if (rx_queue & 0x1) {
789                                 /* vector goes into third byte of register */
790                                 ivar = ivar & 0xFF00FFFF;
791                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
792                         } else {
793                                 /* vector goes into low byte of register */
794                                 ivar = ivar & 0xFFFFFF00;
795                                 ivar |= msix_vector | E1000_IVAR_VALID;
796                         }
797                         array_wr32(E1000_IVAR0, index, ivar);
798                 }
799                 if (tx_queue > IGB_N0_QUEUE) {
800                         index = (tx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (tx_queue & 0x1) {
803                                 /* vector goes into high byte of register */
804                                 ivar = ivar & 0x00FFFFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
806                         } else {
807                                 /* vector goes into second byte of register */
808                                 ivar = ivar & 0xFFFF00FF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 q_vector->eims_value = 1 << msix_vector;
814                 break;
815         default:
816                 BUG();
817                 break;
818         }
819
820         /* add q_vector eims value to global eims_enable_mask */
821         adapter->eims_enable_mask |= q_vector->eims_value;
822
823         /* configure q_vector to set itr on first interrupt */
824         q_vector->set_itr = 1;
825 }
826
827 /**
828  * igb_configure_msix - Configure MSI-X hardware
829  *
830  * igb_configure_msix sets up the hardware to properly
831  * generate MSI-X interrupts.
832  **/
833 static void igb_configure_msix(struct igb_adapter *adapter)
834 {
835         u32 tmp;
836         int i, vector = 0;
837         struct e1000_hw *hw = &adapter->hw;
838
839         adapter->eims_enable_mask = 0;
840
841         /* set vector for other causes, i.e. link changes */
842         switch (hw->mac.type) {
843         case e1000_82575:
844                 tmp = rd32(E1000_CTRL_EXT);
845                 /* enable MSI-X PBA support*/
846                 tmp |= E1000_CTRL_EXT_PBA_CLR;
847                 /* enable MSI-X PBA support */
848                 /* Auto-Mask interrupts upon ICR read. */
849                 tmp |= E1000_CTRL_EXT_EIAME;
850                 tmp |= E1000_CTRL_EXT_IRCA;
851
852                 wr32(E1000_CTRL_EXT, tmp);
853
854                 /* enable msix_other interrupt */
855                 array_wr32(E1000_MSIXBM(0), vector++,
856                                       E1000_EIMS_OTHER);
857                 adapter->eims_other = E1000_EIMS_OTHER;
858
859                 break;
860
861         case e1000_82576:
862         case e1000_82580:
863         case e1000_i350:
864                 /* Turn on MSI-X capability first, or our settings
865                  * won't stick.  And it will take days to debug. */
866                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
868                                 E1000_GPIE_NSICR);
869
870                 /* enable msix_other interrupt */
871                 adapter->eims_other = 1 << vector;
872                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
873
874                 wr32(E1000_IVAR_MISC, tmp);
875                 break;
876         default:
877                 /* do nothing, since nothing else supports MSI-X */
878                 break;
879         } /* switch (hw->mac.type) */
880
881         adapter->eims_enable_mask |= adapter->eims_other;
882
883         for (i = 0; i < adapter->num_q_vectors; i++)
884                 igb_assign_vector(adapter->q_vector[i], vector++);
885
886         wrfl();
887 }
888
889 /**
890  * igb_request_msix - Initialize MSI-X interrupts
891  *
892  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
893  * kernel.
894  **/
895 static int igb_request_msix(struct igb_adapter *adapter)
896 {
897         struct net_device *netdev = adapter->netdev;
898         struct e1000_hw *hw = &adapter->hw;
899         int i, err = 0, vector = 0;
900
901         err = request_irq(adapter->msix_entries[vector].vector,
902                           igb_msix_other, 0, netdev->name, adapter);
903         if (err)
904                 goto out;
905         vector++;
906
907         for (i = 0; i < adapter->num_q_vectors; i++) {
908                 struct igb_q_vector *q_vector = adapter->q_vector[i];
909
910                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
911
912                 if (q_vector->rx_ring && q_vector->tx_ring)
913                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914                                 q_vector->rx_ring->queue_index);
915                 else if (q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917                                 q_vector->tx_ring->queue_index);
918                 else if (q_vector->rx_ring)
919                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920                                 q_vector->rx_ring->queue_index);
921                 else
922                         sprintf(q_vector->name, "%s-unused", netdev->name);
923
924                 err = request_irq(adapter->msix_entries[vector].vector,
925                                   igb_msix_ring, 0, q_vector->name,
926                                   q_vector);
927                 if (err)
928                         goto out;
929                 vector++;
930         }
931
932         igb_configure_msix(adapter);
933         return 0;
934 out:
935         return err;
936 }
937
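/**
 * igb_reset_interrupt_capability - disable MSI-X or MSI
 * @adapter: board private structure
 *
 * Disables MSI-X and frees the msix_entries table, or disables MSI,
 * returning the device to legacy interrupt mode.
 **/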
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 pci_disable_msix(adapter->pdev);
942                 kfree(adapter->msix_entries);
943                 adapter->msix_entries = NULL;
944         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 pci_disable_msi(adapter->pdev);
946         }
947 }
948
949 /**
950  * igb_free_q_vectors - Free memory allocated for interrupt vectors
951  * @adapter: board private structure to initialize
952  *
953  * This function frees the memory allocated to the q_vectors.  In addition if
954  * NAPI is enabled it will delete any references to the NAPI struct prior
955  * to freeing the q_vector.
956  **/
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
958 {
959         int v_idx;
960
961         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963                 adapter->q_vector[v_idx] = NULL;
964                 if (!q_vector)
965                         continue;
966                 netif_napi_del(&q_vector->napi);
967                 kfree(q_vector);
968         }
969         adapter->num_q_vectors = 0;
970 }
971
972 /**
973  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
974  *
975  * This function resets the device so that it has 0 rx queues, tx queues, and
976  * MSI-X interrupts allocated.
977  */
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
979 {
980         igb_free_queues(adapter);
981         igb_free_q_vectors(adapter);
982         igb_reset_interrupt_capability(adapter);
983 }
984
985 /**
986  * igb_set_interrupt_capability - set MSI or MSI-X if supported
987  *
988  * Attempt to configure interrupts using the best available
989  * capabilities of the hardware and kernel.
990  **/
991 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
992 {
993         int err;
994         int numvecs, i;
995
996         /* Number of supported queues. */
997         adapter->num_rx_queues = adapter->rss_queues;
998         if (adapter->vfs_allocated_count)
999                 adapter->num_tx_queues = 1;
1000         else
1001                 adapter->num_tx_queues = adapter->rss_queues;
1002
1003         /* start with one vector for every rx queue */
1004         numvecs = adapter->num_rx_queues;
1005
1006         /* if tx handler is separate add 1 for every tx queue */
1007         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008                 numvecs += adapter->num_tx_queues;
1009
1010         /* store the number of vectors reserved for queues */
1011         adapter->num_q_vectors = numvecs;
1012
1013         /* add 1 vector for link status interrupts */
1014         numvecs++;
1015         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1016                                         GFP_KERNEL);
1017         if (!adapter->msix_entries)
1018                 goto msi_only;
1019
1020         for (i = 0; i < numvecs; i++)
1021                 adapter->msix_entries[i].entry = i;
1022
1023         err = pci_enable_msix(adapter->pdev,
1024                               adapter->msix_entries,
1025                               numvecs);
1026         if (err == 0)
1027                 goto out;
1028
1029         igb_reset_interrupt_capability(adapter);
1030
1031         /* If we can't do MSI-X, try MSI */
1032 msi_only:
1033 #ifdef CONFIG_PCI_IOV
1034         /* disable SR-IOV for non MSI-X configurations */
1035         if (adapter->vf_data) {
1036                 struct e1000_hw *hw = &adapter->hw;
1037                 /* disable iov and allow time for transactions to clear */
1038                 pci_disable_sriov(adapter->pdev);
1039                 msleep(500);
1040
1041                 kfree(adapter->vf_data);
1042                 adapter->vf_data = NULL;
1043                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1044                 msleep(100);
1045                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1046         }
1047 #endif
1048         adapter->vfs_allocated_count = 0;
1049         adapter->rss_queues = 1;
1050         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051         adapter->num_rx_queues = 1;
1052         adapter->num_tx_queues = 1;
1053         adapter->num_q_vectors = 1;
1054         if (!pci_enable_msi(adapter->pdev))
1055                 adapter->flags |= IGB_FLAG_HAS_MSI;
1056 out:
1057         /* Notify the stack of the (possibly) reduced queue counts. */
1058         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1059         return netif_set_real_num_rx_queues(adapter->netdev,
1060                                             adapter->num_rx_queues);
1061 }
1062
1063 /**
1064  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1065  * @adapter: board private structure to initialize
1066  *
1067  * We allocate one q_vector per queue interrupt.  If allocation fails we
1068  * return -ENOMEM.
1069  **/
1070 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1071 {
1072         struct igb_q_vector *q_vector;
1073         struct e1000_hw *hw = &adapter->hw;
1074         int v_idx;
1075
1076         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1077                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1078                 if (!q_vector)
1079                         goto err_out;
1080                 q_vector->adapter = adapter;
1081                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1082                 q_vector->itr_val = IGB_START_ITR;
1083                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1084                 adapter->q_vector[v_idx] = q_vector;
1085         }
1086         return 0;
1087
1088 err_out:
1089         igb_free_q_vectors(adapter);
1090         return -ENOMEM;
1091 }
1092
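/*
 * The two helpers below attach an rx or tx ring to a q_vector and seed the
 * vector's ITR from the corresponding adapter setting; the dynamic ITR
 * modes (settings 1-3) start out at IGB_START_ITR.
 */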
1093 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1094                                       int ring_idx, int v_idx)
1095 {
1096         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1097
1098         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1099         q_vector->rx_ring->q_vector = q_vector;
1100         q_vector->itr_val = adapter->rx_itr_setting;
1101         if (q_vector->itr_val && q_vector->itr_val <= 3)
1102                 q_vector->itr_val = IGB_START_ITR;
1103 }
1104
1105 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1111         q_vector->tx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->tx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 /**
1118  * igb_map_ring_to_vector - maps allocated queues to vectors
1119  *
1120  * This function maps the recently allocated queues to vectors.
1121  **/
1122 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1123 {
1124         int i;
1125         int v_idx = 0;
1126
1127         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1128             (adapter->num_q_vectors < adapter->num_tx_queues))
1129                 return -ENOMEM;
1130
1131         if (adapter->num_q_vectors >=
1132             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1133                 for (i = 0; i < adapter->num_rx_queues; i++)
1134                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1135                 for (i = 0; i < adapter->num_tx_queues; i++)
1136                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1137         } else {
1138                 for (i = 0; i < adapter->num_rx_queues; i++) {
1139                         if (i < adapter->num_tx_queues)
1140                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1141                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1142                 }
1143                 for (; i < adapter->num_tx_queues; i++)
1144                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1151  *
1152  * This function initializes the interrupts and allocates all of the queues.
1153  **/
1154 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1155 {
1156         struct pci_dev *pdev = adapter->pdev;
1157         int err;
1158
1159         err = igb_set_interrupt_capability(adapter);
1160         if (err)
1161                 return err;
1162
1163         err = igb_alloc_q_vectors(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1166                 goto err_alloc_q_vectors;
1167         }
1168
1169         err = igb_alloc_queues(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1172                 goto err_alloc_queues;
1173         }
1174
1175         err = igb_map_ring_to_vector(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1178                 goto err_map_queues;
1179         }
1180
1181
1182         return 0;
1183 err_map_queues:
1184         igb_free_queues(adapter);
1185 err_alloc_queues:
1186         igb_free_q_vectors(adapter);
1187 err_alloc_q_vectors:
1188         igb_reset_interrupt_capability(adapter);
1189         return err;
1190 }
1191
1192 /**
1193  * igb_request_irq - initialize interrupts
1194  *
1195  * Attempts to configure interrupts using the best available
1196  * capabilities of the hardware and kernel.
1197  **/
1198 static int igb_request_irq(struct igb_adapter *adapter)
1199 {
1200         struct net_device *netdev = adapter->netdev;
1201         struct pci_dev *pdev = adapter->pdev;
1202         int err = 0;
1203
1204         if (adapter->msix_entries) {
1205                 err = igb_request_msix(adapter);
1206                 if (!err)
1207                         goto request_done;
1208                 /* fall back to MSI */
1209                 igb_clear_interrupt_scheme(adapter);
1210                 if (!pci_enable_msi(adapter->pdev))
1211                         adapter->flags |= IGB_FLAG_HAS_MSI;
1212                 igb_free_all_tx_resources(adapter);
1213                 igb_free_all_rx_resources(adapter);
1214                 adapter->num_tx_queues = 1;
1215                 adapter->num_rx_queues = 1;
1216                 adapter->num_q_vectors = 1;
1217                 err = igb_alloc_q_vectors(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for vectors\n");
1221                         goto request_done;
1222                 }
1223                 err = igb_alloc_queues(adapter);
1224                 if (err) {
1225                         dev_err(&pdev->dev,
1226                                 "Unable to allocate memory for queues\n");
1227                         igb_free_q_vectors(adapter);
1228                         goto request_done;
1229                 }
1230                 igb_setup_all_tx_resources(adapter);
1231                 igb_setup_all_rx_resources(adapter);
1232         } else {
1233                 igb_assign_vector(adapter->q_vector[0], 0);
1234         }
1235
1236         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1237                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1238                                   netdev->name, adapter);
1239                 if (!err)
1240                         goto request_done;
1241
1242                 /* fall back to legacy interrupts */
1243                 igb_reset_interrupt_capability(adapter);
1244                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1245         }
1246
1247         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1248                           netdev->name, adapter);
1249
1250         if (err)
1251                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1252                         err);
1253
1254 request_done:
1255         return err;
1256 }
1257
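/**
 * igb_free_irq - free the IRQs requested by igb_request_irq
 * @adapter: board private structure
 **/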
1258 static void igb_free_irq(struct igb_adapter *adapter)
1259 {
1260         if (adapter->msix_entries) {
1261                 int vector = 0, i;
1262
1263                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1264
1265                 for (i = 0; i < adapter->num_q_vectors; i++) {
1266                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1267                         free_irq(adapter->msix_entries[vector++].vector,
1268                                  q_vector);
1269                 }
1270         } else {
1271                 free_irq(adapter->pdev->irq, adapter);
1272         }
1273 }
1274
1275 /**
1276  * igb_irq_disable - Mask off interrupt generation on the NIC
1277  * @adapter: board private structure
1278  **/
1279 static void igb_irq_disable(struct igb_adapter *adapter)
1280 {
1281         struct e1000_hw *hw = &adapter->hw;
1282
1283         /*
1284          * We need to be careful when disabling interrupts.  The VFs are also
1285          * mapped into these registers, so clearing the bits can cause
1286          * issues for the VF drivers; we only clear what we set ourselves.
1287          */
1288         if (adapter->msix_entries) {
1289                 u32 regval = rd32(E1000_EIAM);
1290                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1291                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1292                 regval = rd32(E1000_EIAC);
1293                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1294         }
1295
1296         wr32(E1000_IAM, 0);
1297         wr32(E1000_IMC, ~0);
1298         wrfl();
1299         if (adapter->msix_entries) {
1300                 int i;
1301                 for (i = 0; i < adapter->num_q_vectors; i++)
1302                         synchronize_irq(adapter->msix_entries[i].vector);
1303         } else {
1304                 synchronize_irq(adapter->pdev->irq);
1305         }
1306 }
1307
1308 /**
1309  * igb_irq_enable - Enable default interrupt generation settings
1310  * @adapter: board private structure
1311  **/
1312 static void igb_irq_enable(struct igb_adapter *adapter)
1313 {
1314         struct e1000_hw *hw = &adapter->hw;
1315
1316         if (adapter->msix_entries) {
1317                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1318                 u32 regval = rd32(E1000_EIAC);
1319                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1320                 regval = rd32(E1000_EIAM);
1321                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1322                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1323                 if (adapter->vfs_allocated_count) {
1324                         wr32(E1000_MBVFIMR, 0xFF);
1325                         ims |= E1000_IMS_VMMB;
1326                 }
1327                 if (adapter->hw.mac.type == e1000_82580)
1328                         ims |= E1000_IMS_DRSTA;
1329
1330                 wr32(E1000_IMS, ims);
1331         } else {
1332                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1333                                 E1000_IMS_DRSTA);
1334                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1335                                 E1000_IMS_DRSTA);
1336         }
1337 }
1338
1339 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1340 {
1341         struct e1000_hw *hw = &adapter->hw;
1342         u16 vid = adapter->hw.mng_cookie.vlan_id;
1343         u16 old_vid = adapter->mng_vlan_id;
1344
1345         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1346                 /* add VID to filter table */
1347                 igb_vfta_set(hw, vid, true);
1348                 adapter->mng_vlan_id = vid;
1349         } else {
1350                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1351         }
1352
1353         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1354             (vid != old_vid) &&
1355             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1356                 /* remove VID from filter table */
1357                 igb_vfta_set(hw, old_vid, false);
1358         }
1359 }
1360
1361 /**
1362  * igb_release_hw_control - release control of the h/w to f/w
1363  * @adapter: address of board private structure
1364  *
1365  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1366  * For ASF and Pass Through versions of f/w this means that the
1367  * driver is no longer loaded.
1368  *
1369  **/
1370 static void igb_release_hw_control(struct igb_adapter *adapter)
1371 {
1372         struct e1000_hw *hw = &adapter->hw;
1373         u32 ctrl_ext;
1374
1375         /* Let firmware take over control of h/w */
1376         ctrl_ext = rd32(E1000_CTRL_EXT);
1377         wr32(E1000_CTRL_EXT,
1378                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1379 }
1380
1381 /**
1382  * igb_get_hw_control - get control of the h/w from f/w
1383  * @adapter: address of board private structure
1384  *
1385  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1386  * For ASF and Pass Through versions of f/w this means that
1387  * the driver is loaded.
1388  *
1389  **/
1390 static void igb_get_hw_control(struct igb_adapter *adapter)
1391 {
1392         struct e1000_hw *hw = &adapter->hw;
1393         u32 ctrl_ext;
1394
1395         /* Let firmware know the driver has taken over */
1396         ctrl_ext = rd32(E1000_CTRL_EXT);
1397         wr32(E1000_CTRL_EXT,
1398                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1399 }
1400
1401 /**
1402  * igb_configure - configure the hardware for RX and TX
1403  * @adapter: private board structure
1404  **/
1405 static void igb_configure(struct igb_adapter *adapter)
1406 {
1407         struct net_device *netdev = adapter->netdev;
1408         int i;
1409
1410         igb_get_hw_control(adapter);
1411         igb_set_rx_mode(netdev);
1412
1413         igb_restore_vlan(adapter);
1414
1415         igb_setup_tctl(adapter);
1416         igb_setup_mrqc(adapter);
1417         igb_setup_rctl(adapter);
1418
1419         igb_configure_tx(adapter);
1420         igb_configure_rx(adapter);
1421
1422         igb_rx_fifo_flush_82575(&adapter->hw);
1423
1424         /* call igb_desc_unused which always leaves
1425          * at least 1 descriptor unused to make sure
1426          * next_to_use != next_to_clean */
1427         for (i = 0; i < adapter->num_rx_queues; i++) {
1428                 struct igb_ring *ring = adapter->rx_ring[i];
1429                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1430         }
1431 }
1432
1433 /**
1434  * igb_power_up_link - Power up the phy/serdes link
1435  * @adapter: address of board private structure
1436  **/
1437 void igb_power_up_link(struct igb_adapter *adapter)
1438 {
1439         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440                 igb_power_up_phy_copper(&adapter->hw);
1441         else
1442                 igb_power_up_serdes_link_82575(&adapter->hw);
1443 }
1444
1445 /**
1446  * igb_power_down_link - Power down the phy/serdes link
1447  * @adapter: address of board private structure
1448  */
1449 static void igb_power_down_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_down_phy_copper_82575(&adapter->hw);
1453         else
1454                 igb_shutdown_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_up - Open the interface and prepare it to handle traffic
1459  * @adapter: board private structure
1460  **/
1461 int igb_up(struct igb_adapter *adapter)
1462 {
1463         struct e1000_hw *hw = &adapter->hw;
1464         int i;
1465
1466         /* hardware has been reset, we need to reload some things */
1467         igb_configure(adapter);
1468
1469         clear_bit(__IGB_DOWN, &adapter->state);
1470
1471         for (i = 0; i < adapter->num_q_vectors; i++) {
1472                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1473                 napi_enable(&q_vector->napi);
1474         }
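             /* restore the interrupt-to-queue vector mapping lost in the reset */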
1475         if (adapter->msix_entries)
1476                 igb_configure_msix(adapter);
1477         else
1478                 igb_assign_vector(adapter->q_vector[0], 0);
1479
1480         /* Clear any pending interrupts. */
1481         rd32(E1000_ICR);
1482         igb_irq_enable(adapter);
1483
1484         /* notify VFs that reset has been completed */
1485         if (adapter->vfs_allocated_count) {
1486                 u32 reg_data = rd32(E1000_CTRL_EXT);
1487                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1488                 wr32(E1000_CTRL_EXT, reg_data);
1489         }
1490
1491         netif_tx_start_all_queues(adapter->netdev);
1492
1493         /* start the watchdog. */
1494         hw->mac.get_link_status = 1;
1495         schedule_work(&adapter->watchdog_task);
1496
1497         return 0;
1498 }
1499
1500 void igb_down(struct igb_adapter *adapter)
1501 {
1502         struct net_device *netdev = adapter->netdev;
1503         struct e1000_hw *hw = &adapter->hw;
1504         u32 tctl, rctl;
1505         int i;
1506
1507         /* signal that we're down so the interrupt handler does not
1508          * reschedule our watchdog timer */
1509         set_bit(__IGB_DOWN, &adapter->state);
1510
1511         /* disable receives in the hardware */
1512         rctl = rd32(E1000_RCTL);
1513         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1514         /* flush and sleep below */
1515
1516         netif_tx_stop_all_queues(netdev);
1517
1518         /* disable transmits in the hardware */
1519         tctl = rd32(E1000_TCTL);
1520         tctl &= ~E1000_TCTL_EN;
1521         wr32(E1000_TCTL, tctl);
1522         /* flush both disables and wait for them to finish */
1523         wrfl();
1524         msleep(10);
1525
1526         for (i = 0; i < adapter->num_q_vectors; i++) {
1527                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1528                 napi_disable(&q_vector->napi);
1529         }
1530
1531         igb_irq_disable(adapter);
1532
1533         del_timer_sync(&adapter->watchdog_timer);
1534         del_timer_sync(&adapter->phy_info_timer);
1535
1536         netif_carrier_off(netdev);
1537
1538         /* record the stats before reset */
1539         spin_lock(&adapter->stats64_lock);
1540         igb_update_stats(adapter, &adapter->stats64);
1541         spin_unlock(&adapter->stats64_lock);
1542
1543         adapter->link_speed = 0;
1544         adapter->link_duplex = 0;
1545
1546         if (!pci_channel_offline(adapter->pdev))
1547                 igb_reset(adapter);
1548         igb_clean_all_tx_rings(adapter);
1549         igb_clean_all_rx_rings(adapter);
1550 #ifdef CONFIG_IGB_DCA
1551
1552         /* since we reset the hardware, DCA settings were cleared */
1553         igb_setup_dca(adapter);
1554 #endif
1555 }
1556
1557 void igb_reinit_locked(struct igb_adapter *adapter)
1558 {
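             /* serialize resets: wait until no other reset is in progress,
              * then bring the interface down and back up */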
1559         WARN_ON(in_interrupt());
1560         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1561                 msleep(1);
1562         igb_down(adapter);
1563         igb_up(adapter);
1564         clear_bit(__IGB_RESETTING, &adapter->state);
1565 }
1566
1567 void igb_reset(struct igb_adapter *adapter)
1568 {
1569         struct pci_dev *pdev = adapter->pdev;
1570         struct e1000_hw *hw = &adapter->hw;
1571         struct e1000_mac_info *mac = &hw->mac;
1572         struct e1000_fc_info *fc = &hw->fc;
1573         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1574         u16 hwm;
1575
1576         /* Repartition the PBA for MTUs greater than 9k.
1577          * CTRL.RST is required for the change to take effect.
1578          */
1579         switch (mac->type) {
1580         case e1000_i350:
1581         case e1000_82580:
1582                 pba = rd32(E1000_RXPBS);
1583                 pba = igb_rxpbs_adjust_82580(pba);
1584                 break;
1585         case e1000_82576:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1588                 break;
1589         case e1000_82575:
1590         default:
1591                 pba = E1000_PBA_34K;
1592                 break;
1593         }
1594
1595         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1596             (mac->type < e1000_82576)) {
1597                 /* adjust PBA for jumbo frames */
1598                 wr32(E1000_PBA, pba);
1599
1600                 /* To maintain wire speed transmits, the Tx FIFO should be
1601                  * large enough to accommodate two full transmit packets,
1602                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1603                  * the Rx FIFO should be large enough to accommodate at least
1604                  * one full receive packet and is similarly rounded up and
1605                  * expressed in KB. */
1606                 pba = rd32(E1000_PBA);
1607                 /* upper 16 bits hold the Tx packet buffer allocation size in KB */
1608                 tx_space = pba >> 16;
1609                 /* lower 16 bits hold the Rx packet buffer allocation size in KB */
1610                 pba &= 0xffff;
1611                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1612                  * packet, but don't include the Ethernet FCS because hardware appends it */
1613                 min_tx_space = (adapter->max_frame_size +
1614                                 sizeof(union e1000_adv_tx_desc) -
1615                                 ETH_FCS_LEN) * 2;
1616                 min_tx_space = ALIGN(min_tx_space, 1024);
1617                 min_tx_space >>= 10;
1618                 /* software strips receive CRC, so leave room for it */
1619                 min_rx_space = adapter->max_frame_size;
1620                 min_rx_space = ALIGN(min_rx_space, 1024);
1621                 min_rx_space >>= 10;
1622
1623                 /* If current Tx allocation is less than the min Tx FIFO size,
1624                  * and the min Tx FIFO size is less than the current Rx FIFO
1625                  * allocation, take space away from current Rx allocation */
1626                 if (tx_space < min_tx_space &&
1627                     ((min_tx_space - tx_space) < pba)) {
1628                         pba = pba - (min_tx_space - tx_space);
1629
1630                         /* if short on rx space, rx wins and must trump tx
1631                          * adjustment */
1632                         if (pba < min_rx_space)
1633                                 pba = min_rx_space;
1634                 }
1635                 wr32(E1000_PBA, pba);
1636         }
1637
1638         /* flow control settings */
1639         /* The high water mark must be low enough to fit one full frame
1640          * (or the size used for early receive) above it in the Rx FIFO.
1641          * Set it to the lower of:
1642          * - 90% of the Rx FIFO size, or
1643          * - the full Rx FIFO size minus space for two full frames */
1644         hwm = min(((pba << 10) * 9 / 10),
1645                         ((pba << 10) - 2 * adapter->max_frame_size));
1646
1647         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1648         fc->low_water = fc->high_water - 16;
1649         fc->pause_time = 0xFFFF;
1650         fc->send_xon = 1;
1651         fc->current_mode = fc->requested_mode;
1652
1653         /* disable receive for all VFs and wait one second */
1654         if (adapter->vfs_allocated_count) {
1655                 int i;
1656                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1657                         adapter->vf_data[i].flags = 0;
1658
1659                 /* ping all the active vfs to let them know we are going down */
1660                 igb_ping_all_vfs(adapter);
1661
1662                 /* disable transmits and receives */
1663                 wr32(E1000_VFRE, 0);
1664                 wr32(E1000_VFTE, 0);
1665         }
1666
1667         /* Allow time for pending master requests to run */
1668         hw->mac.ops.reset_hw(hw);
1669         wr32(E1000_WUC, 0);
1670
1671         if (hw->mac.ops.init_hw(hw))
1672                 dev_err(&pdev->dev, "Hardware Error\n");
1673
1674         if (hw->mac.type == e1000_82580) {
1675                 u32 reg = rd32(E1000_PCIEMISC);
1676                 wr32(E1000_PCIEMISC,
1677                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1678         }
1679         if (!netif_running(adapter->netdev))
1680                 igb_power_down_link(adapter);
1681
1682         igb_update_mng_vlan(adapter);
1683
1684         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1685         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1686
1687         igb_get_phy_info(hw);
1688 }
1689
1690 static const struct net_device_ops igb_netdev_ops = {
1691         .ndo_open               = igb_open,
1692         .ndo_stop               = igb_close,
1693         .ndo_start_xmit         = igb_xmit_frame_adv,
1694         .ndo_get_stats64        = igb_get_stats64,
1695         .ndo_set_rx_mode        = igb_set_rx_mode,
1696         .ndo_set_multicast_list = igb_set_rx_mode,
1697         .ndo_set_mac_address    = igb_set_mac,
1698         .ndo_change_mtu         = igb_change_mtu,
1699         .ndo_do_ioctl           = igb_ioctl,
1700         .ndo_tx_timeout         = igb_tx_timeout,
1701         .ndo_validate_addr      = eth_validate_addr,
1702         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1703         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1704         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1705         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1706         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1707         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1708         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1709 #ifdef CONFIG_NET_POLL_CONTROLLER
1710         .ndo_poll_controller    = igb_netpoll,
1711 #endif
1712 };
1713
1714 /**
1715  * igb_probe - Device Initialization Routine
1716  * @pdev: PCI device information struct
1717  * @ent: entry in igb_pci_tbl
1718  *
1719  * Returns 0 on success, negative on failure
1720  *
1721  * igb_probe initializes an adapter identified by a pci_dev structure.
1722  * The OS initialization, configuring of the adapter private structure,
1723  * and a hardware reset occur.
1724  **/
1725 static int __devinit igb_probe(struct pci_dev *pdev,
1726                                const struct pci_device_id *ent)
1727 {
1728         struct net_device *netdev;
1729         struct igb_adapter *adapter;
1730         struct e1000_hw *hw;
1731         u16 eeprom_data = 0;
1732         s32 ret_val;
1733         static int global_quad_port_a; /* global quad port a indication */
1734         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1735         unsigned long mmio_start, mmio_len;
1736         int err, pci_using_dac;
1737         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1738         u8 part_str[E1000_PBANUM_LENGTH];
1739
1740         /* Catch broken hardware that put the wrong VF device ID in
1741          * the PCIe SR-IOV capability.
1742          */
1743         if (pdev->is_virtfn) {
1744                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1745                      pci_name(pdev), pdev->vendor, pdev->device);
1746                 return -EINVAL;
1747         }
1748
1749         err = pci_enable_device_mem(pdev);
1750         if (err)
1751                 return err;
1752
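         /* prefer 64-bit DMA; fall back to a 32-bit mask if it is not usable */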
1753         pci_using_dac = 0;
1754         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1755         if (!err) {
1756                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1757                 if (!err)
1758                         pci_using_dac = 1;
1759         } else {
1760                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1761                 if (err) {
1762                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1763                         if (err) {
1764                                 dev_err(&pdev->dev, "No usable DMA "
1765                                         "configuration, aborting\n");
1766                                 goto err_dma;
1767                         }
1768                 }
1769         }
1770
1771         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1772                                            IORESOURCE_MEM),
1773                                            igb_driver_name);
1774         if (err)
1775                 goto err_pci_reg;
1776
1777         pci_enable_pcie_error_reporting(pdev);
1778
1779         pci_set_master(pdev);
1780         pci_save_state(pdev);
1781
1782         err = -ENOMEM;
1783         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1784                                    IGB_ABS_MAX_TX_QUEUES);
1785         if (!netdev)
1786                 goto err_alloc_etherdev;
1787
1788         SET_NETDEV_DEV(netdev, &pdev->dev);
1789
1790         pci_set_drvdata(pdev, netdev);
1791         adapter = netdev_priv(netdev);
1792         adapter->netdev = netdev;
1793         adapter->pdev = pdev;
1794         hw = &adapter->hw;
1795         hw->back = adapter;
1796         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1797
1798         mmio_start = pci_resource_start(pdev, 0);
1799         mmio_len = pci_resource_len(pdev, 0);
1800
1801         err = -EIO;
1802         hw->hw_addr = ioremap(mmio_start, mmio_len);
1803         if (!hw->hw_addr)
1804                 goto err_ioremap;
1805
1806         netdev->netdev_ops = &igb_netdev_ops;
1807         igb_set_ethtool_ops(netdev);
1808         netdev->watchdog_timeo = 5 * HZ;
1809
1810         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1811
1812         netdev->mem_start = mmio_start;
1813         netdev->mem_end = mmio_start + mmio_len;
1814
1815         /* PCI config space info */
1816         hw->vendor_id = pdev->vendor;
1817         hw->device_id = pdev->device;
1818         hw->revision_id = pdev->revision;
1819         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1820         hw->subsystem_device_id = pdev->subsystem_device;
1821
1822         /* Copy the default MAC, PHY and NVM function pointers */
1823         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1824         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1825         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1826         /* Initialize skew-specific constants */
1827         err = ei->get_invariants(hw);
1828         if (err)
1829                 goto err_sw_init;
1830
1831         /* setup the private structure */
1832         err = igb_sw_init(adapter);
1833         if (err)
1834                 goto err_sw_init;
1835
1836         igb_get_bus_info_pcie(hw);
1837
1838         hw->phy.autoneg_wait_to_complete = false;
1839
1840         /* Copper options */
1841         if (hw->phy.media_type == e1000_media_type_copper) {
1842                 hw->phy.mdix = AUTO_ALL_MODES;
1843                 hw->phy.disable_polarity_correction = false;
1844                 hw->phy.ms_type = e1000_ms_hw_default;
1845         }
1846
1847         if (igb_check_reset_block(hw))
1848                 dev_info(&pdev->dev,
1849                         "PHY reset is blocked due to SOL/IDER session.\n");
1850
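         /* set the base hardware offload features advertised to the stack */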
1851         netdev->features = NETIF_F_SG |
1852                            NETIF_F_IP_CSUM |
1853                            NETIF_F_HW_VLAN_TX |
1854                            NETIF_F_HW_VLAN_RX |
1855                            NETIF_F_HW_VLAN_FILTER;
1856
1857         netdev->features |= NETIF_F_IPV6_CSUM;
1858         netdev->features |= NETIF_F_TSO;
1859         netdev->features |= NETIF_F_TSO6;
1860         netdev->features |= NETIF_F_GRO;
1861
1862         netdev->vlan_features |= NETIF_F_TSO;
1863         netdev->vlan_features |= NETIF_F_TSO6;
1864         netdev->vlan_features |= NETIF_F_IP_CSUM;
1865         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1866         netdev->vlan_features |= NETIF_F_SG;
1867
1868         if (pci_using_dac) {
1869                 netdev->features |= NETIF_F_HIGHDMA;
1870                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1871         }
1872
1873         if (hw->mac.type >= e1000_82576)
1874                 netdev->features |= NETIF_F_SCTP_CSUM;
1875
1876         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1877
1878         /* before reading the NVM, reset the controller to put the device in a
1879          * known good starting state */
1880         hw->mac.ops.reset_hw(hw);
1881
1882         /* make sure the NVM is good */
1883         if (igb_validate_nvm_checksum(hw) < 0) {
1884                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1885                 err = -EIO;
1886                 goto err_eeprom;
1887         }
1888
1889         /* copy the MAC address out of the NVM */
1890         if (hw->mac.ops.read_mac_addr(hw))
1891                 dev_err(&pdev->dev, "NVM Read Error\n");
1892
1893         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1894         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1895
1896         if (!is_valid_ether_addr(netdev->perm_addr)) {
1897                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1898                 err = -EIO;
1899                 goto err_eeprom;
1900         }
1901
1902         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1903                     (unsigned long) adapter);
1904         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1905                     (unsigned long) adapter);
1906
1907         INIT_WORK(&adapter->reset_task, igb_reset_task);
1908         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1909
1910         /* Initialize link properties that are user-changeable */
1911         adapter->fc_autoneg = true;
1912         hw->mac.autoneg = true;
1913         hw->phy.autoneg_advertised = 0x2f;
1914
1915         hw->fc.requested_mode = e1000_fc_default;
1916         hw->fc.current_mode = e1000_fc_default;
1917
1918         igb_validate_mdi_setting(hw);
1919
1920         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1921          * enable the ACPI Magic Packet filter
1922          */
1923
1924         if (hw->bus.func == 0)
1925                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1926         else if (hw->mac.type == e1000_82580)
1927                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1928                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1929                                  &eeprom_data);
1930         else if (hw->bus.func == 1)
1931                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1932
1933         if (eeprom_data & eeprom_apme_mask)
1934                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1935
1936         /* now that we have the eeprom settings, apply the special cases where
1937          * the eeprom may be wrong or the board simply won't support wake on
1938          * lan on a particular port */
1939         switch (pdev->device) {
1940         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1941                 adapter->eeprom_wol = 0;
1942                 break;
1943         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1944         case E1000_DEV_ID_82576_FIBER:
1945         case E1000_DEV_ID_82576_SERDES:
1946                 /* Wake events only supported on port A for dual fiber
1947                  * regardless of eeprom setting */
1948                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1949                         adapter->eeprom_wol = 0;
1950                 break;
1951         case E1000_DEV_ID_82576_QUAD_COPPER:
1952         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1953                 /* if quad port adapter, disable WoL on all but port A */
1954                 if (global_quad_port_a != 0)
1955                         adapter->eeprom_wol = 0;
1956                 else
1957                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1958                 /* Reset for multiple quad port adapters */
1959                 if (++global_quad_port_a == 4)
1960                         global_quad_port_a = 0;
1961                 break;
1962         }
1963
1964         /* initialize the wol settings based on the eeprom settings */
1965         adapter->wol = adapter->eeprom_wol;
1966         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1967
1968         /* reset the hardware with the new settings */
1969         igb_reset(adapter);
1970
1971         /* let the f/w know that the h/w is now under the control of the
1972          * driver. */
1973         igb_get_hw_control(adapter);
1974
1975         strcpy(netdev->name, "eth%d");
1976         err = register_netdev(netdev);
1977         if (err)
1978                 goto err_register;
1979
1980         /* carrier off reporting is important to ethtool even BEFORE open */
1981         netif_carrier_off(netdev);
1982
1983 #ifdef CONFIG_IGB_DCA
1984         if (dca_add_requester(&pdev->dev) == 0) {
1985                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1986                 dev_info(&pdev->dev, "DCA enabled\n");
1987                 igb_setup_dca(adapter);
1988         }
1989
1990 #endif
1991         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1992         /* print bus type/speed/width info */
1993         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1994                  netdev->name,
1995                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1996                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1997                                                             "unknown"),
1998                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1999                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2000                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2001                    "unknown"),
2002                  netdev->dev_addr);
2003
2004         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2005         if (ret_val)
2006                 strcpy(part_str, "Unknown");
2007         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2008         dev_info(&pdev->dev,
2009                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2010                 adapter->msix_entries ? "MSI-X" :
2011                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2012                 adapter->num_rx_queues, adapter->num_tx_queues);
2013
2014         return 0;
2015
2016 err_register:
2017         igb_release_hw_control(adapter);
2018 err_eeprom:
2019         if (!igb_check_reset_block(hw))
2020                 igb_reset_phy(hw);
2021
2022         if (hw->flash_address)
2023                 iounmap(hw->flash_address);
2024 err_sw_init:
2025         igb_clear_interrupt_scheme(adapter);
2026         iounmap(hw->hw_addr);
2027 err_ioremap:
2028         free_netdev(netdev);
2029 err_alloc_etherdev:
2030         pci_release_selected_regions(pdev,
2031                                      pci_select_bars(pdev, IORESOURCE_MEM));
2032 err_pci_reg:
2033 err_dma:
2034         pci_disable_device(pdev);
2035         return err;
2036 }
2037
2038 /**
2039  * igb_remove - Device Removal Routine
2040  * @pdev: PCI device information struct
2041  *
2042  * igb_remove is called by the PCI subsystem to alert the driver
2043  * that it should release a PCI device.  This could be caused by a
2044  * Hot-Plug event, or because the driver is going to be removed from
2045  * memory.
2046  **/
2047 static void __devexit igb_remove(struct pci_dev *pdev)
2048 {
2049         struct net_device *netdev = pci_get_drvdata(pdev);
2050         struct igb_adapter *adapter = netdev_priv(netdev);
2051         struct e1000_hw *hw = &adapter->hw;
2052
2053         /*
2054          * The watchdog timer may be rescheduled, so explicitly
2055          * disable it from being rescheduled.
2056          */
2057         set_bit(__IGB_DOWN, &adapter->state);
2058         del_timer_sync(&adapter->watchdog_timer);
2059         del_timer_sync(&adapter->phy_info_timer);
2060
2061         cancel_work_sync(&adapter->reset_task);
2062         cancel_work_sync(&adapter->watchdog_task);
2063
2064 #ifdef CONFIG_IGB_DCA
2065         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2066                 dev_info(&pdev->dev, "DCA disabled\n");
2067                 dca_remove_requester(&pdev->dev);
2068                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2069                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2070         }
2071 #endif
2072
2073         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2074          * would have already happened in close and is redundant. */
2075         igb_release_hw_control(adapter);
2076
2077         unregister_netdev(netdev);
2078
2079         igb_clear_interrupt_scheme(adapter);
2080
2081 #ifdef CONFIG_PCI_IOV
2082         /* reclaim resources allocated to VFs */
2083         if (adapter->vf_data) {
2084                 /* disable iov and allow time for transactions to clear */
2085                 pci_disable_sriov(pdev);
2086                 msleep(500);
2087
2088                 kfree(adapter->vf_data);
2089                 adapter->vf_data = NULL;
2090                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2091                 msleep(100);
2092                 dev_info(&pdev->dev, "IOV Disabled\n");
2093         }
2094 #endif
2095
2096         iounmap(hw->hw_addr);
2097         if (hw->flash_address)
2098                 iounmap(hw->flash_address);
2099         pci_release_selected_regions(pdev,
2100                                      pci_select_bars(pdev, IORESOURCE_MEM));
2101
2102         free_netdev(netdev);
2103
2104         pci_disable_pcie_error_reporting(pdev);
2105
2106         pci_disable_device(pdev);
2107 }
2108
2109 /**
2110  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2111  * @adapter: board private structure to initialize
2112  *
2113  * This function initializes the VF-specific data storage and then attempts to
2114  * allocate the VFs.  The reason for this ordering is that it is much more
2115  * expensive time-wise to disable SR-IOV than it is to allocate and free
2116  * the memory for the VFs.
2117  **/
2118 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2119 {
2120 #ifdef CONFIG_PCI_IOV
2121         struct pci_dev *pdev = adapter->pdev;
2122
2123         if (adapter->vfs_allocated_count) {
2124                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2125                                            sizeof(struct vf_data_storage),
2126                                            GFP_KERNEL);
2127                 /* if allocation failed then we do not support SR-IOV */
2128                 if (!adapter->vf_data) {
2129                         adapter->vfs_allocated_count = 0;
2130                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2131                                 "Data Storage\n");
2132                 }
2133         }
2134
2135         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2136                 kfree(adapter->vf_data);
2137                 adapter->vf_data = NULL;
2138 #endif /* CONFIG_PCI_IOV */
2139                 adapter->vfs_allocated_count = 0;
2140 #ifdef CONFIG_PCI_IOV
2141         } else {
2142                 unsigned char mac_addr[ETH_ALEN];
2143                 int i;
2144                 dev_info(&pdev->dev, "%d vfs allocated\n",
2145                          adapter->vfs_allocated_count);
2146                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2147                         random_ether_addr(mac_addr);
2148                         igb_set_vf_mac(adapter, i, mac_addr);
2149                 }
2150         }
2151 #endif /* CONFIG_PCI_IOV */
2152 }
2153
2154
2155 /**
2156  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2157  * @adapter: board private structure to initialize
2158  *
2159  * igb_init_hw_timer initializes the function pointer and values for the hw
2160  * timer found in hardware.
2161  **/
2162 static void igb_init_hw_timer(struct igb_adapter *adapter)
2163 {
2164         struct e1000_hw *hw = &adapter->hw;
2165
2166         switch (hw->mac.type) {
2167         case e1000_i350:
2168         case e1000_82580:
2169                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2170                 adapter->cycles.read = igb_read_clock;
2171                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2172                 adapter->cycles.mult = 1;
2173                 /*
2174                  * The 82580 timesync updates the system timer by 8ns every 8ns,
2175                  * and the value cannot be shifted.  Instead we need to shift
2176                  * the registers to generate a 64-bit timer value.  As a result
2177                  * SYSTIMR/L/H, TXSTMPL/H and RXSTMPL/H all have to be shifted by
2178                  * 24 in order to generate a larger value for synchronization.
2179                  */
2180                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2181                 /* disable system timer temporarily by setting bit 31 */
2182                 wr32(E1000_TSAUXC, 0x80000000);
2183                 wrfl();
2184
2185                 /* Set registers so that rollover occurs soon to test this. */
2186                 wr32(E1000_SYSTIMR, 0x00000000);
2187                 wr32(E1000_SYSTIML, 0x80000000);
2188                 wr32(E1000_SYSTIMH, 0x000000FF);
2189                 wrfl();
2190
2191                 /* enable system timer by clearing bit 31 */
2192                 wr32(E1000_TSAUXC, 0x0);
2193                 wrfl();
2194
2195                 timecounter_init(&adapter->clock,
2196                                  &adapter->cycles,
2197                                  ktime_to_ns(ktime_get_real()));
2198                 /*
2199                  * Synchronize our NIC clock against the system wall clock.  NIC
2200                  * time stamp reading requires ~3us per sample; each sample was
2201                  * pretty stable even under load, so only 10 samples are required
2202                  * for each offset comparison.
2203                  */
2204                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2205                 adapter->compare.source = &adapter->clock;
2206                 adapter->compare.target = ktime_get_real;
2207                 adapter->compare.num_samples = 10;
2208                 timecompare_update(&adapter->compare, 0);
2209                 break;
2210         case e1000_82576:
2211                 /*
2212                  * Initialize hardware timer: we keep it running just in case
2213                  * that some program needs it later on.
2214                  */
2215                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2216                 adapter->cycles.read = igb_read_clock;
2217                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2218                 adapter->cycles.mult = 1;
2219                 /*
2220                  * Scale the NIC clock cycle by a large factor so that
2221                  * relatively small clock corrections can be added or
2222                  * subtracted at each clock tick. The drawbacks of a large
2223                  * factor are a) that the clock register overflows more quickly
2224                  * (not such a big deal) and b) that the increment per tick has
2225                  * to fit into 24 bits.  As a result we need to use a shift of
2226                  * 19 so we can fit a value of 16 into the TIMINCA register.
2227                  */
2228                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2229                 wr32(E1000_TIMINCA,
2230                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2231                                 (16 << IGB_82576_TSYNC_SHIFT));
2232
2233                 /* Set registers so that rollover occurs soon to test this. */
2234                 wr32(E1000_SYSTIML, 0x00000000);
2235                 wr32(E1000_SYSTIMH, 0xFF800000);
2236                 wrfl();
2237
2238                 timecounter_init(&adapter->clock,
2239                                  &adapter->cycles,
2240                                  ktime_to_ns(ktime_get_real()));
2241                 /*
2242                  * Synchronize our NIC clock against the system wall clock.  NIC
2243                  * time stamp reading requires ~3us per sample; each sample was
2244                  * pretty stable even under load, so only 10 samples are required
2245                  * for each offset comparison.
2246                  */
2247                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2248                 adapter->compare.source = &adapter->clock;
2249                 adapter->compare.target = ktime_get_real;
2250                 adapter->compare.num_samples = 10;
2251                 timecompare_update(&adapter->compare, 0);
2252                 break;
2253         case e1000_82575:
2254                 /* 82575 does not support timesync */
2255         default:
2256                 break;
2257         }
2258
2259 }
2260
2261 /**
2262  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2263  * @adapter: board private structure to initialize
2264  *
2265  * igb_sw_init initializes the Adapter private data structure.
2266  * Fields are initialized based on PCI device information and
2267  * OS network device settings (MTU size).
2268  **/
2269 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2270 {
2271         struct e1000_hw *hw = &adapter->hw;
2272         struct net_device *netdev = adapter->netdev;
2273         struct pci_dev *pdev = adapter->pdev;
2274
2275         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2276
2277         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2278         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2279         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2280         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2281
2282         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2283         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2284
2285         spin_lock_init(&adapter->stats64_lock);
2286 #ifdef CONFIG_PCI_IOV
2287         if (hw->mac.type == e1000_82576)
2288                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2289
2290 #endif /* CONFIG_PCI_IOV */
2291         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2292
2293         /*
2294          * If rss_queues > 4, or VFs are going to be allocated while more than
2295          * one RSS queue is in use, combine the queues into queue pairs in
2296          * order to conserve the limited supply of interrupt vectors.
2297          */
2298         if ((adapter->rss_queues > 4) ||
2299             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2300                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2301
2302         /* This call may decrease the number of queues */
2303         if (igb_init_interrupt_scheme(adapter)) {
2304                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2305                 return -ENOMEM;
2306         }
2307
2308         igb_init_hw_timer(adapter);
2309         igb_probe_vfs(adapter);
2310
2311         /* Explicitly disable IRQ since the NIC can be in any state. */
2312         igb_irq_disable(adapter);
2313
2314         set_bit(__IGB_DOWN, &adapter->state);
2315         return 0;
2316 }
2317
2318 /**
2319  * igb_open - Called when a network interface is made active
2320  * @netdev: network interface device structure
2321  *
2322  * Returns 0 on success, negative value on failure
2323  *
2324  * The open entry point is called when a network interface is made
2325  * active by the system (IFF_UP).  At this point all resources needed
2326  * for transmit and receive operations are allocated, the interrupt
2327  * handler is registered with the OS, the watchdog timer is started,
2328  * and the stack is notified that the interface is ready.
2329  **/
2330 static int igb_open(struct net_device *netdev)
2331 {
2332         struct igb_adapter *adapter = netdev_priv(netdev);
2333         struct e1000_hw *hw = &adapter->hw;
2334         int err;
2335         int i;
2336
2337         /* disallow open during test */
2338         if (test_bit(__IGB_TESTING, &adapter->state))
2339                 return -EBUSY;
2340
2341         netif_carrier_off(netdev);
2342
2343         /* allocate transmit descriptors */
2344         err = igb_setup_all_tx_resources(adapter);
2345         if (err)
2346                 goto err_setup_tx;
2347
2348         /* allocate receive descriptors */
2349         err = igb_setup_all_rx_resources(adapter);
2350         if (err)
2351                 goto err_setup_rx;
2352
2353         igb_power_up_link(adapter);
2354
2355         /* before we allocate an interrupt, we must be ready to handle it.
2356          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2357          * as soon as we call pci_request_irq, so we have to setup our
2358          * clean_rx handler before we do so.  */
2359         igb_configure(adapter);
2360
2361         err = igb_request_irq(adapter);
2362         if (err)
2363                 goto err_req_irq;
2364
2365         /* From here on the code is the same as igb_up() */
2366         clear_bit(__IGB_DOWN, &adapter->state);
2367
2368         for (i = 0; i < adapter->num_q_vectors; i++) {
2369                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2370                 napi_enable(&q_vector->napi);
2371         }
2372
2373         /* Clear any pending interrupts. */
2374         rd32(E1000_ICR);
2375
2376         igb_irq_enable(adapter);
2377
2378         /* notify VFs that reset has been completed */
2379         if (adapter->vfs_allocated_count) {
2380                 u32 reg_data = rd32(E1000_CTRL_EXT);
2381                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2382                 wr32(E1000_CTRL_EXT, reg_data);
2383         }
2384
2385         netif_tx_start_all_queues(netdev);
2386
2387         /* start the watchdog. */
2388         hw->mac.get_link_status = 1;
2389         schedule_work(&adapter->watchdog_task);
2390
2391         return 0;
2392
2393 err_req_irq:
2394         igb_release_hw_control(adapter);
2395         igb_power_down_link(adapter);
2396         igb_free_all_rx_resources(adapter);
2397 err_setup_rx:
2398         igb_free_all_tx_resources(adapter);
2399 err_setup_tx:
2400         igb_reset(adapter);
2401
2402         return err;
2403 }
2404
2405 /**
2406  * igb_close - Disables a network interface
2407  * @netdev: network interface device structure
2408  *
2409  * Returns 0, this is not allowed to fail
2410  *
2411  * The close entry point is called when an interface is de-activated
2412  * by the OS.  The hardware is still under the driver's control, but
2413  * needs to be disabled.  A global MAC reset is issued to stop the
2414  * hardware, and all transmit and receive resources are freed.
2415  **/
2416 static int igb_close(struct net_device *netdev)
2417 {
2418         struct igb_adapter *adapter = netdev_priv(netdev);
2419
2420         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2421         igb_down(adapter);
2422
2423         igb_free_irq(adapter);
2424
2425         igb_free_all_tx_resources(adapter);
2426         igb_free_all_rx_resources(adapter);
2427
2428         return 0;
2429 }
2430
2431 /**
2432  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2433  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2434  *
2435  * Return 0 on success, negative on failure
2436  **/
2437 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2438 {
2439         struct device *dev = tx_ring->dev;
2440         int size;
2441
2442         size = sizeof(struct igb_buffer) * tx_ring->count;
2443         tx_ring->buffer_info = vzalloc(size);
2444         if (!tx_ring->buffer_info)
2445                 goto err;
2446
2447         /* round up to nearest 4K */
2448         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2449         tx_ring->size = ALIGN(tx_ring->size, 4096);
2450
2451         tx_ring->desc = dma_alloc_coherent(dev,
2452                                            tx_ring->size,
2453                                            &tx_ring->dma,
2454                                            GFP_KERNEL);
2455
2456         if (!tx_ring->desc)
2457                 goto err;
2458
2459         tx_ring->next_to_use = 0;
2460         tx_ring->next_to_clean = 0;
2461         return 0;
2462
2463 err:
2464         vfree(tx_ring->buffer_info);
2465         dev_err(dev,
2466                 "Unable to allocate memory for the transmit descriptor ring\n");
2467         return -ENOMEM;
2468 }
2469
2470 /**
2471  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2472  *                                (Descriptors) for all queues
2473  * @adapter: board private structure
2474  *
2475  * Return 0 on success, negative on failure
2476  **/
2477 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2478 {
2479         struct pci_dev *pdev = adapter->pdev;
2480         int i, err = 0;
2481
2482         for (i = 0; i < adapter->num_tx_queues; i++) {
2483                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2484                 if (err) {
2485                         dev_err(&pdev->dev,
2486                                 "Allocation for Tx Queue %u failed\n", i);
2487                         for (i--; i >= 0; i--)
2488                                 igb_free_tx_resources(adapter->tx_ring[i]);
2489                         break;
2490                 }
2491         }
2492
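         /* map the full range of absolute Tx queue indices onto the rings we
          * actually allocated, wrapping round-robin */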
2493         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2494                 int r_idx = i % adapter->num_tx_queues;
2495                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2496         }
2497         return err;
2498 }
2499
2500 /**
2501  * igb_setup_tctl - configure the transmit control registers
2502  * @adapter: Board private structure
2503  **/
2504 void igb_setup_tctl(struct igb_adapter *adapter)
2505 {
2506         struct e1000_hw *hw = &adapter->hw;
2507         u32 tctl;
2508
2509         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2510         wr32(E1000_TXDCTL(0), 0);
2511
2512         /* Program the Transmit Control Register */
2513         tctl = rd32(E1000_TCTL);
2514         tctl &= ~E1000_TCTL_CT;
2515         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2516                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2517
2518         igb_config_collision_dist(hw);
2519
2520         /* Enable transmits */
2521         tctl |= E1000_TCTL_EN;
2522
2523         wr32(E1000_TCTL, tctl);
2524 }
2525
2526 /**
2527  * igb_configure_tx_ring - Configure transmit ring after Reset
2528  * @adapter: board private structure
2529  * @ring: tx ring to configure
2530  *
2531  * Configure a transmit ring after a reset.
2532  **/
2533 void igb_configure_tx_ring(struct igb_adapter *adapter,
2534                            struct igb_ring *ring)
2535 {
2536         struct e1000_hw *hw = &adapter->hw;
2537         u32 txdctl;
2538         u64 tdba = ring->dma;
2539         int reg_idx = ring->reg_idx;
2540
2541         /* disable the queue */
2542         txdctl = rd32(E1000_TXDCTL(reg_idx));
2543         wr32(E1000_TXDCTL(reg_idx),
2544                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2545         wrfl();
2546         mdelay(10);
2547
2548         wr32(E1000_TDLEN(reg_idx),
2549                         ring->count * sizeof(union e1000_adv_tx_desc));
2550         wr32(E1000_TDBAL(reg_idx),
2551                         tdba & 0x00000000ffffffffULL);
2552         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2553
2554         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2555         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2556         writel(0, ring->head);
2557         writel(0, ring->tail);
2558
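         /* set the prefetch, host, and write-back thresholds for the queue */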
2559         txdctl |= IGB_TX_PTHRESH;
2560         txdctl |= IGB_TX_HTHRESH << 8;
2561         txdctl |= IGB_TX_WTHRESH << 16;
2562
2563         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2564         wr32(E1000_TXDCTL(reg_idx), txdctl);
2565 }
2566
2567 /**
2568  * igb_configure_tx - Configure transmit Unit after Reset
2569  * @adapter: board private structure
2570  *
2571  * Configure the Tx unit of the MAC after a reset.
2572  **/
2573 static void igb_configure_tx(struct igb_adapter *adapter)
2574 {
2575         int i;
2576
2577         for (i = 0; i < adapter->num_tx_queues; i++)
2578                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2579 }
2580
2581 /**
2582  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2583  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2584  *
2585  * Returns 0 on success, negative on failure
2586  **/
2587 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2588 {
2589         struct device *dev = rx_ring->dev;
2590         int size, desc_len;
2591
2592         size = sizeof(struct igb_buffer) * rx_ring->count;
2593         rx_ring->buffer_info = vzalloc(size);
2594         if (!rx_ring->buffer_info)
2595                 goto err;
2596
2597         desc_len = sizeof(union e1000_adv_rx_desc);
2598
2599         /* Round up to nearest 4K */
2600         rx_ring->size = rx_ring->count * desc_len;
2601         rx_ring->size = ALIGN(rx_ring->size, 4096);
2602
2603         rx_ring->desc = dma_alloc_coherent(dev,
2604                                            rx_ring->size,
2605                                            &rx_ring->dma,
2606                                            GFP_KERNEL);
2607
2608         if (!rx_ring->desc)
2609                 goto err;
2610
2611         rx_ring->next_to_clean = 0;
2612         rx_ring->next_to_use = 0;
2613
2614         return 0;
2615
2616 err:
2617         vfree(rx_ring->buffer_info);
2618         rx_ring->buffer_info = NULL;
2619         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2620                 " ring\n");
2621         return -ENOMEM;
2622 }
2623
2624 /**
2625  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2626  *                                (Descriptors) for all queues
2627  * @adapter: board private structure
2628  *
2629  * Return 0 on success, negative on failure
2630  **/
2631 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2632 {
2633         struct pci_dev *pdev = adapter->pdev;
2634         int i, err = 0;
2635
2636         for (i = 0; i < adapter->num_rx_queues; i++) {
2637                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2638                 if (err) {
2639                         dev_err(&pdev->dev,
2640                                 "Allocation for Rx Queue %u failed\n", i);
2641                         for (i--; i >= 0; i--)
2642                                 igb_free_rx_resources(adapter->rx_ring[i]);
2643                         break;
2644                 }
2645         }
2646
2647         return err;
2648 }
2649
2650 /**
2651  * igb_setup_mrqc - configure the multiple receive queue control registers
2652  * @adapter: Board private structure
2653  **/
2654 static void igb_setup_mrqc(struct igb_adapter *adapter)
2655 {
2656         struct e1000_hw *hw = &adapter->hw;
2657         u32 mrqc, rxcsum;
2658         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2659         union e1000_reta {
2660                 u32 dword;
2661                 u8  bytes[4];
2662         } reta;
2663         static const u8 rsshash[40] = {
2664                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2665                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2666                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2667                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2668
2669         /* Fill out hash function seeds */
2670         for (j = 0; j < 10; j++) {
2671                 u32 rsskey = rsshash[(j * 4)];
2672                 rsskey |= rsshash[(j * 4) + 1] << 8;
2673                 rsskey |= rsshash[(j * 4) + 2] << 16;
2674                 rsskey |= rsshash[(j * 4) + 3] << 24;
2675                 array_wr32(E1000_RSSRK(0), j, rsskey);
2676         }
2677
2678         num_rx_queues = adapter->rss_queues;
2679
2680         if (adapter->vfs_allocated_count) {
2681                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2682                 switch (hw->mac.type) {
2683                 case e1000_i350:
2684                 case e1000_82580:
2685                         num_rx_queues = 1;
2686                         shift = 0;
2687                         break;
2688                 case e1000_82576:
2689                         shift = 3;
2690                         num_rx_queues = 2;
2691                         break;
2692                 case e1000_82575:
2693                         shift = 2;
2694                         shift2 = 6;
2695                 default:
2696                         break;
2697                 }
2698         } else {
2699                 if (hw->mac.type == e1000_82575)
2700                         shift = 6;
2701         }
2702
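         /* populate the 128-entry redirection table, spreading the entries
          * across the available queues and writing it out one dword at a time */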
2703         for (j = 0; j < (32 * 4); j++) {
2704                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2705                 if (shift2)
2706                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2707                 if ((j & 3) == 3)
2708                         wr32(E1000_RETA(j >> 2), reta.dword);
2709         }
2710
2711         /*
2712          * Disable raw packet checksumming so that RSS hash is placed in
2713          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2714          * offloads as they are enabled by default
2715          */
2716         rxcsum = rd32(E1000_RXCSUM);
2717         rxcsum |= E1000_RXCSUM_PCSD;
2718
2719         if (adapter->hw.mac.type >= e1000_82576)
2720                 /* Enable Receive Checksum Offload for SCTP */
2721                 rxcsum |= E1000_RXCSUM_CRCOFL;
2722
2723         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2724         wr32(E1000_RXCSUM, rxcsum);
2725
2726         /* If VMDq is enabled then we set the appropriate mode for that, else
2727          * we default to RSS so that an RSS hash is calculated per packet even
2728          * if we are only using one queue */
2729         if (adapter->vfs_allocated_count) {
2730                 if (hw->mac.type > e1000_82575) {
2731                         /* Set the default pool for the PF's first queue */
2732                         u32 vtctl = rd32(E1000_VT_CTL);
2733                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2734                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2735                         vtctl |= adapter->vfs_allocated_count <<
2736                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2737                         wr32(E1000_VT_CTL, vtctl);
2738                 }
2739                 if (adapter->rss_queues > 1)
2740                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2741                 else
2742                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2743         } else {
2744                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2745         }
2746         igb_vmm_control(adapter);
2747
2748         /*
2749          * Generate RSS hash based on TCP port numbers and/or
2750          * IPv4/v6 src and dst addresses since UDP cannot be
2751          * hashed reliably due to IP fragmentation
2752          */
2753         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2754                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2755                 E1000_MRQC_RSS_FIELD_IPV6 |
2756                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2757                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2758
2759         wr32(E1000_MRQC, mrqc);
2760 }
2761
2762 /**
2763  * igb_setup_rctl - configure the receive control registers
2764  * @adapter: board private structure
2765  **/
2766 void igb_setup_rctl(struct igb_adapter *adapter)
2767 {
2768         struct e1000_hw *hw = &adapter->hw;
2769         u32 rctl;
2770
2771         rctl = rd32(E1000_RCTL);
2772
2773         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2774         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2775
2776         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2777                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2778
2779         /*
2780          * enable stripping of CRC. It's unlikely this will break BMC
2781          * redirection as it did with e1000. Newer features require
2782          * that the HW strips the CRC.
2783          */
2784         rctl |= E1000_RCTL_SECRC;
2785
2786         /* disable store bad packets and clear size bits. */
2787         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2788
2789         /* enable LPE to prevent packets larger than max_frame_size */
2790         rctl |= E1000_RCTL_LPE;
2791
2792         /* disable queue 0 to prevent tail write w/o re-config */
2793         wr32(E1000_RXDCTL(0), 0);
2794
2795         /* Attention!!!  For SR-IOV PF driver operations you must enable
2796          * queue drop for all VF and PF queues to prevent head-of-line blocking
2797          * if an untrusted VF does not provide descriptors to hardware.
2798          */
2799         if (adapter->vfs_allocated_count) {
2800                 /* set all queue drop enable bits */
2801                 wr32(E1000_QDE, ALL_QUEUES);
2802         }
2803
2804         wr32(E1000_RCTL, rctl);
2805 }
2806
2807 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2808                                    int vfn)
2809 {
2810         struct e1000_hw *hw = &adapter->hw;
2811         u32 vmolr;
2812
2813         /* if it isn't the PF, check to see if VFs are enabled and
2814          * increase the size to support vlan tags */
2815         if (vfn < adapter->vfs_allocated_count &&
2816             adapter->vf_data[vfn].vlans_enabled)
2817                 size += VLAN_TAG_SIZE;
2818
2819         vmolr = rd32(E1000_VMOLR(vfn));
2820         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2821         vmolr |= size | E1000_VMOLR_LPE;
2822         wr32(E1000_VMOLR(vfn), vmolr);
2823
2824         return 0;
2825 }
2826
2827 /**
2828  * igb_rlpml_set - set maximum receive packet size
2829  * @adapter: board private structure
2830  *
2831  * Configure maximum receivable packet size.
2832  **/
2833 static void igb_rlpml_set(struct igb_adapter *adapter)
2834 {
2835         u32 max_frame_size = adapter->max_frame_size;
2836         struct e1000_hw *hw = &adapter->hw;
2837         u16 pf_id = adapter->vfs_allocated_count;
2838
2839         if (adapter->vlgrp)
2840                 max_frame_size += VLAN_TAG_SIZE;
2841
2842         /* if VFs are enabled we set RLPML to the largest possible request
2843          * size and set the VMOLR RLPML to the size we need */
2844         if (pf_id) {
2845                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2846                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2847         }
2848
2849         wr32(E1000_RLPML, max_frame_size);
2850 }
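/*
 * Worked example (assuming max_frame_size is set elsewhere in the driver to
 * MTU + ETH_HLEN + ETH_FCS_LEN): with a 1500 byte MTU and a VLAN group
 * registered, the non-SR-IOV path writes RLPML = 1518 + 4 = 1522.  With VFs
 * allocated, RLPML is instead raised to MAX_JUMBO_FRAME_SIZE and the 1522
 * byte limit is carried by the PF's per-pool VMOLR via igb_set_vf_rlpml().
 */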
2851
2852 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2853                                  int vfn, bool aupe)
2854 {
2855         struct e1000_hw *hw = &adapter->hw;
2856         u32 vmolr;
2857
2858         /*
2859          * This register exists only on 82576 and newer, so if the MAC is older
2860          * we should exit and do nothing
2861          */
2862         if (hw->mac.type < e1000_82576)
2863                 return;
2864
2865         vmolr = rd32(E1000_VMOLR(vfn));
2866         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2867         if (aupe)
2868                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2869         else
2870                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2871
2872         /* clear all bits that might not be set */
2873         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2874
2875         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2876                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2877         /*
2878          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2879          * multicast packets
2880          */
2881         if (vfn <= adapter->vfs_allocated_count)
2882                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2883
2884         wr32(E1000_VMOLR(vfn), vmolr);
2885 }
2886
2887 /**
2888  * igb_configure_rx_ring - Configure a receive ring after Reset
2889  * @adapter: board private structure
2890  * @ring: receive ring to be configured
2891  *
2892  * Configure the Rx unit of the MAC after a reset.
2893  **/
2894 void igb_configure_rx_ring(struct igb_adapter *adapter,
2895                            struct igb_ring *ring)
2896 {
2897         struct e1000_hw *hw = &adapter->hw;
2898         u64 rdba = ring->dma;
2899         int reg_idx = ring->reg_idx;
2900         u32 srrctl, rxdctl;
2901
2902         /* disable the queue */
2903         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2904         wr32(E1000_RXDCTL(reg_idx),
2905                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2906
2907         /* Set DMA base address registers */
2908         wr32(E1000_RDBAL(reg_idx),
2909              rdba & 0x00000000ffffffffULL);
2910         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2911         wr32(E1000_RDLEN(reg_idx),
2912                        ring->count * sizeof(union e1000_adv_rx_desc));
2913
2914         /* initialize head and tail */
2915         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2916         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2917         writel(0, ring->head);
2918         writel(0, ring->tail);
2919
2920         /* set descriptor configuration */
2921         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2922                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2923                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2924 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2925                 srrctl |= IGB_RXBUFFER_16384 >>
2926                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2927 #else
2928                 srrctl |= (PAGE_SIZE / 2) >>
2929                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2930 #endif
2931                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2932         } else {
2933                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2934                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2935                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2936         }
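        /*
         * Roughly, from the branches above: a buffer length below
         * IGB_RXBUFFER_1024 selects always-split descriptors, with that
         * buffer used for headers and half a page (2 KB on 4 KB-page
         * systems) used for packet data; a larger buffer length selects a
         * single advanced one-buffer descriptor whose packet buffer size is
         * the length rounded up to 1 KB units.
         */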
2937         if (hw->mac.type == e1000_82580)
2938                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2939         /* Only set Drop Enable if we are supporting multiple queues */
2940         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2941                 srrctl |= E1000_SRRCTL_DROP_EN;
2942
2943         wr32(E1000_SRRCTL(reg_idx), srrctl);
2944
2945         /* set filtering for VMDQ pools */
2946         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2947
2948         /* enable receive descriptor fetching */
2949         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2950         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2951         rxdctl &= 0xFFF00000;
2952         rxdctl |= IGB_RX_PTHRESH;
2953         rxdctl |= IGB_RX_HTHRESH << 8;
2954         rxdctl |= IGB_RX_WTHRESH << 16;
2955         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2956 }
2957
2958 /**
2959  * igb_configure_rx - Configure receive Unit after Reset
2960  * @adapter: board private structure
2961  *
2962  * Configure the Rx unit of the MAC after a reset.
2963  **/
2964 static void igb_configure_rx(struct igb_adapter *adapter)
2965 {
2966         int i;
2967
2968         /* set UTA to appropriate mode */
2969         igb_set_uta(adapter);
2970
2971         /* set the correct pool for the PF default MAC address in entry 0 */
2972         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2973                          adapter->vfs_allocated_count);
2974
2975         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2976          * the Base and Length of the Rx Descriptor Ring */
2977         for (i = 0; i < adapter->num_rx_queues; i++)
2978                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2979 }
2980
2981 /**
2982  * igb_free_tx_resources - Free Tx Resources per Queue
2983  * @tx_ring: Tx descriptor ring for a specific queue
2984  *
2985  * Free all transmit software resources
2986  **/
2987 void igb_free_tx_resources(struct igb_ring *tx_ring)
2988 {
2989         igb_clean_tx_ring(tx_ring);
2990
2991         vfree(tx_ring->buffer_info);
2992         tx_ring->buffer_info = NULL;
2993
2994         /* if not set, then don't free */
2995         if (!tx_ring->desc)
2996                 return;
2997
2998         dma_free_coherent(tx_ring->dev, tx_ring->size,
2999                           tx_ring->desc, tx_ring->dma);
3000
3001         tx_ring->desc = NULL;
3002 }
3003
3004 /**
3005  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3006  * @adapter: board private structure
3007  *
3008  * Free all transmit software resources
3009  **/
3010 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3011 {
3012         int i;
3013
3014         for (i = 0; i < adapter->num_tx_queues; i++)
3015                 igb_free_tx_resources(adapter->tx_ring[i]);
3016 }
3017
3018 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3019                                     struct igb_buffer *buffer_info)
3020 {
3021         if (buffer_info->dma) {
3022                 if (buffer_info->mapped_as_page)
3023                         dma_unmap_page(tx_ring->dev,
3024                                         buffer_info->dma,
3025                                         buffer_info->length,
3026                                         DMA_TO_DEVICE);
3027                 else
3028                         dma_unmap_single(tx_ring->dev,
3029                                         buffer_info->dma,
3030                                         buffer_info->length,
3031                                         DMA_TO_DEVICE);
3032                 buffer_info->dma = 0;
3033         }
3034         if (buffer_info->skb) {
3035                 dev_kfree_skb_any(buffer_info->skb);
3036                 buffer_info->skb = NULL;
3037         }
3038         buffer_info->time_stamp = 0;
3039         buffer_info->length = 0;
3040         buffer_info->next_to_watch = 0;
3041         buffer_info->mapped_as_page = false;
3042 }
3043
3044 /**
3045  * igb_clean_tx_ring - Free Tx Buffers
3046  * @tx_ring: ring to be cleaned
3047  **/
3048 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3049 {
3050         struct igb_buffer *buffer_info;
3051         unsigned long size;
3052         unsigned int i;
3053
3054         if (!tx_ring->buffer_info)
3055                 return;
3056         /* Free all the Tx ring sk_buffs */
3057
3058         for (i = 0; i < tx_ring->count; i++) {
3059                 buffer_info = &tx_ring->buffer_info[i];
3060                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3061         }
3062
3063         size = sizeof(struct igb_buffer) * tx_ring->count;
3064         memset(tx_ring->buffer_info, 0, size);
3065
3066         /* Zero out the descriptor ring */
3067         memset(tx_ring->desc, 0, tx_ring->size);
3068
3069         tx_ring->next_to_use = 0;
3070         tx_ring->next_to_clean = 0;
3071 }
3072
3073 /**
3074  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3075  * @adapter: board private structure
3076  **/
3077 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3078 {
3079         int i;
3080
3081         for (i = 0; i < adapter->num_tx_queues; i++)
3082                 igb_clean_tx_ring(adapter->tx_ring[i]);
3083 }
3084
3085 /**
3086  * igb_free_rx_resources - Free Rx Resources
3087  * @rx_ring: ring to clean the resources from
3088  *
3089  * Free all receive software resources
3090  **/
3091 void igb_free_rx_resources(struct igb_ring *rx_ring)
3092 {
3093         igb_clean_rx_ring(rx_ring);
3094
3095         vfree(rx_ring->buffer_info);
3096         rx_ring->buffer_info = NULL;
3097
3098         /* if not set, then don't free */
3099         if (!rx_ring->desc)
3100                 return;
3101
3102         dma_free_coherent(rx_ring->dev, rx_ring->size,
3103                           rx_ring->desc, rx_ring->dma);
3104
3105         rx_ring->desc = NULL;
3106 }
3107
3108 /**
3109  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3110  * @adapter: board private structure
3111  *
3112  * Free all receive software resources
3113  **/
3114 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3115 {
3116         int i;
3117
3118         for (i = 0; i < adapter->num_rx_queues; i++)
3119                 igb_free_rx_resources(adapter->rx_ring[i]);
3120 }
3121
3122 /**
3123  * igb_clean_rx_ring - Free Rx Buffers per Queue
3124  * @rx_ring: ring to free buffers from
3125  **/
3126 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3127 {
3128         struct igb_buffer *buffer_info;
3129         unsigned long size;
3130         unsigned int i;
3131
3132         if (!rx_ring->buffer_info)
3133                 return;
3134
3135         /* Free all the Rx ring sk_buffs */
3136         for (i = 0; i < rx_ring->count; i++) {
3137                 buffer_info = &rx_ring->buffer_info[i];
3138                 if (buffer_info->dma) {
3139                         dma_unmap_single(rx_ring->dev,
3140                                          buffer_info->dma,
3141                                          rx_ring->rx_buffer_len,
3142                                          DMA_FROM_DEVICE);
3143                         buffer_info->dma = 0;
3144                 }
3145
3146                 if (buffer_info->skb) {
3147                         dev_kfree_skb(buffer_info->skb);
3148                         buffer_info->skb = NULL;
3149                 }
3150                 if (buffer_info->page_dma) {
3151                         dma_unmap_page(rx_ring->dev,
3152                                        buffer_info->page_dma,
3153                                        PAGE_SIZE / 2,
3154                                        DMA_FROM_DEVICE);
3155                         buffer_info->page_dma = 0;
3156                 }
3157                 if (buffer_info->page) {
3158                         put_page(buffer_info->page);
3159                         buffer_info->page = NULL;
3160                         buffer_info->page_offset = 0;
3161                 }
3162         }
3163
3164         size = sizeof(struct igb_buffer) * rx_ring->count;
3165         memset(rx_ring->buffer_info, 0, size);
3166
3167         /* Zero out the descriptor ring */
3168         memset(rx_ring->desc, 0, rx_ring->size);
3169
3170         rx_ring->next_to_clean = 0;
3171         rx_ring->next_to_use = 0;
3172 }
3173
3174 /**
3175  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3176  * @adapter: board private structure
3177  **/
3178 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3179 {
3180         int i;
3181
3182         for (i = 0; i < adapter->num_rx_queues; i++)
3183                 igb_clean_rx_ring(adapter->rx_ring[i]);
3184 }
3185
3186 /**
3187  * igb_set_mac - Change the Ethernet Address of the NIC
3188  * @netdev: network interface device structure
3189  * @p: pointer to an address structure
3190  *
3191  * Returns 0 on success, negative on failure
3192  **/
3193 static int igb_set_mac(struct net_device *netdev, void *p)
3194 {
3195         struct igb_adapter *adapter = netdev_priv(netdev);
3196         struct e1000_hw *hw = &adapter->hw;
3197         struct sockaddr *addr = p;
3198
3199         if (!is_valid_ether_addr(addr->sa_data))
3200                 return -EADDRNOTAVAIL;
3201
3202         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3203         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3204
3205         /* set the correct pool for the new PF MAC address in entry 0 */
3206         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3207                          adapter->vfs_allocated_count);
3208
3209         return 0;
3210 }
3211
3212 /**
3213  * igb_write_mc_addr_list - write multicast addresses to MTA
3214  * @netdev: network interface device structure
3215  *
3216  * Writes multicast address list to the MTA hash table.
3217  * Returns: -ENOMEM on failure
3218  *                0 on no addresses written
3219  *                X on writing X addresses to MTA
3220  **/
3221 static int igb_write_mc_addr_list(struct net_device *netdev)
3222 {
3223         struct igb_adapter *adapter = netdev_priv(netdev);
3224         struct e1000_hw *hw = &adapter->hw;
3225         struct netdev_hw_addr *ha;
3226         u8  *mta_list;
3227         int i;
3228
3229         if (netdev_mc_empty(netdev)) {
3230                 /* nothing to program, so clear mc list */
3231                 igb_update_mc_addr_list(hw, NULL, 0);
3232                 igb_restore_vf_multicasts(adapter);
3233                 return 0;
3234         }
3235
3236         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3237         if (!mta_list)
3238                 return -ENOMEM;
3239
3240         /* The shared function expects a packed array of only addresses. */
3241         i = 0;
3242         netdev_for_each_mc_addr(ha, netdev)
3243                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3244
3245         igb_update_mc_addr_list(hw, mta_list, i);
3246         kfree(mta_list);
3247
3248         return netdev_mc_count(netdev);
3249 }
3250
3251 /**
3252  * igb_write_uc_addr_list - write unicast addresses to RAR table
3253  * @netdev: network interface device structure
3254  *
3255  * Writes unicast address list to the RAR table.
3256  * Returns: -ENOMEM on failure/insufficient address space
3257  *                0 on no addresses written
3258  *                X on writing X addresses to the RAR table
3259  **/
3260 static int igb_write_uc_addr_list(struct net_device *netdev)
3261 {
3262         struct igb_adapter *adapter = netdev_priv(netdev);
3263         struct e1000_hw *hw = &adapter->hw;
3264         unsigned int vfn = adapter->vfs_allocated_count;
3265         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3266         int count = 0;
3267
3268         /* return ENOMEM indicating insufficient memory for addresses */
3269         if (netdev_uc_count(netdev) > rar_entries)
3270                 return -ENOMEM;
3271
3272         if (!netdev_uc_empty(netdev) && rar_entries) {
3273                 struct netdev_hw_addr *ha;
3274
3275                 netdev_for_each_uc_addr(ha, netdev) {
3276                         if (!rar_entries)
3277                                 break;
3278                         igb_rar_set_qsel(adapter, ha->addr,
3279                                          rar_entries--,
3280                                          vfn);
3281                         count++;
3282                 }
3283         }
3284         /* write the addresses in reverse order to avoid write combining */
3285         for (; rar_entries > 0 ; rar_entries--) {
3286                 wr32(E1000_RAH(rar_entries), 0);
3287                 wr32(E1000_RAL(rar_entries), 0);
3288         }
3289         wrfl();
3290
3291         return count;
3292 }
3293
3294 /**
3295  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3296  * @netdev: network interface device structure
3297  *
3298  * The set_rx_mode entry point is called whenever the unicast or multicast
3299  * address lists or the network interface flags are updated.  This routine is
3300  * responsible for configuring the hardware for proper unicast, multicast,
3301  * promiscuous mode, and all-multi behavior.
3302  **/
3303 static void igb_set_rx_mode(struct net_device *netdev)
3304 {
3305         struct igb_adapter *adapter = netdev_priv(netdev);
3306         struct e1000_hw *hw = &adapter->hw;
3307         unsigned int vfn = adapter->vfs_allocated_count;
3308         u32 rctl, vmolr = 0;
3309         int count;
3310
3311         /* Check for Promiscuous and All Multicast modes */
3312         rctl = rd32(E1000_RCTL);
3313
3314         /* clear the affected bits */
3315         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3316
3317         if (netdev->flags & IFF_PROMISC) {
3318                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3319                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3320         } else {
3321                 if (netdev->flags & IFF_ALLMULTI) {
3322                         rctl |= E1000_RCTL_MPE;
3323                         vmolr |= E1000_VMOLR_MPME;
3324                 } else {
3325                         /*
3326                          * Write addresses to the MTA; if the attempt fails
3327                          * then we should just turn on promiscuous mode so
3328                          * that we can at least receive multicast traffic
3329                          */
3330                         count = igb_write_mc_addr_list(netdev);
3331                         if (count < 0) {
3332                                 rctl |= E1000_RCTL_MPE;
3333                                 vmolr |= E1000_VMOLR_MPME;
3334                         } else if (count) {
3335                                 vmolr |= E1000_VMOLR_ROMPE;
3336                         }
3337                 }
3338                 /*
3339                  * Write addresses to available RAR registers; if there is not
3340                  * sufficient space to store all the addresses then enable
3341                  * unicast promiscuous mode
3342                  */
3343                 count = igb_write_uc_addr_list(netdev);
3344                 if (count < 0) {
3345                         rctl |= E1000_RCTL_UPE;
3346                         vmolr |= E1000_VMOLR_ROPE;
3347                 }
3348                 rctl |= E1000_RCTL_VFE;
3349         }
3350         wr32(E1000_RCTL, rctl);
3351
3352         /*
3353          * In order to support SR-IOV and eventually VMDq it is necessary to set
3354          * the VMOLR to enable the appropriate modes.  Without this workaround
3355          * we will have issues with VLAN tag stripping not being done for frames
3356          * that are only arriving because we are the default pool
3357          */
3358         if (hw->mac.type < e1000_82576)
3359                 return;
3360
3361         vmolr |= rd32(E1000_VMOLR(vfn)) &
3362                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3363         wr32(E1000_VMOLR(vfn), vmolr);
3364         igb_restore_vf_multicasts(adapter);
3365 }
3366
3367 /* Need to wait a few seconds after link up to get diagnostic information from
3368  * the phy */
3369 static void igb_update_phy_info(unsigned long data)
3370 {
3371         struct igb_adapter *adapter = (struct igb_adapter *) data;
3372         igb_get_phy_info(&adapter->hw);
3373 }
3374
3375 /**
3376  * igb_has_link - check shared code for link and determine up/down
3377  * @adapter: pointer to driver private info
3378  **/
3379 bool igb_has_link(struct igb_adapter *adapter)
3380 {
3381         struct e1000_hw *hw = &adapter->hw;
3382         bool link_active = false;
3383         s32 ret_val = 0;
3384
3385         /* get_link_status is set on LSC (link status) interrupt or
3386          * rx sequence error interrupt.  get_link_status will stay
3387          * set until e1000_check_for_link establishes link
3388          * for copper adapters ONLY
3389          */
3390         switch (hw->phy.media_type) {
3391         case e1000_media_type_copper:
3392                 if (hw->mac.get_link_status) {
3393                         ret_val = hw->mac.ops.check_for_link(hw);
3394                         link_active = !hw->mac.get_link_status;
3395                 } else {
3396                         link_active = true;
3397                 }
3398                 break;
3399         case e1000_media_type_internal_serdes:
3400                 ret_val = hw->mac.ops.check_for_link(hw);
3401                 link_active = hw->mac.serdes_has_link;
3402                 break;
3403         default:
3404         case e1000_media_type_unknown:
3405                 break;
3406         }
3407
3408         return link_active;
3409 }
3410
3411 /**
3412  * igb_watchdog - Timer Call-back
3413  * @data: pointer to adapter cast into an unsigned long
3414  **/
3415 static void igb_watchdog(unsigned long data)
3416 {
3417         struct igb_adapter *adapter = (struct igb_adapter *)data;
3418         /* Do the rest outside of interrupt context */
3419         schedule_work(&adapter->watchdog_task);
3420 }
3421
3422 static void igb_watchdog_task(struct work_struct *work)
3423 {
3424         struct igb_adapter *adapter = container_of(work,
3425                                                    struct igb_adapter,
3426                                                    watchdog_task);
3427         struct e1000_hw *hw = &adapter->hw;
3428         struct net_device *netdev = adapter->netdev;
3429         u32 link;
3430         int i;
3431
3432         link = igb_has_link(adapter);
3433         if (link) {
3434                 if (!netif_carrier_ok(netdev)) {
3435                         u32 ctrl;
3436                         hw->mac.ops.get_speed_and_duplex(hw,
3437                                                          &adapter->link_speed,
3438                                                          &adapter->link_duplex);
3439
3440                         ctrl = rd32(E1000_CTRL);
3441                         /* Link status message must follow this format */
3442                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3443                                  "Flow Control: %s\n",
3444                                netdev->name,
3445                                adapter->link_speed,
3446                                adapter->link_duplex == FULL_DUPLEX ?
3447                                  "Full Duplex" : "Half Duplex",
3448                                ((ctrl & E1000_CTRL_TFCE) &&
3449                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3450                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3451                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3452
3453                         /* adjust timeout factor according to speed/duplex */
3454                         adapter->tx_timeout_factor = 1;
3455                         switch (adapter->link_speed) {
3456                         case SPEED_10:
3457                                 adapter->tx_timeout_factor = 14;
3458                                 break;
3459                         case SPEED_100:
3460                                 /* maybe add some timeout factor ? */
3461                                 break;
3462                         }
3463
3464                         netif_carrier_on(netdev);
3465
3466                         igb_ping_all_vfs(adapter);
3467
3468                         /* link state has changed, schedule phy info update */
3469                         if (!test_bit(__IGB_DOWN, &adapter->state))
3470                                 mod_timer(&adapter->phy_info_timer,
3471                                           round_jiffies(jiffies + 2 * HZ));
3472                 }
3473         } else {
3474                 if (netif_carrier_ok(netdev)) {
3475                         adapter->link_speed = 0;
3476                         adapter->link_duplex = 0;
3477                         /* Link status message must follow this format */
3478                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3479                                netdev->name);
3480                         netif_carrier_off(netdev);
3481
3482                         igb_ping_all_vfs(adapter);
3483
3484                         /* link state has changed, schedule phy info update */
3485                         if (!test_bit(__IGB_DOWN, &adapter->state))
3486                                 mod_timer(&adapter->phy_info_timer,
3487                                           round_jiffies(jiffies + 2 * HZ));
3488                 }
3489         }
3490
3491         spin_lock(&adapter->stats64_lock);
3492         igb_update_stats(adapter, &adapter->stats64);
3493         spin_unlock(&adapter->stats64_lock);
3494
3495         for (i = 0; i < adapter->num_tx_queues; i++) {
3496                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3497                 if (!netif_carrier_ok(netdev)) {
3498                         /* We've lost link, so the controller stops DMA,
3499                          * but we've got queued Tx work that's never going
3500                          * to get done, so reset controller to flush Tx.
3501                          * (Do the reset outside of interrupt context). */
3502                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3503                                 adapter->tx_timeout_count++;
3504                                 schedule_work(&adapter->reset_task);
3505                                 /* return immediately since reset is imminent */
3506                                 return;
3507                         }
3508                 }
3509
3510                 /* Force detection of hung controller every watchdog period */
3511                 tx_ring->detect_tx_hung = true;
3512         }
3513
3514         /* Cause software interrupt to ensure rx ring is cleaned */
3515         if (adapter->msix_entries) {
3516                 u32 eics = 0;
3517                 for (i = 0; i < adapter->num_q_vectors; i++) {
3518                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3519                         eics |= q_vector->eims_value;
3520                 }
3521                 wr32(E1000_EICS, eics);
3522         } else {
3523                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3524         }
3525
3526         /* Reset the timer */
3527         if (!test_bit(__IGB_DOWN, &adapter->state))
3528                 mod_timer(&adapter->watchdog_timer,
3529                           round_jiffies(jiffies + 2 * HZ));
3530 }
3531
3532 enum latency_range {
3533         lowest_latency = 0,
3534         low_latency = 1,
3535         bulk_latency = 2,
3536         latency_invalid = 255
3537 };
3538
3539 /**
3540  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3541  *
3542  *      Stores a new ITR value based strictly on packet size.  This
3543  *      algorithm is less sophisticated than that used in igb_update_itr,
3544  *      due to the difficulty of synchronizing statistics across multiple
3545  *      receive rings.  The divisors and thresholds used by this function
3546  *      were determined based on theoretical maximum wire speed and testing
3547  *      data, in order to minimize response time while increasing bulk
3548  *      throughput.
3549  *      This functionality is controlled by the InterruptThrottleRate module
3550  *      parameter (see igb_param.c)
3551  *      NOTE:  This function is called only when operating in a multiqueue
3552  *             receive environment.
3553  * @q_vector: pointer to q_vector
3554  **/
3555 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3556 {
3557         int new_val = q_vector->itr_val;
3558         int avg_wire_size = 0;
3559         struct igb_adapter *adapter = q_vector->adapter;
3560         struct igb_ring *ring;
3561         unsigned int packets;
3562
3563         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3564          * ints/sec - an itr_val of 976, matching the value set below.
3565          */
3566         if (adapter->link_speed != SPEED_1000) {
3567                 new_val = 976;
3568                 goto set_itr_val;
3569         }
3570
3571         ring = q_vector->rx_ring;
3572         if (ring) {
3573                 packets = ACCESS_ONCE(ring->total_packets);
3574
3575                 if (packets)
3576                         avg_wire_size = ring->total_bytes / packets;
3577         }
3578
3579         ring = q_vector->tx_ring;
3580         if (ring) {
3581                 packets = ACCESS_ONCE(ring->total_packets);
3582
3583                 if (packets)
3584                         avg_wire_size = max_t(u32, avg_wire_size,
3585                                               ring->total_bytes / packets);
3586         }
3587
3588         /* if avg_wire_size isn't set no work was done */
3589         if (!avg_wire_size)
3590                 goto clear_counts;
3591
3592         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3593         avg_wire_size += 24;
3594
3595         /* Don't starve jumbo frames */
3596         avg_wire_size = min(avg_wire_size, 3000);
3597
3598         /* Give a little boost to mid-size frames */
3599         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3600                 new_val = avg_wire_size / 3;
3601         else
3602                 new_val = avg_wire_size / 2;
3603
3604         /* when in itr mode 3 do not exceed 20K ints/sec */
3605         if (adapter->rx_itr_setting == 3 && new_val < 196)
3606                 new_val = 196;
3607
3608 set_itr_val:
3609         if (new_val != q_vector->itr_val) {
3610                 q_vector->itr_val = new_val;
3611                 q_vector->set_itr = 1;
3612         }
3613 clear_counts:
3614         if (q_vector->rx_ring) {
3615                 q_vector->rx_ring->total_bytes = 0;
3616                 q_vector->rx_ring->total_packets = 0;
3617         }
3618         if (q_vector->tx_ring) {
3619                 q_vector->tx_ring->total_bytes = 0;
3620                 q_vector->tx_ring->total_packets = 0;
3621         }
3622 }
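/*
 * A numeric sketch of the heuristic above (values follow directly from the
 * code): full-sized 1500 byte frames give avg_wire_size = 1500 + 24 = 1524,
 * which falls outside the 300-1200 "boost" window, so new_val = 1524 / 2 =
 * 762; 600 byte frames give 624, inside the window, so new_val = 624 / 3 =
 * 208.  With rx_itr_setting == 3 the result is never allowed below 196.
 */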
3623
3624 /**
3625  * igb_update_itr - update the dynamic ITR value based on statistics
3626  *      Stores a new ITR value based on packets and byte
3627  *      counts during the last interrupt.  The advantage of per interrupt
3628  *      computation is faster updates and more accurate ITR for the current
3629  *      traffic pattern.  Constants in this function were computed
3630  *      based on theoretical maximum wire speed and thresholds were set based
3631  *      on testing data as well as attempting to minimize response time
3632  *      while increasing bulk throughput.
3633  *      This functionality is controlled by the InterruptThrottleRate module
3634  *      parameter (see igb_param.c)
3635  *      NOTE:  These calculations are only valid when operating in a single-
3636  *             queue environment.
3637  * @adapter: pointer to adapter
3638  * @itr_setting: current q_vector->itr_val
3639  * @packets: the number of packets during this measurement interval
3640  * @bytes: the number of bytes during this measurement interval
3641  **/
3642 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3643                                    int packets, int bytes)
3644 {
3645         unsigned int retval = itr_setting;
3646
3647         if (packets == 0)
3648                 goto update_itr_done;
3649
3650         switch (itr_setting) {
3651         case lowest_latency:
3652                 /* handle TSO and jumbo frames */
3653                 if (bytes/packets > 8000)
3654                         retval = bulk_latency;
3655                 else if ((packets < 5) && (bytes > 512))
3656                         retval = low_latency;
3657                 break;
3658         case low_latency:  /* 50 usec aka 20000 ints/s */
3659                 if (bytes > 10000) {
3660                         /* this if handles the TSO accounting */
3661                         if (bytes/packets > 8000) {
3662                                 retval = bulk_latency;
3663                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3664                                 retval = bulk_latency;
3665                         } else if ((packets > 35)) {
3666                                 retval = lowest_latency;
3667                         }
3668                 } else if (bytes/packets > 2000) {
3669                         retval = bulk_latency;
3670                 } else if (packets <= 2 && bytes < 512) {
3671                         retval = lowest_latency;
3672                 }
3673                 break;
3674         case bulk_latency: /* 250 usec aka 4000 ints/s */
3675                 if (bytes > 25000) {
3676                         if (packets > 35)
3677                                 retval = low_latency;
3678                 } else if (bytes < 1500) {
3679                         retval = low_latency;
3680                 }
3681                 break;
3682         }
3683
3684 update_itr_done:
3685         return retval;
3686 }
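/*
 * Example of the state machine above (derived from the code): in the
 * low_latency state, an interval of 20 packets totalling 30000 bytes has
 * bytes > 10000 and bytes/packets = 1500 > 1200, so the state moves to
 * bulk_latency; 40 small packets totalling 4000 bytes stay below both the
 * 10000 byte and 2000 bytes/packet thresholds, so the state is unchanged.
 */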
3687
3688 static void igb_set_itr(struct igb_adapter *adapter)
3689 {
3690         struct igb_q_vector *q_vector = adapter->q_vector[0];
3691         u16 current_itr;
3692         u32 new_itr = q_vector->itr_val;
3693
3694         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3695         if (adapter->link_speed != SPEED_1000) {
3696                 current_itr = 0;
3697                 new_itr = 4000;
3698                 goto set_itr_now;
3699         }
3700
3701         adapter->rx_itr = igb_update_itr(adapter,
3702                                     adapter->rx_itr,
3703                                     q_vector->rx_ring->total_packets,
3704                                     q_vector->rx_ring->total_bytes);
3705
3706         adapter->tx_itr = igb_update_itr(adapter,
3707                                     adapter->tx_itr,
3708                                     q_vector->tx_ring->total_packets,
3709                                     q_vector->tx_ring->total_bytes);
3710         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3711
3712         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3713         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3714                 current_itr = low_latency;
3715
3716         switch (current_itr) {
3717         /* counts and packets in update_itr are dependent on these numbers */
3718         case lowest_latency:
3719                 new_itr = 56;  /* aka 70,000 ints/sec */
3720                 break;
3721         case low_latency:
3722                 new_itr = 196; /* aka 20,000 ints/sec */
3723                 break;
3724         case bulk_latency:
3725                 new_itr = 980; /* aka 4,000 ints/sec */
3726                 break;
3727         default:
3728                 break;
3729         }
3730
3731 set_itr_now:
3732         q_vector->rx_ring->total_bytes = 0;
3733         q_vector->rx_ring->total_packets = 0;
3734         q_vector->tx_ring->total_bytes = 0;
3735         q_vector->tx_ring->total_packets = 0;
3736
3737         if (new_itr != q_vector->itr_val) {
3738                 /* this attempts to bias the interrupt rate towards Bulk
3739                  * by adding intermediate steps when interrupt rate is
3740                  * increasing */
3741                 new_itr = new_itr > q_vector->itr_val ?
3742                              max((new_itr * q_vector->itr_val) /
3743                                  (new_itr + (q_vector->itr_val >> 2)),
3744                                  new_itr) :
3745                              new_itr;
3746                 /* Don't write the value here; it resets the adapter's
3747                  * internal timer, and causes us to delay far longer than
3748                  * we should between interrupts.  Instead, we write the ITR
3749                  * value at the beginning of the next interrupt so the timing
3750                  * ends up being correct.
3751                  */
3752                 q_vector->itr_val = new_itr;
3753                 q_vector->set_itr = 1;
3754         }
3755 }
3756
3757 #define IGB_TX_FLAGS_CSUM               0x00000001
3758 #define IGB_TX_FLAGS_VLAN               0x00000002
3759 #define IGB_TX_FLAGS_TSO                0x00000004
3760 #define IGB_TX_FLAGS_IPV4               0x00000008
3761 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3762 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3763 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
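/*
 * For reference (matching the defines above and their use in
 * igb_xmit_frame_ring_adv below): the low bits of tx_flags carry the
 * CSUM/VLAN/TSO/IPV4/TSTAMP flags, while a VLAN tag such as 0x0123 is stored
 * in the upper 16 bits as (0x0123 << IGB_TX_FLAGS_VLAN_SHIFT) and masked
 * back out with IGB_TX_FLAGS_VLAN_MASK when building the context descriptor.
 */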
3764
3765 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3766                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3767 {
3768         struct e1000_adv_tx_context_desc *context_desc;
3769         unsigned int i;
3770         int err;
3771         struct igb_buffer *buffer_info;
3772         u32 info = 0, tu_cmd = 0;
3773         u32 mss_l4len_idx;
3774         u8 l4len;
3775
3776         if (skb_header_cloned(skb)) {
3777                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3778                 if (err)
3779                         return err;
3780         }
3781
3782         l4len = tcp_hdrlen(skb);
3783         *hdr_len += l4len;
3784
3785         if (skb->protocol == htons(ETH_P_IP)) {
3786                 struct iphdr *iph = ip_hdr(skb);
3787                 iph->tot_len = 0;
3788                 iph->check = 0;
3789                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3790                                                          iph->daddr, 0,
3791                                                          IPPROTO_TCP,
3792                                                          0);
3793         } else if (skb_is_gso_v6(skb)) {
3794                 ipv6_hdr(skb)->payload_len = 0;
3795                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3796                                                        &ipv6_hdr(skb)->daddr,
3797                                                        0, IPPROTO_TCP, 0);
3798         }
3799
3800         i = tx_ring->next_to_use;
3801
3802         buffer_info = &tx_ring->buffer_info[i];
3803         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3804         /* VLAN MACLEN IPLEN */
3805         if (tx_flags & IGB_TX_FLAGS_VLAN)
3806                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3807         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3808         *hdr_len += skb_network_offset(skb);
3809         info |= skb_network_header_len(skb);
3810         *hdr_len += skb_network_header_len(skb);
3811         context_desc->vlan_macip_lens = cpu_to_le32(info);
3812
3813         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3814         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3815
3816         if (skb->protocol == htons(ETH_P_IP))
3817                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3818         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3819
3820         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3821
3822         /* MSS L4LEN IDX */
3823         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3824         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3825
3826         /* For 82575, context index must be unique per ring. */
3827         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3828                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3829
3830         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3831         context_desc->seqnum_seed = 0;
3832
3833         buffer_info->time_stamp = jiffies;
3834         buffer_info->next_to_watch = i;
3835         buffer_info->dma = 0;
3836         i++;
3837         if (i == tx_ring->count)
3838                 i = 0;
3839
3840         tx_ring->next_to_use = i;
3841
3842         return true;
3843 }
3844
3845 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3846                                    struct sk_buff *skb, u32 tx_flags)
3847 {
3848         struct e1000_adv_tx_context_desc *context_desc;
3849         struct device *dev = tx_ring->dev;
3850         struct igb_buffer *buffer_info;
3851         u32 info = 0, tu_cmd = 0;
3852         unsigned int i;
3853
3854         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3855             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3856                 i = tx_ring->next_to_use;
3857                 buffer_info = &tx_ring->buffer_info[i];
3858                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3859
3860                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3861                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3862
3863                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3864                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3865                         info |= skb_network_header_len(skb);
3866
3867                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3868
3869                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3870
3871                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3872                         __be16 protocol;
3873
3874                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3875                                 const struct vlan_ethhdr *vhdr =
3876                                           (const struct vlan_ethhdr*)skb->data;
3877
3878                                 protocol = vhdr->h_vlan_encapsulated_proto;
3879                         } else {
3880                                 protocol = skb->protocol;
3881                         }
3882
3883                         switch (protocol) {
3884                         case cpu_to_be16(ETH_P_IP):
3885                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3886                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3887                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3888                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3889                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3890                                 break;
3891                         case cpu_to_be16(ETH_P_IPV6):
3892                                 /* XXX what about other V6 headers?? */
3893                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3894                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3895                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3896                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3897                                 break;
3898                         default:
3899                                 if (unlikely(net_ratelimit()))
3900                                         dev_warn(dev,
3901                                             "partial checksum but proto=%x!\n",
3902                                             skb->protocol);
3903                                 break;
3904                         }
3905                 }
3906
3907                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3908                 context_desc->seqnum_seed = 0;
3909                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3910                         context_desc->mss_l4len_idx =
3911                                 cpu_to_le32(tx_ring->reg_idx << 4);
3912
3913                 buffer_info->time_stamp = jiffies;
3914                 buffer_info->next_to_watch = i;
3915                 buffer_info->dma = 0;
3916
3917                 i++;
3918                 if (i == tx_ring->count)
3919                         i = 0;
3920                 tx_ring->next_to_use = i;
3921
3922                 return true;
3923         }
3924         return false;
3925 }
3926
3927 #define IGB_MAX_TXD_PWR 16
3928 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
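/*
 * i.e. (1 << 16) = 65536 bytes; igb_tx_map_adv() below BUG()s if any single
 * buffer reaches this per-descriptor limit.
 */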
3929
3930 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3931                                  unsigned int first)
3932 {
3933         struct igb_buffer *buffer_info;
3934         struct device *dev = tx_ring->dev;
3935         unsigned int hlen = skb_headlen(skb);
3936         unsigned int count = 0, i;
3937         unsigned int f;
3938         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3939
3940         i = tx_ring->next_to_use;
3941
3942         buffer_info = &tx_ring->buffer_info[i];
3943         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3944         buffer_info->length = hlen;
3945         /* set time_stamp *before* dma to help avoid a possible race */
3946         buffer_info->time_stamp = jiffies;
3947         buffer_info->next_to_watch = i;
3948         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3949                                           DMA_TO_DEVICE);
3950         if (dma_mapping_error(dev, buffer_info->dma))
3951                 goto dma_error;
3952
3953         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3954                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3955                 unsigned int len = frag->size;
3956
3957                 count++;
3958                 i++;
3959                 if (i == tx_ring->count)
3960                         i = 0;
3961
3962                 buffer_info = &tx_ring->buffer_info[i];
3963                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3964                 buffer_info->length = len;
3965                 buffer_info->time_stamp = jiffies;
3966                 buffer_info->next_to_watch = i;
3967                 buffer_info->mapped_as_page = true;
3968                 buffer_info->dma = dma_map_page(dev,
3969                                                 frag->page,
3970                                                 frag->page_offset,
3971                                                 len,
3972                                                 DMA_TO_DEVICE);
3973                 if (dma_mapping_error(dev, buffer_info->dma))
3974                         goto dma_error;
3975
3976         }
3977
3978         tx_ring->buffer_info[i].skb = skb;
3979         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3980         /* multiply data chunks by size of headers */
3981         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3982         tx_ring->buffer_info[i].gso_segs = gso_segs;
3983         tx_ring->buffer_info[first].next_to_watch = i;
3984
3985         return ++count;
3986
3987 dma_error:
3988         dev_err(dev, "TX DMA map failed\n");
3989
3990         /* clear timestamp and dma mappings for failed buffer_info mapping */
3991         buffer_info->dma = 0;
3992         buffer_info->time_stamp = 0;
3993         buffer_info->length = 0;
3994         buffer_info->next_to_watch = 0;
3995         buffer_info->mapped_as_page = false;
3996
3997         /* clear timestamp and dma mappings for remaining portion of packet */
3998         while (count--) {
3999                 if (i == 0)
4000                         i = tx_ring->count;
4001                 i--;
4002                 buffer_info = &tx_ring->buffer_info[i];
4003                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4004         }
4005
4006         return 0;
4007 }
4008
4009 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4010                                     u32 tx_flags, int count, u32 paylen,
4011                                     u8 hdr_len)
4012 {
4013         union e1000_adv_tx_desc *tx_desc;
4014         struct igb_buffer *buffer_info;
4015         u32 olinfo_status = 0, cmd_type_len;
4016         unsigned int i = tx_ring->next_to_use;
4017
4018         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4019                         E1000_ADVTXD_DCMD_DEXT);
4020
4021         if (tx_flags & IGB_TX_FLAGS_VLAN)
4022                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4023
4024         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4025                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4026
4027         if (tx_flags & IGB_TX_FLAGS_TSO) {
4028                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4029
4030                 /* insert tcp checksum */
4031                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4032
4033                 /* insert ip checksum */
4034                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4035                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4036
4037         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4038                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4039         }
4040
4041         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4042             (tx_flags & (IGB_TX_FLAGS_CSUM |
4043                          IGB_TX_FLAGS_TSO |
4044                          IGB_TX_FLAGS_VLAN)))
4045                 olinfo_status |= tx_ring->reg_idx << 4;
4046
4047         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4048
4049         do {
4050                 buffer_info = &tx_ring->buffer_info[i];
4051                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4052                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4053                 tx_desc->read.cmd_type_len =
4054                         cpu_to_le32(cmd_type_len | buffer_info->length);
4055                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4056                 count--;
4057                 i++;
4058                 if (i == tx_ring->count)
4059                         i = 0;
4060         } while (count > 0);
4061
4062         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4063         /* Force memory writes to complete before letting h/w
4064          * know there are new descriptors to fetch.  (Only
4065          * applicable for weak-ordered memory model archs,
4066          * such as IA-64). */
4067         wmb();
4068
4069         tx_ring->next_to_use = i;
4070         writel(i, tx_ring->tail);
4071         /* we need this if more than one processor can write to our tail
4072          * at a time; it synchronizes IO on IA64/Altix systems */
4073         mmiowb();
4074 }
4075
4076 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4077 {
4078         struct net_device *netdev = tx_ring->netdev;
4079
4080         netif_stop_subqueue(netdev, tx_ring->queue_index);
4081
4082         /* Herbert's original patch had:
4083          *  smp_mb__after_netif_stop_queue();
4084          * but since that doesn't exist yet, just open code it. */
4085         smp_mb();
4086
4087         /* We need to check again in case another CPU has just
4088          * made room available. */
4089         if (igb_desc_unused(tx_ring) < size)
4090                 return -EBUSY;
4091
4092         /* A reprieve! */
4093         netif_wake_subqueue(netdev, tx_ring->queue_index);
4094
4095         u64_stats_update_begin(&tx_ring->tx_syncp2);
4096         tx_ring->tx_stats.restart_queue2++;
4097         u64_stats_update_end(&tx_ring->tx_syncp2);
4098
4099         return 0;
4100 }
4101
4102 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4103 {
4104         if (igb_desc_unused(tx_ring) >= size)
4105                 return 0;
4106         return __igb_maybe_stop_tx(tx_ring, size);
4107 }
4108
4109 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4110                                     struct igb_ring *tx_ring)
4111 {
4112         int tso = 0, count;
4113         u32 tx_flags = 0;
4114         u16 first;
4115         u8 hdr_len = 0;
4116
4117         /* need: 1 descriptor per page,
4118          *       + 2 desc gap to keep tail from touching head,
4119          *       + 1 desc for skb->data,
4120          *       + 1 desc for context descriptor,
4121          * otherwise try next time */
4122         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4123                 /* this is a hard error */
4124                 return NETDEV_TX_BUSY;
4125         }
4126
4127         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4128                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4129                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4130         }
4131
4132         if (vlan_tx_tag_present(skb)) {
4133                 tx_flags |= IGB_TX_FLAGS_VLAN;
4134                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4135         }
4136
4137         if (skb->protocol == htons(ETH_P_IP))
4138                 tx_flags |= IGB_TX_FLAGS_IPV4;
4139
4140         first = tx_ring->next_to_use;
4141         if (skb_is_gso(skb)) {
4142                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4143
4144                 if (tso < 0) {
4145                         dev_kfree_skb_any(skb);
4146                         return NETDEV_TX_OK;
4147                 }
4148         }
4149
4150         if (tso)
4151                 tx_flags |= IGB_TX_FLAGS_TSO;
4152         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4153                  (skb->ip_summed == CHECKSUM_PARTIAL))
4154                 tx_flags |= IGB_TX_FLAGS_CSUM;
4155
4156         /*
4157          * count reflects the descriptors mapped; if it is zero a mapping
4158          * error has occurred and we need to rewind the descriptor queue
4159          */
4160         count = igb_tx_map_adv(tx_ring, skb, first);
4161         if (!count) {
4162                 dev_kfree_skb_any(skb);
4163                 tx_ring->buffer_info[first].time_stamp = 0;
4164                 tx_ring->next_to_use = first;
4165                 return NETDEV_TX_OK;
4166         }
4167
4168         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4169
4170         /* Make sure there is space in the ring for the next send. */
4171         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4172
4173         return NETDEV_TX_OK;
4174 }
4175
4176 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4177                                       struct net_device *netdev)
4178 {
4179         struct igb_adapter *adapter = netdev_priv(netdev);
4180         struct igb_ring *tx_ring;
4181         int r_idx = 0;
4182
4183         if (test_bit(__IGB_DOWN, &adapter->state)) {
4184                 dev_kfree_skb_any(skb);
4185                 return NETDEV_TX_OK;
4186         }
4187
4188         if (skb->len <= 0) {
4189                 dev_kfree_skb_any(skb);
4190                 return NETDEV_TX_OK;
4191         }
4192
4193         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4194         tx_ring = adapter->multi_tx_table[r_idx];
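             /* the AND above relies on IGB_ABS_MAX_TX_QUEUES being a power of two;
              * it simply wraps queue_mapping into the multi_tx_table range */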
4195
4196         /* This goes back to the question of how to logically map a tx queue
4197          * to a flow.  Right now, performance is slightly degraded when
4198          * using multiple tx queues.  If the stack breaks away from a
4199          * single qdisc implementation, we can look at this again. */
4200         return igb_xmit_frame_ring_adv(skb, tx_ring);
4201 }
4202
4203 /**
4204  * igb_tx_timeout - Respond to a Tx Hang
4205  * @netdev: network interface device structure
4206  **/
4207 static void igb_tx_timeout(struct net_device *netdev)
4208 {
4209         struct igb_adapter *adapter = netdev_priv(netdev);
4210         struct e1000_hw *hw = &adapter->hw;
4211
4212         /* Do the reset outside of interrupt context */
4213         adapter->tx_timeout_count++;
4214
4215         if (hw->mac.type == e1000_82580)
4216                 hw->dev_spec._82575.global_device_reset = true;
4217
4218         schedule_work(&adapter->reset_task);
4219         wr32(E1000_EICS,
4220              (adapter->eims_enable_mask & ~adapter->eims_other));
4221 }
4222
4223 static void igb_reset_task(struct work_struct *work)
4224 {
4225         struct igb_adapter *adapter;
4226         adapter = container_of(work, struct igb_adapter, reset_task);
4227
4228         igb_dump(adapter);
4229         netdev_err(adapter->netdev, "Reset adapter\n");
4230         igb_reinit_locked(adapter);
4231 }
4232
4233 /**
4234  * igb_get_stats64 - Get System Network Statistics
4235  * @netdev: network interface device structure
4236  * @stats: rtnl_link_stats64 pointer
4237  *
4238  **/
4239 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4240                                                  struct rtnl_link_stats64 *stats)
4241 {
4242         struct igb_adapter *adapter = netdev_priv(netdev);
4243
4244         spin_lock(&adapter->stats64_lock);
4245         igb_update_stats(adapter, &adapter->stats64);
4246         memcpy(stats, &adapter->stats64, sizeof(*stats));
4247         spin_unlock(&adapter->stats64_lock);
4248
4249         return stats;
4250 }
4251
4252 /**
4253  * igb_change_mtu - Change the Maximum Transmission Unit
4254  * @netdev: network interface device structure
4255  * @new_mtu: new value for maximum frame size
4256  *
4257  * Returns 0 on success, negative on failure
4258  **/
4259 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4260 {
4261         struct igb_adapter *adapter = netdev_priv(netdev);
4262         struct pci_dev *pdev = adapter->pdev;
4263         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4264         u32 rx_buffer_len, i;
4265
4266         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4267                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4268                 return -EINVAL;
4269         }
4270
4271         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4272                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4273                 return -EINVAL;
4274         }
4275
4276         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4277                 msleep(1);
4278
4279         /* igb_down has a dependency on max_frame_size */
4280         adapter->max_frame_size = max_frame;
4281
4282         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4283          * means we reserve 2 more; this pushes us to allocate from the next
4284          * larger slab size.
4285          * i.e. RXBUFFER_2048 --> size-4096 slab
4286          */
4287
4288         if (adapter->hw.mac.type == e1000_82580)
4289                 max_frame += IGB_TS_HDR_LEN;
4290
4291         if (max_frame <= IGB_RXBUFFER_1024)
4292                 rx_buffer_len = IGB_RXBUFFER_1024;
4293         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4294                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4295         else
4296                 rx_buffer_len = IGB_RXBUFFER_128;
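             /* e.g. the default 1500 byte MTU gives max_frame = 1518 on non-82580
              * parts, selecting the 1522 byte MAXIMUM_ETHERNET_VLAN_SIZE buffer;
              * larger frames keep only a 128 byte header buffer and receive the
              * remainder of each packet into page buffers */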
4297
4298         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4299              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4300                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4301
4302         if ((adapter->hw.mac.type == e1000_82580) &&
4303             (rx_buffer_len == IGB_RXBUFFER_128))
4304                 rx_buffer_len += IGB_RXBUFFER_64;
4305
4306         if (netif_running(netdev))
4307                 igb_down(adapter);
4308
4309         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4310                  netdev->mtu, new_mtu);
4311         netdev->mtu = new_mtu;
4312
4313         for (i = 0; i < adapter->num_rx_queues; i++)
4314                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4315
4316         if (netif_running(netdev))
4317                 igb_up(adapter);
4318         else
4319                 igb_reset(adapter);
4320
4321         clear_bit(__IGB_RESETTING, &adapter->state);
4322
4323         return 0;
4324 }
4325
4326 /**
4327  * igb_update_stats - Update the board statistics counters
4328  * @adapter: board private structure
4329  **/
4330
4331 void igb_update_stats(struct igb_adapter *adapter,
4332                       struct rtnl_link_stats64 *net_stats)
4333 {
4334         struct e1000_hw *hw = &adapter->hw;
4335         struct pci_dev *pdev = adapter->pdev;
4336         u32 reg, mpc;
4337         u16 phy_tmp;
4338         int i;
4339         u64 bytes, packets;
4340         unsigned int start;
4341         u64 _bytes, _packets;
4342
4343 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4344
4345         /*
4346          * Prevent stats update while adapter is being reset, or if the pci
4347          * connection is down.
4348          */
4349         if (adapter->link_speed == 0)
4350                 return;
4351         if (pci_channel_offline(pdev))
4352                 return;
4353
4354         bytes = 0;
4355         packets = 0;
4356         for (i = 0; i < adapter->num_rx_queues; i++) {
4357                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4358                 struct igb_ring *ring = adapter->rx_ring[i];
4359
4360                 ring->rx_stats.drops += rqdpc_tmp;
4361                 net_stats->rx_fifo_errors += rqdpc_tmp;
4362
4363                 do {
4364                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4365                         _bytes = ring->rx_stats.bytes;
4366                         _packets = ring->rx_stats.packets;
4367                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4368                 bytes += _bytes;
4369                 packets += _packets;
4370         }
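             /* the fetch_begin/fetch_retry pair re-reads the per-ring counters if a
              * writer updated them concurrently, so the 64-bit totals stay
              * consistent even on 32-bit hosts */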
4371
4372         net_stats->rx_bytes = bytes;
4373         net_stats->rx_packets = packets;
4374
4375         bytes = 0;
4376         packets = 0;
4377         for (i = 0; i < adapter->num_tx_queues; i++) {
4378                 struct igb_ring *ring = adapter->tx_ring[i];
4379                 do {
4380                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4381                         _bytes = ring->tx_stats.bytes;
4382                         _packets = ring->tx_stats.packets;
4383                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4384                 bytes += _bytes;
4385                 packets += _packets;
4386         }
4387         net_stats->tx_bytes = bytes;
4388         net_stats->tx_packets = packets;
4389
4390         /* read stats registers */
4391         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4392         adapter->stats.gprc += rd32(E1000_GPRC);
4393         adapter->stats.gorc += rd32(E1000_GORCL);
4394         rd32(E1000_GORCH); /* clear GORCL */
4395         adapter->stats.bprc += rd32(E1000_BPRC);
4396         adapter->stats.mprc += rd32(E1000_MPRC);
4397         adapter->stats.roc += rd32(E1000_ROC);
4398
4399         adapter->stats.prc64 += rd32(E1000_PRC64);
4400         adapter->stats.prc127 += rd32(E1000_PRC127);
4401         adapter->stats.prc255 += rd32(E1000_PRC255);
4402         adapter->stats.prc511 += rd32(E1000_PRC511);
4403         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4404         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4405         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4406         adapter->stats.sec += rd32(E1000_SEC);
4407
4408         mpc = rd32(E1000_MPC);
4409         adapter->stats.mpc += mpc;
4410         net_stats->rx_fifo_errors += mpc;
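             /* missed packets (frames dropped for lack of receive buffers) are
              * folded into rx_fifo_errors together with the per-queue RQDPC drops
              * counted above */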
4411         adapter->stats.scc += rd32(E1000_SCC);
4412         adapter->stats.ecol += rd32(E1000_ECOL);
4413         adapter->stats.mcc += rd32(E1000_MCC);
4414         adapter->stats.latecol += rd32(E1000_LATECOL);
4415         adapter->stats.dc += rd32(E1000_DC);
4416         adapter->stats.rlec += rd32(E1000_RLEC);
4417         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4418         adapter->stats.xontxc += rd32(E1000_XONTXC);
4419         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4420         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4421         adapter->stats.fcruc += rd32(E1000_FCRUC);
4422         adapter->stats.gptc += rd32(E1000_GPTC);
4423         adapter->stats.gotc += rd32(E1000_GOTCL);
4424         rd32(E1000_GOTCH); /* clear GOTCL */
4425         adapter->stats.rnbc += rd32(E1000_RNBC);
4426         adapter->stats.ruc += rd32(E1000_RUC);
4427         adapter->stats.rfc += rd32(E1000_RFC);
4428         adapter->stats.rjc += rd32(E1000_RJC);
4429         adapter->stats.tor += rd32(E1000_TORH);
4430         adapter->stats.tot += rd32(E1000_TOTH);
4431         adapter->stats.tpr += rd32(E1000_TPR);
4432
4433         adapter->stats.ptc64 += rd32(E1000_PTC64);
4434         adapter->stats.ptc127 += rd32(E1000_PTC127);
4435         adapter->stats.ptc255 += rd32(E1000_PTC255);
4436         adapter->stats.ptc511 += rd32(E1000_PTC511);
4437         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4438         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4439
4440         adapter->stats.mptc += rd32(E1000_MPTC);
4441         adapter->stats.bptc += rd32(E1000_BPTC);
4442
4443         adapter->stats.tpt += rd32(E1000_TPT);
4444         adapter->stats.colc += rd32(E1000_COLC);
4445
4446         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4447         /* read internal phy specific stats */
4448         reg = rd32(E1000_CTRL_EXT);
4449         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4450                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4451                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4452         }
4453
4454         adapter->stats.tsctc += rd32(E1000_TSCTC);
4455         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4456
4457         adapter->stats.iac += rd32(E1000_IAC);
4458         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4459         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4460         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4461         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4462         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4463         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4464         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4465         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4466
4467         /* Fill out the OS statistics structure */
4468         net_stats->multicast = adapter->stats.mprc;
4469         net_stats->collisions = adapter->stats.colc;
4470
4471         /* Rx Errors */
4472
4473         /* RLEC on some newer hardware can be incorrect so build
4474          * our own version based on RUC and ROC */
4475         net_stats->rx_errors = adapter->stats.rxerrc +
4476                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4477                 adapter->stats.ruc + adapter->stats.roc +
4478                 adapter->stats.cexterr;
4479         net_stats->rx_length_errors = adapter->stats.ruc +
4480                                       adapter->stats.roc;
4481         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4482         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4483         net_stats->rx_missed_errors = adapter->stats.mpc;
4484
4485         /* Tx Errors */
4486         net_stats->tx_errors = adapter->stats.ecol +
4487                                adapter->stats.latecol;
4488         net_stats->tx_aborted_errors = adapter->stats.ecol;
4489         net_stats->tx_window_errors = adapter->stats.latecol;
4490         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4491
4492         /* Tx Dropped needs to be maintained elsewhere */
4493
4494         /* Phy Stats */
4495         if (hw->phy.media_type == e1000_media_type_copper) {
4496                 if ((adapter->link_speed == SPEED_1000) &&
4497                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4498                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4499                         adapter->phy_stats.idle_errors += phy_tmp;
4500                 }
4501         }
4502
4503         /* Management Stats */
4504         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4505         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4506         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4507 }
4508
4509 static irqreturn_t igb_msix_other(int irq, void *data)
4510 {
4511         struct igb_adapter *adapter = data;
4512         struct e1000_hw *hw = &adapter->hw;
4513         u32 icr = rd32(E1000_ICR);
4514         /* reading ICR causes bit 31 of EICR to be cleared */
4515
4516         if (icr & E1000_ICR_DRSTA)
4517                 schedule_work(&adapter->reset_task);
4518
4519         if (icr & E1000_ICR_DOUTSYNC) {
4520                 /* HW is reporting DMA is out of sync */
4521                 adapter->stats.doosync++;
4522         }
4523
4524         /* Check for a mailbox event */
4525         if (icr & E1000_ICR_VMMB)
4526                 igb_msg_task(adapter);
4527
4528         if (icr & E1000_ICR_LSC) {
4529                 hw->mac.get_link_status = 1;
4530                 /* guard against interrupt when we're going down */
4531                 if (!test_bit(__IGB_DOWN, &adapter->state))
4532                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4533         }
4534
4535         if (adapter->vfs_allocated_count)
4536                 wr32(E1000_IMS, E1000_IMS_LSC |
4537                                 E1000_IMS_VMMB |
4538                                 E1000_IMS_DOUTSYNC);
4539         else
4540                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4541         wr32(E1000_EIMS, adapter->eims_other);
4542
4543         return IRQ_HANDLED;
4544 }
4545
4546 static void igb_write_itr(struct igb_q_vector *q_vector)
4547 {
4548         struct igb_adapter *adapter = q_vector->adapter;
4549         u32 itr_val = q_vector->itr_val & 0x7FFC;
4550
4551         if (!q_vector->set_itr)
4552                 return;
4553
4554         if (!itr_val)
4555                 itr_val = 0x4;
4556
4557         if (adapter->hw.mac.type == e1000_82575)
4558                 itr_val |= itr_val << 16;
4559         else
4560                 itr_val |= 0x8000000;
4561
4562         writel(itr_val, q_vector->itr_register);
4563         q_vector->set_itr = 0;
4564 }
4565
4566 static irqreturn_t igb_msix_ring(int irq, void *data)
4567 {
4568         struct igb_q_vector *q_vector = data;
4569
4570         /* Write the ITR value calculated from the previous interrupt. */
4571         igb_write_itr(q_vector);
4572
4573         napi_schedule(&q_vector->napi);
4574
4575         return IRQ_HANDLED;
4576 }
4577
4578 #ifdef CONFIG_IGB_DCA
4579 static void igb_update_dca(struct igb_q_vector *q_vector)
4580 {
4581         struct igb_adapter *adapter = q_vector->adapter;
4582         struct e1000_hw *hw = &adapter->hw;
4583         int cpu = get_cpu();
4584
4585         if (q_vector->cpu == cpu)
4586                 goto out_no_update;
4587
4588         if (q_vector->tx_ring) {
4589                 int q = q_vector->tx_ring->reg_idx;
4590                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4591                 if (hw->mac.type == e1000_82575) {
4592                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4593                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4594                 } else {
4595                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4596                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4597                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4598                 }
4599                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4600                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4601         }
4602         if (q_vector->rx_ring) {
4603                 int q = q_vector->rx_ring->reg_idx;
4604                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4605                 if (hw->mac.type == e1000_82575) {
4606                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4607                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4608                 } else {
4609                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4610                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4611                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4612                 }
4613                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4614                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4615                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4616                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4617         }
4618         q_vector->cpu = cpu;
4619 out_no_update:
4620         put_cpu();
4621 }
4622
4623 static void igb_setup_dca(struct igb_adapter *adapter)
4624 {
4625         struct e1000_hw *hw = &adapter->hw;
4626         int i;
4627
4628         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4629                 return;
4630
4631         /* Always use CB2 mode, difference is masked in the CB driver. */
4632         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4633
4634         for (i = 0; i < adapter->num_q_vectors; i++) {
4635                 adapter->q_vector[i]->cpu = -1;
4636                 igb_update_dca(adapter->q_vector[i]);
4637         }
4638 }
4639
4640 static int __igb_notify_dca(struct device *dev, void *data)
4641 {
4642         struct net_device *netdev = dev_get_drvdata(dev);
4643         struct igb_adapter *adapter = netdev_priv(netdev);
4644         struct pci_dev *pdev = adapter->pdev;
4645         struct e1000_hw *hw = &adapter->hw;
4646         unsigned long event = *(unsigned long *)data;
4647
4648         switch (event) {
4649         case DCA_PROVIDER_ADD:
4650                 /* if already enabled, don't do it again */
4651                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4652                         break;
4653                 if (dca_add_requester(dev) == 0) {
4654                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4655                         dev_info(&pdev->dev, "DCA enabled\n");
4656                         igb_setup_dca(adapter);
4657                         break;
4658                 }
4659                 /* Fall Through since DCA is disabled. */
4660         case DCA_PROVIDER_REMOVE:
4661                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4662                         /* without this a class_device is left
4663                          * hanging around in the sysfs model */
4664                         dca_remove_requester(dev);
4665                         dev_info(&pdev->dev, "DCA disabled\n");
4666                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4667                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4668                 }
4669                 break;
4670         }
4671
4672         return 0;
4673 }
4674
4675 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4676                           void *p)
4677 {
4678         int ret_val;
4679
4680         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4681                                          __igb_notify_dca);
4682
4683         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4684 }
4685 #endif /* CONFIG_IGB_DCA */
4686
4687 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4688 {
4689         struct e1000_hw *hw = &adapter->hw;
4690         u32 ping;
4691         int i;
4692
4693         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4694                 ping = E1000_PF_CONTROL_MSG;
4695                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4696                         ping |= E1000_VT_MSGTYPE_CTS;
4697                 igb_write_mbx(hw, &ping, 1, i);
4698         }
4699 }
4700
4701 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4702 {
4703         struct e1000_hw *hw = &adapter->hw;
4704         u32 vmolr = rd32(E1000_VMOLR(vf));
4705         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4706
4707         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4708                             IGB_VF_FLAG_MULTI_PROMISC);
4709         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4710
4711         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4712                 vmolr |= E1000_VMOLR_MPME;
4713                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4714                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4715         } else {
4716                 /*
4717                  * if we have hashes and we are clearing a multicast promisc
4718                  * flag we need to write the hashes to the MTA as this step
4719                  * was previously skipped
4720                  */
4721                 if (vf_data->num_vf_mc_hashes > 30) {
4722                         vmolr |= E1000_VMOLR_MPME;
4723                 } else if (vf_data->num_vf_mc_hashes) {
4724                         int j;
4725                         vmolr |= E1000_VMOLR_ROMPE;
4726                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4727                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4728                 }
4729         }
4730
4731         wr32(E1000_VMOLR(vf), vmolr);
4732
4733         /* if there are flags left unprocessed, they are likely not supported */
4734         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4735                 return -EINVAL;
4736
4737         return 0;
4738
4739 }
4740
4741 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4742                                   u32 *msgbuf, u32 vf)
4743 {
4744         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4745         u16 *hash_list = (u16 *)&msgbuf[1];
4746         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4747         int i;
4748
4749         /* salt away the number of multicast addresses assigned
4750          * to this VF so they can be restored when the PF multicast
4751          * list changes
4752          */
4753         vf_data->num_vf_mc_hashes = n;
4754
4755         /* only up to 30 hash values supported */
4756         if (n > 30)
4757                 n = 30;
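             /* the VF mailbox holds E1000_VFMAILBOX_SIZE (16) dwords; one carries
              * the command word, leaving room for at most 30 16-bit hash values */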
4758
4759         /* store the hashes for later use */
4760         for (i = 0; i < n; i++)
4761                 vf_data->vf_mc_hashes[i] = hash_list[i];
4762
4763         /* Flush and reset the mta with the new values */
4764         igb_set_rx_mode(adapter->netdev);
4765
4766         return 0;
4767 }
4768
4769 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4770 {
4771         struct e1000_hw *hw = &adapter->hw;
4772         struct vf_data_storage *vf_data;
4773         int i, j;
4774
4775         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4776                 u32 vmolr = rd32(E1000_VMOLR(i));
4777                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4778
4779                 vf_data = &adapter->vf_data[i];
4780
4781                 if ((vf_data->num_vf_mc_hashes > 30) ||
4782                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4783                         vmolr |= E1000_VMOLR_MPME;
4784                 } else if (vf_data->num_vf_mc_hashes) {
4785                         vmolr |= E1000_VMOLR_ROMPE;
4786                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4787                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4788                 }
4789                 wr32(E1000_VMOLR(i), vmolr);
4790         }
4791 }
4792
4793 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4794 {
4795         struct e1000_hw *hw = &adapter->hw;
4796         u32 pool_mask, reg, vid;
4797         int i;
4798
4799         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
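             /* each VLVF entry carries one pool-select bit per VF/pool; clearing
              * this VF's bit in every entry below drops it from all VLAN filters */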
4800
4801         /* Find the vlan filter for this id */
4802         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4803                 reg = rd32(E1000_VLVF(i));
4804
4805                 /* remove the vf from the pool */
4806                 reg &= ~pool_mask;
4807
4808                 /* if pool is empty then remove entry from vfta */
4809                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4810                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4811                         vid = reg & E1000_VLVF_VLANID_MASK;
4812                         reg = 0;
4813                         igb_vfta_set(hw, vid, false);
4814                 }
4815
4816                 wr32(E1000_VLVF(i), reg);
4817         }
4818
4819         adapter->vf_data[vf].vlans_enabled = 0;
4820 }
4821
4822 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4823 {
4824         struct e1000_hw *hw = &adapter->hw;
4825         u32 reg, i;
4826
4827         /* The vlvf table only exists on 82576 hardware and newer */
4828         if (hw->mac.type < e1000_82576)
4829                 return -1;
4830
4831         /* we only need to do this if VMDq is enabled */
4832         if (!adapter->vfs_allocated_count)
4833                 return -1;
4834
4835         /* Find the vlan filter for this id */
4836         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4837                 reg = rd32(E1000_VLVF(i));
4838                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4839                     vid == (reg & E1000_VLVF_VLANID_MASK))
4840                         break;
4841         }
4842
4843         if (add) {
4844                 if (i == E1000_VLVF_ARRAY_SIZE) {
4845                         /* Did not find a matching VLAN ID entry that was
4846                          * enabled.  Search for a free filter entry, i.e.
4847                          * one without the enable bit set
4848                          */
4849                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4850                                 reg = rd32(E1000_VLVF(i));
4851                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4852                                         break;
4853                         }
4854                 }
4855                 if (i < E1000_VLVF_ARRAY_SIZE) {
4856                         /* Found an enabled/available entry */
4857                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4858
4859                         /* if !enabled we need to set this up in vfta */
4860                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4861                                 /* add VID to filter table */
4862                                 igb_vfta_set(hw, vid, true);
4863                                 reg |= E1000_VLVF_VLANID_ENABLE;
4864                         }
4865                         reg &= ~E1000_VLVF_VLANID_MASK;
4866                         reg |= vid;
4867                         wr32(E1000_VLVF(i), reg);
4868
4869                         /* do not modify RLPML for PF devices */
4870                         if (vf >= adapter->vfs_allocated_count)
4871                                 return 0;
4872
4873                         if (!adapter->vf_data[vf].vlans_enabled) {
4874                                 u32 size;
4875                                 reg = rd32(E1000_VMOLR(vf));
4876                                 size = reg & E1000_VMOLR_RLPML_MASK;
4877                                 size += 4;
4878                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4879                                 reg |= size;
4880                                 wr32(E1000_VMOLR(vf), reg);
4881                         }
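                             /* the +4 above widens the pool's maximum accepted Rx
                              * packet size to make room for the VLAN tag when the
                              * VF's first VLAN is enabled; the matching -4 in the
                              * removal path below undoes it for the last VLAN */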
4882
4883                         adapter->vf_data[vf].vlans_enabled++;
4884                         return 0;
4885                 }
4886         } else {
4887                 if (i < E1000_VLVF_ARRAY_SIZE) {
4888                         /* remove vf from the pool */
4889                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4890                         /* if pool is empty then remove entry from vfta */
4891                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4892                                 reg = 0;
4893                                 igb_vfta_set(hw, vid, false);
4894                         }
4895                         wr32(E1000_VLVF(i), reg);
4896
4897                         /* do not modify RLPML for PF devices */
4898                         if (vf >= adapter->vfs_allocated_count)
4899                                 return 0;
4900
4901                         adapter->vf_data[vf].vlans_enabled--;
4902                         if (!adapter->vf_data[vf].vlans_enabled) {
4903                                 u32 size;
4904                                 reg = rd32(E1000_VMOLR(vf));
4905                                 size = reg & E1000_VMOLR_RLPML_MASK;
4906                                 size -= 4;
4907                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4908                                 reg |= size;
4909                                 wr32(E1000_VMOLR(vf), reg);
4910                         }
4911                 }
4912         }
4913         return 0;
4914 }
4915
4916 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4917 {
4918         struct e1000_hw *hw = &adapter->hw;
4919
4920         if (vid)
4921                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4922         else
4923                 wr32(E1000_VMVIR(vf), 0);
4924 }
4925
4926 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4927                                int vf, u16 vlan, u8 qos)
4928 {
4929         int err = 0;
4930         struct igb_adapter *adapter = netdev_priv(netdev);
4931
4932         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4933                 return -EINVAL;
4934         if (vlan || qos) {
4935                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4936                 if (err)
4937                         goto out;
4938                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4939                 igb_set_vmolr(adapter, vf, !vlan);
4940                 adapter->vf_data[vf].pf_vlan = vlan;
4941                 adapter->vf_data[vf].pf_qos = qos;
4942                 dev_info(&adapter->pdev->dev,
4943                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4944                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4945                         dev_warn(&adapter->pdev->dev,
4946                                  "The VF VLAN has been set,"
4947                                  " but the PF device is not up.\n");
4948                         dev_warn(&adapter->pdev->dev,
4949                                  "Bring the PF device up before"
4950                                  " attempting to use the VF device.\n");
4951                 }
4952         } else {
4953                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4954                                    false, vf);
4955                 igb_set_vmvir(adapter, vlan, vf);
4956                 igb_set_vmolr(adapter, vf, true);
4957                 adapter->vf_data[vf].pf_vlan = 0;
4958                 adapter->vf_data[vf].pf_qos = 0;
4959         }
4960 out:
4961         return err;
4962 }
4963
4964 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4965 {
4966         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4967         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4968
4969         return igb_vlvf_set(adapter, vid, add, vf);
4970 }
4971
4972 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4973 {
4974         /* clear flags */
4975         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4976         adapter->vf_data[vf].last_nack = jiffies;
4977
4978         /* reset offloads to defaults */
4979         igb_set_vmolr(adapter, vf, true);
4980
4981         /* reset vlans for device */
4982         igb_clear_vf_vfta(adapter, vf);
4983         if (adapter->vf_data[vf].pf_vlan)
4984                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4985                                     adapter->vf_data[vf].pf_vlan,
4986                                     adapter->vf_data[vf].pf_qos);
4987         else
4988                 igb_clear_vf_vfta(adapter, vf);
4989
4990         /* reset multicast table array for vf */
4991         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4992
4993         /* Flush and reset the mta with the new values */
4994         igb_set_rx_mode(adapter->netdev);
4995 }
4996
4997 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4998 {
4999         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5000
5001         /* generate a new mac address as we were hotplug removed/added */
5002         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5003                 random_ether_addr(vf_mac);
5004
5005         /* process remaining reset events */
5006         igb_vf_reset(adapter, vf);
5007 }
5008
5009 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5010 {
5011         struct e1000_hw *hw = &adapter->hw;
5012         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5013         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5014         u32 reg, msgbuf[3];
5015         u8 *addr = (u8 *)(&msgbuf[1]);
5016
5017         /* process all the same items cleared in a function level reset */
5018         igb_vf_reset(adapter, vf);
5019
5020         /* set vf mac address */
5021         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5022
5023         /* enable transmit and receive for vf */
5024         reg = rd32(E1000_VFTE);
5025         wr32(E1000_VFTE, reg | (1 << vf));
5026         reg = rd32(E1000_VFRE);
5027         wr32(E1000_VFRE, reg | (1 << vf));
5028
5029         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
5030
5031         /* reply to reset with ack and vf mac address */
5032         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5033         memcpy(addr, vf_mac, 6);
5034         igb_write_mbx(hw, msgbuf, 3, vf);
5035 }
5036
5037 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5038 {
5039         /*
5040          * The VF MAC Address is stored in a packed array of bytes
5041          * starting at the second 32 bit word of the msg array
5042          */
5043         unsigned char *addr = (unsigned char *)&msg[1];
5044         int err = -1;
5045
5046         if (is_valid_ether_addr(addr))
5047                 err = igb_set_vf_mac(adapter, vf, addr);
5048
5049         return err;
5050 }
5051
5052 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5053 {
5054         struct e1000_hw *hw = &adapter->hw;
5055         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5056         u32 msg = E1000_VT_MSGTYPE_NACK;
5057
5058         /* if device isn't clear to send it shouldn't be reading either */
5059         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5060             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5061                 igb_write_mbx(hw, &msg, 1, vf);
5062                 vf_data->last_nack = jiffies;
5063         }
5064 }
5065
5066 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5067 {
5068         struct pci_dev *pdev = adapter->pdev;
5069         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5070         struct e1000_hw *hw = &adapter->hw;
5071         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5072         s32 retval;
5073
5074         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5075
5076         if (retval) {
5077                 /* if receive failed revoke VF CTS stats and restart init */
5078                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5079                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5080                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5081                         return;
5082                 goto out;
5083         }
5084
5085         /* this is a message we already processed, do nothing */
5086         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5087                 return;
5088
5089         /*
5090          * until the vf completes a reset it should not be
5091          * allowed to start any configuration.
5092          */
5093
5094         if (msgbuf[0] == E1000_VF_RESET) {
5095                 igb_vf_reset_msg(adapter, vf);
5096                 return;
5097         }
5098
5099         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5100                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5101                         return;
5102                 retval = -1;
5103                 goto out;
5104         }
5105
5106         switch ((msgbuf[0] & 0xFFFF)) {
5107         case E1000_VF_SET_MAC_ADDR:
5108                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5109                 break;
5110         case E1000_VF_SET_PROMISC:
5111                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5112                 break;
5113         case E1000_VF_SET_MULTICAST:
5114                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5115                 break;
5116         case E1000_VF_SET_LPE:
5117                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5118                 break;
5119         case E1000_VF_SET_VLAN:
5120                 if (adapter->vf_data[vf].pf_vlan)
5121                         retval = -1;
5122                 else
5123                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5124                 break;
5125         default:
5126                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5127                 retval = -1;
5128                 break;
5129         }
5130
5131         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5132 out:
5133         /* notify the VF of the results of what it sent us */
5134         if (retval)
5135                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5136         else
5137                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5138
5139         igb_write_mbx(hw, msgbuf, 1, vf);
5140 }
5141
5142 static void igb_msg_task(struct igb_adapter *adapter)
5143 {
5144         struct e1000_hw *hw = &adapter->hw;
5145         u32 vf;
5146
5147         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5148                 /* process any reset requests */
5149                 if (!igb_check_for_rst(hw, vf))
5150                         igb_vf_reset_event(adapter, vf);
5151
5152                 /* process any messages pending */
5153                 if (!igb_check_for_msg(hw, vf))
5154                         igb_rcv_msg_from_vf(adapter, vf);
5155
5156                 /* process any acks */
5157                 if (!igb_check_for_ack(hw, vf))
5158                         igb_rcv_ack_from_vf(adapter, vf);
5159         }
5160 }
5161
5162 /**
5163  *  igb_set_uta - Set unicast filter table address
5164  *  @adapter: board private structure
5165  *
5166  *  The unicast table address is a register array of 32-bit registers.
5167  *  The table is meant to be used in a way similar to how the MTA is used
5168  *  however due to certain limitations in the hardware it is necessary to
5169  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5170  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5171  **/
5172 static void igb_set_uta(struct igb_adapter *adapter)
5173 {
5174         struct e1000_hw *hw = &adapter->hw;
5175         int i;
5176
5177         /* The UTA table only exists on 82576 hardware and newer */
5178         if (hw->mac.type < e1000_82576)
5179                 return;
5180
5181         /* we only need to do this if VMDq is enabled */
5182         if (!adapter->vfs_allocated_count)
5183                 return;
5184
5185         for (i = 0; i < hw->mac.uta_reg_count; i++)
5186                 array_wr32(E1000_UTA, i, ~0);
5187 }
5188
5189 /**
5190  * igb_intr_msi - Interrupt Handler
5191  * @irq: interrupt number
5192  * @data: pointer to a network interface device structure
5193  **/
5194 static irqreturn_t igb_intr_msi(int irq, void *data)
5195 {
5196         struct igb_adapter *adapter = data;
5197         struct igb_q_vector *q_vector = adapter->q_vector[0];
5198         struct e1000_hw *hw = &adapter->hw;
5199         /* read ICR disables interrupts using IAM */
5200         u32 icr = rd32(E1000_ICR);
5201
5202         igb_write_itr(q_vector);
5203
5204         if (icr & E1000_ICR_DRSTA)
5205                 schedule_work(&adapter->reset_task);
5206
5207         if (icr & E1000_ICR_DOUTSYNC) {
5208                 /* HW is reporting DMA is out of sync */
5209                 adapter->stats.doosync++;
5210         }
5211
5212         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5213                 hw->mac.get_link_status = 1;
5214                 if (!test_bit(__IGB_DOWN, &adapter->state))
5215                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5216         }
5217
5218         napi_schedule(&q_vector->napi);
5219
5220         return IRQ_HANDLED;
5221 }
5222
5223 /**
5224  * igb_intr - Legacy Interrupt Handler
5225  * @irq: interrupt number
5226  * @data: pointer to a network interface device structure
5227  **/
5228 static irqreturn_t igb_intr(int irq, void *data)
5229 {
5230         struct igb_adapter *adapter = data;
5231         struct igb_q_vector *q_vector = adapter->q_vector[0];
5232         struct e1000_hw *hw = &adapter->hw;
5233         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5234          * need for the IMC write */
5235         u32 icr = rd32(E1000_ICR);
5236         if (!icr)
5237                 return IRQ_NONE;  /* Not our interrupt */
5238
5239         igb_write_itr(q_vector);
5240
5241         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5242          * not set, then the adapter didn't send an interrupt */
5243         if (!(icr & E1000_ICR_INT_ASSERTED))
5244                 return IRQ_NONE;
5245
5246         if (icr & E1000_ICR_DRSTA)
5247                 schedule_work(&adapter->reset_task);
5248
5249         if (icr & E1000_ICR_DOUTSYNC) {
5250                 /* HW is reporting DMA is out of sync */
5251                 adapter->stats.doosync++;
5252         }
5253
5254         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5255                 hw->mac.get_link_status = 1;
5256                 /* guard against interrupt when we're going down */
5257                 if (!test_bit(__IGB_DOWN, &adapter->state))
5258                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5259         }
5260
5261         napi_schedule(&q_vector->napi);
5262
5263         return IRQ_HANDLED;
5264 }
5265
5266 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5267 {
5268         struct igb_adapter *adapter = q_vector->adapter;
5269         struct e1000_hw *hw = &adapter->hw;
5270
5271         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5272             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5273                 if (!adapter->msix_entries)
5274                         igb_set_itr(adapter);
5275                 else
5276                         igb_update_ring_itr(q_vector);
5277         }
5278
5279         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5280                 if (adapter->msix_entries)
5281                         wr32(E1000_EIMS, q_vector->eims_value);
5282                 else
5283                         igb_irq_enable(adapter);
5284         }
5285 }
5286
5287 /**
5288  * igb_poll - NAPI Rx polling callback
5289  * @napi: napi polling structure
5290  * @budget: count of how many packets we should handle
5291  **/
5292 static int igb_poll(struct napi_struct *napi, int budget)
5293 {
5294         struct igb_q_vector *q_vector = container_of(napi,
5295                                                      struct igb_q_vector,
5296                                                      napi);
5297         int tx_clean_complete = 1, work_done = 0;
5298
5299 #ifdef CONFIG_IGB_DCA
5300         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5301                 igb_update_dca(q_vector);
5302 #endif
5303         if (q_vector->tx_ring)
5304                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5305
5306         if (q_vector->rx_ring)
5307                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5308
5309         if (!tx_clean_complete)
5310                 work_done = budget;
5311
5312         /* If not enough Rx work done, exit the polling mode */
5313         if (work_done < budget) {
5314                 napi_complete(napi);
5315                 igb_ring_irq_enable(q_vector);
5316         }
5317
5318         return work_done;
5319 }
5320
5321 /**
5322  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5323  * @adapter: board private structure
5324  * @shhwtstamps: timestamp structure to update
5325  * @regval: unsigned 64bit system time value.
5326  *
5327  * We need to convert the system time value stored in the RX/TXSTMP registers
5328  * into a hwtstamp which can be used by the upper level timestamping functions
5329  */
5330 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5331                                    struct skb_shared_hwtstamps *shhwtstamps,
5332                                    u64 regval)
5333 {
5334         u64 ns;
5335
5336         /*
5337          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up
5338          * by 24 bits to match the clock shift we set up earlier.
5339          */
5340         if (adapter->hw.mac.type == e1000_82580)
5341                 regval <<= IGB_82580_TSYNC_SHIFT;
5342
5343         ns = timecounter_cyc2time(&adapter->clock, regval);
5344         timecompare_update(&adapter->compare, ns);
5345         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5346         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5347         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5348 }
5349
5350 /**
5351  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5352  * @q_vector: pointer to q_vector containing needed info
5353  * @buffer_info: pointer to igb_buffer structure
5354  *
5355  * If we were asked to do hardware stamping and such a time stamp is
5356  * available, then it must have been for this skb here because we
5357  * allow only one such packet into the queue.
5358  */
5359 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5360 {
5361         struct igb_adapter *adapter = q_vector->adapter;
5362         struct e1000_hw *hw = &adapter->hw;
5363         struct skb_shared_hwtstamps shhwtstamps;
5364         u64 regval;
5365
5366         /* if skb does not support hw timestamp or TX stamp not valid exit */
5367         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5368             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5369                 return;
5370
5371         regval = rd32(E1000_TXSTMPL);
5372         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
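             /* as with the Rx path, reading the timestamp registers makes them
              * available again so the next requested packet can be stamped */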
5373
5374         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5375         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5376 }
5377
5378 /**
5379  * igb_clean_tx_irq - Reclaim resources after transmit completes
5380  * @q_vector: pointer to q_vector containing needed info
5381  * returns true if ring is completely cleaned
5382  **/
5383 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5384 {
5385         struct igb_adapter *adapter = q_vector->adapter;
5386         struct igb_ring *tx_ring = q_vector->tx_ring;
5387         struct net_device *netdev = tx_ring->netdev;
5388         struct e1000_hw *hw = &adapter->hw;
5389         struct igb_buffer *buffer_info;
5390         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5391         unsigned int total_bytes = 0, total_packets = 0;
5392         unsigned int i, eop, count = 0;
5393         bool cleaned = false;
5394
5395         i = tx_ring->next_to_clean;
5396         eop = tx_ring->buffer_info[i].next_to_watch;
5397         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5398
5399         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5400                (count < tx_ring->count)) {
5401                 rmb();  /* read buffer_info after eop_desc status */
5402                 for (cleaned = false; !cleaned; count++) {
5403                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5404                         buffer_info = &tx_ring->buffer_info[i];
5405                         cleaned = (i == eop);
5406
5407                         if (buffer_info->skb) {
5408                                 total_bytes += buffer_info->bytecount;
5409                                 /* gso_segs is currently only valid for tcp */
5410                                 total_packets += buffer_info->gso_segs;
5411                                 igb_tx_hwtstamp(q_vector, buffer_info);
5412                         }
5413
5414                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5415                         tx_desc->wb.status = 0;
5416
5417                         i++;
5418                         if (i == tx_ring->count)
5419                                 i = 0;
5420                 }
5421                 eop = tx_ring->buffer_info[i].next_to_watch;
5422                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5423         }
5424
5425         tx_ring->next_to_clean = i;
5426
5427         if (unlikely(count &&
5428                      netif_carrier_ok(netdev) &&
5429                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5430                 /* Make sure that anybody stopping the queue after this
5431                  * sees the new next_to_clean.
5432                  */
5433                 smp_mb();
5434                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5435                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5436                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5437
5438                         u64_stats_update_begin(&tx_ring->tx_syncp);
5439                         tx_ring->tx_stats.restart_queue++;
5440                         u64_stats_update_end(&tx_ring->tx_syncp);
5441                 }
5442         }
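             /* waking only once IGB_TX_QUEUE_WAKE descriptors are free adds
              * hysteresis so the queue does not bounce between stopped and
              * running on every reclaimed descriptor */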
5443
5444         if (tx_ring->detect_tx_hung) {
5445                 /* Detect a transmit hang in hardware; this serializes the
5446                  * check with the clearing of time_stamp and movement of i */
5447                 tx_ring->detect_tx_hung = false;
5448                 if (tx_ring->buffer_info[i].time_stamp &&
5449                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5450                                (adapter->tx_timeout_factor * HZ)) &&
5451                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5452
5453                         /* detected Tx unit hang */
5454                         dev_err(tx_ring->dev,
5455                                 "Detected Tx Unit Hang\n"
5456                                 "  Tx Queue             <%d>\n"
5457                                 "  TDH                  <%x>\n"
5458                                 "  TDT                  <%x>\n"
5459                                 "  next_to_use          <%x>\n"
5460                                 "  next_to_clean        <%x>\n"
5461                                 "buffer_info[next_to_clean]\n"
5462                                 "  time_stamp           <%lx>\n"
5463                                 "  next_to_watch        <%x>\n"
5464                                 "  jiffies              <%lx>\n"
5465                                 "  desc.status          <%x>\n",
5466                                 tx_ring->queue_index,
5467                                 readl(tx_ring->head),
5468                                 readl(tx_ring->tail),
5469                                 tx_ring->next_to_use,
5470                                 tx_ring->next_to_clean,
5471                                 tx_ring->buffer_info[eop].time_stamp,
5472                                 eop,
5473                                 jiffies,
5474                                 eop_desc->wb.status);
5475                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5476                 }
5477         }
5478         tx_ring->total_bytes += total_bytes;
5479         tx_ring->total_packets += total_packets;
5480         u64_stats_update_begin(&tx_ring->tx_syncp);
5481         tx_ring->tx_stats.bytes += total_bytes;
5482         tx_ring->tx_stats.packets += total_packets;
5483         u64_stats_update_end(&tx_ring->tx_syncp);
5484         return count < tx_ring->count;
5485 }
5486
5487 /**
5488  * igb_receive_skb - helper function to handle rx indications
5489  * @q_vector: structure containing interrupt and ring information
5490  * @skb: packet to send up
5491  * @vlan_tag: vlan tag for packet
5492  **/
5493 static void igb_receive_skb(struct igb_q_vector *q_vector,
5494                             struct sk_buff *skb,
5495                             u16 vlan_tag)
5496 {
5497         struct igb_adapter *adapter = q_vector->adapter;
5498
5499         if (vlan_tag && adapter->vlgrp)
5500                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5501                                  vlan_tag, skb);
5502         else
5503                 napi_gro_receive(&q_vector->napi, skb);
5504 }
5505
5506 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5507                                        u32 status_err, struct sk_buff *skb)
5508 {
5509         skb_checksum_none_assert(skb);
5510
5511         /* bail out if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5512         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5513              (status_err & E1000_RXD_STAT_IXSM))
5514                 return;
5515
5516         /* TCP/UDP checksum error bit is set */
5517         if (status_err &
5518             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5519                 /*
5520                  * work around an errata where the TCPE (aka L4E) bit is
5521                  * set incorrectly on 64 byte (60 byte w/o crc) SCTP
5522                  * packets; count the error and let the stack verify the crc32c
5523                  */
5524                 if ((skb->len == 60) &&
5525                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5526                         u64_stats_update_begin(&ring->rx_syncp);
5527                         ring->rx_stats.csum_err++;
5528                         u64_stats_update_end(&ring->rx_syncp);
5529                 }
5530                 /* let the stack verify checksum errors */
5531                 return;
5532         }
5533         /* It must be a TCP or UDP packet with a valid checksum */
5534         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5535                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5536
5537         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5538 }
5539
5540 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5541                                    struct sk_buff *skb)
5542 {
5543         struct igb_adapter *adapter = q_vector->adapter;
5544         struct e1000_hw *hw = &adapter->hw;
5545         u64 regval;
5546
5547         /*
5548          * If this bit is set, then the RX registers contain the time stamp. No
5549          * other packet will be time stamped until we read these registers, so
5550          * read the registers to make them available again. Because only one
5551          * packet can be time stamped at a time, we know that the register
5552          * values must belong to this one here and therefore we don't need to
5553          * compare any of the additional attributes stored for it.
5554          *
5555          * If nothing went wrong, the register value can be converted into
5556          * a skb_shared_hwtstamps structure for this skb.
5557          */
5558         if (staterr & E1000_RXDADV_STAT_TSIP) {
5559                 u32 *stamp = (u32 *)skb->data;
5560                 regval = le32_to_cpu(*(stamp + 2));
5561                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5562                 skb_pull(skb, IGB_TS_HDR_LEN);
5563         } else {
5564                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5565                         return;
5566
5567                 regval = rd32(E1000_RXSTMPL);
5568                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5569         }
5570
5571         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5572 }
5573 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5574                                union e1000_adv_rx_desc *rx_desc)
5575 {
5576         /* HW will not DMA in data larger than the given buffer, even if it
5577          * parses the (NFS, of course) header to be larger.  In that case, it
5578          * fills the header buffer and spills the rest into the page.
5579          */
5580         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5581                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5582         if (hlen > rx_ring->rx_buffer_len)
5583                 hlen = rx_ring->rx_buffer_len;
5584         return hlen;
5585 }
5586
5587 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5588                                  int *work_done, int budget)
5589 {
5590         struct igb_ring *rx_ring = q_vector->rx_ring;
5591         struct net_device *netdev = rx_ring->netdev;
5592         struct device *dev = rx_ring->dev;
5593         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5594         struct igb_buffer *buffer_info, *next_buffer;
5595         struct sk_buff *skb;
5596         bool cleaned = false;
5597         int cleaned_count = 0;
5598         int current_node = numa_node_id();
5599         unsigned int total_bytes = 0, total_packets = 0;
5600         unsigned int i;
5601         u32 staterr;
5602         u16 length;
5603         u16 vlan_tag;
5604
5605         i = rx_ring->next_to_clean;
5606         buffer_info = &rx_ring->buffer_info[i];
5607         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5608         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5609
5610         while (staterr & E1000_RXD_STAT_DD) {
5611                 if (*work_done >= budget)
5612                         break;
5613                 (*work_done)++;
5614                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5615
5616                 skb = buffer_info->skb;
5617                 prefetch(skb->data - NET_IP_ALIGN);
5618                 buffer_info->skb = NULL;
5619
5620                 i++;
5621                 if (i == rx_ring->count)
5622                         i = 0;
5623
5624                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5625                 prefetch(next_rxd);
5626                 next_buffer = &rx_ring->buffer_info[i];
5627
5628                 length = le16_to_cpu(rx_desc->wb.upper.length);
5629                 cleaned = true;
5630                 cleaned_count++;
5631
5632                 if (buffer_info->dma) {
5633                         dma_unmap_single(dev, buffer_info->dma,
5634                                          rx_ring->rx_buffer_len,
5635                                          DMA_FROM_DEVICE);
5636                         buffer_info->dma = 0;
5637                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5638                                 skb_put(skb, length);
5639                                 goto send_up;
5640                         }
5641                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5642                 }
5643
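                     /* any packet data beyond the header buffer was DMA'd into the page */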
5644                 if (length) {
5645                         dma_unmap_page(dev, buffer_info->page_dma,
5646                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5647                         buffer_info->page_dma = 0;
5648
5649                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5650                                                 buffer_info->page,
5651                                                 buffer_info->page_offset,
5652                                                 length);
5653
5654                         if ((page_count(buffer_info->page) != 1) ||
5655                             (page_to_nid(buffer_info->page) != current_node))
5656                                 buffer_info->page = NULL;
5657                         else
5658                                 get_page(buffer_info->page);
5659
5660                         skb->len += length;
5661                         skb->data_len += length;
5662                         skb->truesize += length;
5663                 }
5664
5665                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5666                         buffer_info->skb = next_buffer->skb;
5667                         buffer_info->dma = next_buffer->dma;
5668                         next_buffer->skb = skb;
5669                         next_buffer->dma = 0;
5670                         goto next_desc;
5671                 }
5672 send_up:
5673                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5674                         dev_kfree_skb_irq(skb);
5675                         goto next_desc;
5676                 }
5677
5678                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5679                         igb_rx_hwtstamp(q_vector, staterr, skb);
5680                 total_bytes += skb->len;
5681                 total_packets++;
5682
5683                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5684
5685                 skb->protocol = eth_type_trans(skb, netdev);
5686                 skb_record_rx_queue(skb, rx_ring->queue_index);
5687
5688                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5689                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5690
5691                 igb_receive_skb(q_vector, skb, vlan_tag);
5692
5693 next_desc:
5694                 rx_desc->wb.upper.status_error = 0;
5695
5696                 /* return some buffers to hardware, one at a time is too slow */
5697                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5698                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5699                         cleaned_count = 0;
5700                 }
5701
5702                 /* use prefetched values */
5703                 rx_desc = next_rxd;
5704                 buffer_info = next_buffer;
5705                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5706         }
5707
5708         rx_ring->next_to_clean = i;
5709         cleaned_count = igb_desc_unused(rx_ring);
5710
5711         if (cleaned_count)
5712                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5713
5714         rx_ring->total_packets += total_packets;
5715         rx_ring->total_bytes += total_bytes;
5716         u64_stats_update_begin(&rx_ring->rx_syncp);
5717         rx_ring->rx_stats.packets += total_packets;
5718         rx_ring->rx_stats.bytes += total_bytes;
5719         u64_stats_update_end(&rx_ring->rx_syncp);
5720         return cleaned;
5721 }
5722
5723 /**
5724  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5725  * @rx_ring: address of the ring structure to allocate buffers for
5726  **/
5727 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5728 {
5729         struct net_device *netdev = rx_ring->netdev;
5730         union e1000_adv_rx_desc *rx_desc;
5731         struct igb_buffer *buffer_info;
5732         struct sk_buff *skb;
5733         unsigned int i;
5734         int bufsz;
5735
5736         i = rx_ring->next_to_use;
5737         buffer_info = &rx_ring->buffer_info[i];
5738
5739         bufsz = rx_ring->rx_buffer_len;
5740
5741         while (cleaned_count--) {
5742                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5743
5744                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5745                         if (!buffer_info->page) {
5746                                 buffer_info->page = netdev_alloc_page(netdev);
5747                                 if (unlikely(!buffer_info->page)) {
5748                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5749                                         rx_ring->rx_stats.alloc_failed++;
5750                                         u64_stats_update_end(&rx_ring->rx_syncp);
5751                                         goto no_buffers;
5752                                 }
5753                                 buffer_info->page_offset = 0;
5754                         } else {
5755                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5756                         }
5757                         buffer_info->page_dma =
5758                                 dma_map_page(rx_ring->dev, buffer_info->page,
5759                                              buffer_info->page_offset,
5760                                              PAGE_SIZE / 2,
5761                                              DMA_FROM_DEVICE);
5762                         if (dma_mapping_error(rx_ring->dev,
5763                                               buffer_info->page_dma)) {
5764                                 buffer_info->page_dma = 0;
5765                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5766                                 rx_ring->rx_stats.alloc_failed++;
5767                                 u64_stats_update_end(&rx_ring->rx_syncp);
5768                                 goto no_buffers;
5769                         }
5770                 }
5771
5772                 skb = buffer_info->skb;
5773                 if (!skb) {
5774                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5775                         if (unlikely(!skb)) {
5776                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5777                                 rx_ring->rx_stats.alloc_failed++;
5778                                 u64_stats_update_end(&rx_ring->rx_syncp);
5779                                 goto no_buffers;
5780                         }
5781
5782                         buffer_info->skb = skb;
5783                 }
5784                 if (!buffer_info->dma) {
5785                         buffer_info->dma = dma_map_single(rx_ring->dev,
5786                                                           skb->data,
5787                                                           bufsz,
5788                                                           DMA_FROM_DEVICE);
5789                         if (dma_mapping_error(rx_ring->dev,
5790                                               buffer_info->dma)) {
5791                                 buffer_info->dma = 0;
5792                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5793                                 rx_ring->rx_stats.alloc_failed++;
5794                                 u64_stats_update_end(&rx_ring->rx_syncp);
5795                                 goto no_buffers;
5796                         }
5797                 }
5798                 /* Refresh the desc even if buffer_addrs didn't change because
5799                  * each write-back erases this info. */
5800                 if (bufsz < IGB_RXBUFFER_1024) {
5801                         rx_desc->read.pkt_addr =
5802                              cpu_to_le64(buffer_info->page_dma);
5803                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5804                 } else {
5805                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5806                         rx_desc->read.hdr_addr = 0;
5807                 }
5808
5809                 i++;
5810                 if (i == rx_ring->count)
5811                         i = 0;
5812                 buffer_info = &rx_ring->buffer_info[i];
5813         }
5814
5815 no_buffers:
5816         if (rx_ring->next_to_use != i) {
5817                 rx_ring->next_to_use = i;
5818                 if (i == 0)
5819                         i = (rx_ring->count - 1);
5820                 else
5821                         i--;
5822
5823                 /* Force memory writes to complete before letting h/w
5824                  * know there are new descriptors to fetch.  (Only
5825                  * applicable for weak-ordered memory model archs,
5826                  * such as IA-64). */
5827                 wmb();
5828                 writel(i, rx_ring->tail);
5829         }
5830 }
5831
5832 /**
5833  * igb_mii_ioctl - handle MII register ioctls
5834  * @netdev: network interface device structure
5835  * @ifr: pointer to the ioctl request data
5836  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
5837  **/
5838 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5839 {
5840         struct igb_adapter *adapter = netdev_priv(netdev);
5841         struct mii_ioctl_data *data = if_mii(ifr);
5842
5843         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5844                 return -EOPNOTSUPP;
5845
5846         switch (cmd) {
5847         case SIOCGMIIPHY:
5848                 data->phy_id = adapter->hw.phy.addr;
5849                 break;
5850         case SIOCGMIIREG:
5851                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5852                                      &data->val_out))
5853                         return -EIO;
5854                 break;
5855         case SIOCSMIIREG:
5856         default:
5857                 return -EOPNOTSUPP;
5858         }
5859         return 0;
5860 }
5861
5862 /**
5863  * igb_hwtstamp_ioctl - control hardware time stamping
5864  * @netdev: network interface device structure
5865  * @ifr: pointer to the ioctl request holding a hwtstamp_config
5866  * @cmd: ioctl command (SIOCSHWTSTAMP)
5867  *
5868  * Outgoing time stamping can be enabled and disabled. Play nice and
5869  * disable it when requested, although it shouldn't cause any overhead
5870  * when no packet needs it. At most one packet in the queue may be
5871  * marked for time stamping, otherwise it would be impossible to tell
5872  * for sure to which packet the hardware time stamp belongs.
5873  *
5874  * Incoming time stamping has to be configured via the hardware
5875  * filters. Not all combinations are supported, in particular event
5876  * type has to be specified. Matching the kind of event packet is
5877  * not supported, with the exception of "all V2 events regardless of
5878  * layer 2 or 4".
5879  *
5880  **/
5881 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5882                               struct ifreq *ifr, int cmd)
5883 {
5884         struct igb_adapter *adapter = netdev_priv(netdev);
5885         struct e1000_hw *hw = &adapter->hw;
5886         struct hwtstamp_config config;
5887         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5888         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5889         u32 tsync_rx_cfg = 0;
5890         bool is_l4 = false;
5891         bool is_l2 = false;
5892         u32 regval;
5893
5894         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5895                 return -EFAULT;
5896
5897         /* reserved for future extensions */
5898         if (config.flags)
5899                 return -EINVAL;
5900
5901         switch (config.tx_type) {
5902         case HWTSTAMP_TX_OFF:
5903                 tsync_tx_ctl = 0;
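                     /* fall through - HWTSTAMP_TX_OFF and HWTSTAMP_TX_ON are both valid */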
5904         case HWTSTAMP_TX_ON:
5905                 break;
5906         default:
5907                 return -ERANGE;
5908         }
5909
5910         switch (config.rx_filter) {
5911         case HWTSTAMP_FILTER_NONE:
5912                 tsync_rx_ctl = 0;
5913                 break;
5914         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5915         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5916         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5917         case HWTSTAMP_FILTER_ALL:
5918                 /*
5919                  * register TSYNCRXCFG must be set, therefore it is not
5920                  * possible to time stamp both Sync and Delay_Req messages
5921                  * => fall back to time stamping all packets
5922                  */
5923                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5924                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5925                 break;
5926         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5927                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5928                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5929                 is_l4 = true;
5930                 break;
5931         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5932                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5933                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5934                 is_l4 = true;
5935                 break;
5936         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5937         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5938                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5939                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5940                 is_l2 = true;
5941                 is_l4 = true;
5942                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5943                 break;
5944         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5945         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5946                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5947                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5948                 is_l2 = true;
5949                 is_l4 = true;
5950                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5951                 break;
5952         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5953         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5954         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5955                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5956                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5957                 is_l2 = true;
5958                 break;
5959         default:
5960                 return -ERANGE;
5961         }
5962
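             /* the 82575 does not support hardware time stamping */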
5963         if (hw->mac.type == e1000_82575) {
5964                 if (tsync_rx_ctl || tsync_tx_ctl)
5965                         return -EINVAL;
5966                 return 0;
5967         }
5968
5969         /*
5970          * Per-packet timestamping only works if all packets are
5971          * timestamped, so enable timestamping in all packets as
5972          * long as one rx filter was configured.
5973          */
5974         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5975                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5976                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5977         }
5978
5979         /* enable/disable TX */
5980         regval = rd32(E1000_TSYNCTXCTL);
5981         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5982         regval |= tsync_tx_ctl;
5983         wr32(E1000_TSYNCTXCTL, regval);
5984
5985         /* enable/disable RX */
5986         regval = rd32(E1000_TSYNCRXCTL);
5987         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5988         regval |= tsync_rx_ctl;
5989         wr32(E1000_TSYNCRXCTL, regval);
5990
5991         /* define which PTP packets are time stamped */
5992         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5993
5994         /* define ethertype filter for timestamped packets */
5995         if (is_l2)
5996                 wr32(E1000_ETQF(3),
5997                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5998                                  E1000_ETQF_1588 | /* enable timestamping */
5999                                  ETH_P_1588));     /* 1588 eth protocol type */
6000         else
6001                 wr32(E1000_ETQF(3), 0);
6002
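     /* UDP port 319 carries PTP event messages (Sync, Delay_Req) */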
6003 #define PTP_PORT 319
6004         /* L4 Queue Filter[3]: filter by destination port and protocol */
6005         if (is_l4) {
6006                 u32 ftqf = (IPPROTO_UDP /* UDP */
6007                         | E1000_FTQF_VF_BP /* VF not compared */
6008                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6009                         | E1000_FTQF_MASK); /* mask all inputs */
6010                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6011
6012                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6013                 wr32(E1000_IMIREXT(3),
6014                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6015                 if (hw->mac.type == e1000_82576) {
6016                         /* enable source port check */
6017                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6018                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6019                 }
6020                 wr32(E1000_FTQF(3), ftqf);
6021         } else {
6022                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6023         }
6024         wrfl();
6025
6026         adapter->hwtstamp_config = config;
6027
6028         /* clear TX/RX time stamp registers, just to be sure */
6029         regval = rd32(E1000_TXSTMPH);
6030         regval = rd32(E1000_RXSTMPH);
6031
6032         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6033                 -EFAULT : 0;
6034 }
6035
6036 /**
6037  * igb_ioctl - dispatch device-specific ioctls
6038  * @netdev: network interface device structure
6039  * @ifr: pointer to the ioctl request data
6040  * @cmd: ioctl command
6041  **/
6042 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6043 {
6044         switch (cmd) {
6045         case SIOCGMIIPHY:
6046         case SIOCGMIIREG:
6047         case SIOCSMIIREG:
6048                 return igb_mii_ioctl(netdev, ifr, cmd);
6049         case SIOCSHWTSTAMP:
6050                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6051         default:
6052                 return -EOPNOTSUPP;
6053         }
6054 }
6055
6056 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6057 {
6058         struct igb_adapter *adapter = hw->back;
6059         u16 cap_offset;
6060
6061         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6062         if (!cap_offset)
6063                 return -E1000_ERR_CONFIG;
6064
6065         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6066
6067         return 0;
6068 }
6069
6070 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6071 {
6072         struct igb_adapter *adapter = hw->back;
6073         u16 cap_offset;
6074
6075         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6076         if (!cap_offset)
6077                 return -E1000_ERR_CONFIG;
6078
6079         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6080
6081         return 0;
6082 }
6083
6084 static void igb_vlan_rx_register(struct net_device *netdev,
6085                                  struct vlan_group *grp)
6086 {
6087         struct igb_adapter *adapter = netdev_priv(netdev);
6088         struct e1000_hw *hw = &adapter->hw;
6089         u32 ctrl, rctl;
6090
6091         igb_irq_disable(adapter);
6092         adapter->vlgrp = grp;
6093
6094         if (grp) {
6095                 /* enable VLAN tag insert/strip */
6096                 ctrl = rd32(E1000_CTRL);
6097                 ctrl |= E1000_CTRL_VME;
6098                 wr32(E1000_CTRL, ctrl);
6099
6100                 /* Disable CFI check */
6101                 rctl = rd32(E1000_RCTL);
6102                 rctl &= ~E1000_RCTL_CFIEN;
6103                 wr32(E1000_RCTL, rctl);
6104         } else {
6105                 /* disable VLAN tag insert/strip */
6106                 ctrl = rd32(E1000_CTRL);
6107                 ctrl &= ~E1000_CTRL_VME;
6108                 wr32(E1000_CTRL, ctrl);
6109         }
6110
6111         igb_rlpml_set(adapter);
6112
6113         if (!test_bit(__IGB_DOWN, &adapter->state))
6114                 igb_irq_enable(adapter);
6115 }
6116
6117 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6118 {
6119         struct igb_adapter *adapter = netdev_priv(netdev);
6120         struct e1000_hw *hw = &adapter->hw;
6121         int pf_id = adapter->vfs_allocated_count;
6122
6123         /* attempt to add filter to vlvf array */
6124         igb_vlvf_set(adapter, vid, true, pf_id);
6125
6126         /* add the filter since PF can receive vlans w/o entry in vlvf */
6127         igb_vfta_set(hw, vid, true);
6128 }
6129
6130 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6131 {
6132         struct igb_adapter *adapter = netdev_priv(netdev);
6133         struct e1000_hw *hw = &adapter->hw;
6134         int pf_id = adapter->vfs_allocated_count;
6135         s32 err;
6136
6137         igb_irq_disable(adapter);
6138         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6139
6140         if (!test_bit(__IGB_DOWN, &adapter->state))
6141                 igb_irq_enable(adapter);
6142
6143         /* remove vlan from VLVF table array */
6144         err = igb_vlvf_set(adapter, vid, false, pf_id);
6145
6146         /* if vid was not present in VLVF just remove it from table */
6147         if (err)
6148                 igb_vfta_set(hw, vid, false);
6149 }
6150
6151 static void igb_restore_vlan(struct igb_adapter *adapter)
6152 {
6153         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6154
6155         if (adapter->vlgrp) {
6156                 u16 vid;
6157                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6158                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6159                                 continue;
6160                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6161                 }
6162         }
6163 }
6164
6165 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6166 {
6167         struct pci_dev *pdev = adapter->pdev;
6168         struct e1000_mac_info *mac = &adapter->hw.mac;
6169
6170         mac->autoneg = 0;
6171
6172         /* Fiber NICs only allow 1000 Mbps full duplex */
6173         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6174                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6175                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6176                 return -EINVAL;
6177         }
6178
6179         switch (spddplx) {
6180         case SPEED_10 + DUPLEX_HALF:
6181                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6182                 break;
6183         case SPEED_10 + DUPLEX_FULL:
6184                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6185                 break;
6186         case SPEED_100 + DUPLEX_HALF:
6187                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6188                 break;
6189         case SPEED_100 + DUPLEX_FULL:
6190                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6191                 break;
6192         case SPEED_1000 + DUPLEX_FULL:
6193                 mac->autoneg = 1;
6194                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6195                 break;
6196         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6197         default:
6198                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6199                 return -EINVAL;
6200         }
6201         return 0;
6202 }
6203
6204 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6205 {
6206         struct net_device *netdev = pci_get_drvdata(pdev);
6207         struct igb_adapter *adapter = netdev_priv(netdev);
6208         struct e1000_hw *hw = &adapter->hw;
6209         u32 ctrl, rctl, status;
6210         u32 wufc = adapter->wol;
6211 #ifdef CONFIG_PM
6212         int retval = 0;
6213 #endif
6214
6215         netif_device_detach(netdev);
6216
6217         if (netif_running(netdev))
6218                 igb_close(netdev);
6219
6220         igb_clear_interrupt_scheme(adapter);
6221
6222 #ifdef CONFIG_PM
6223         retval = pci_save_state(pdev);
6224         if (retval)
6225                 return retval;
6226 #endif
6227
6228         status = rd32(E1000_STATUS);
6229         if (status & E1000_STATUS_LU)
6230                 wufc &= ~E1000_WUFC_LNKC;
6231
6232         if (wufc) {
6233                 igb_setup_rctl(adapter);
6234                 igb_set_rx_mode(netdev);
6235
6236                 /* turn on all-multi mode if wake on multicast is enabled */
6237                 if (wufc & E1000_WUFC_MC) {
6238                         rctl = rd32(E1000_RCTL);
6239                         rctl |= E1000_RCTL_MPE;
6240                         wr32(E1000_RCTL, rctl);
6241                 }
6242
6243                 ctrl = rd32(E1000_CTRL);
6244                 /* advertise wake from D3Cold */
6245                 #define E1000_CTRL_ADVD3WUC 0x00100000
6246                 /* phy power management enable */
6247                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6248                 ctrl |= E1000_CTRL_ADVD3WUC;
6249                 wr32(E1000_CTRL, ctrl);
6250
6251                 /* Allow time for pending master requests to run */
6252                 igb_disable_pcie_master(hw);
6253
6254                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6255                 wr32(E1000_WUFC, wufc);
6256         } else {
6257                 wr32(E1000_WUC, 0);
6258                 wr32(E1000_WUFC, 0);
6259         }
6260
6261         *enable_wake = wufc || adapter->en_mng_pt;
6262         if (!*enable_wake)
6263                 igb_power_down_link(adapter);
6264         else
6265                 igb_power_up_link(adapter);
6266
6267         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6268          * would have already happened in close and is redundant. */
6269         igb_release_hw_control(adapter);
6270
6271         pci_disable_device(pdev);
6272
6273         return 0;
6274 }
6275
6276 #ifdef CONFIG_PM
6277 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6278 {
6279         int retval;
6280         bool wake;
6281
6282         retval = __igb_shutdown(pdev, &wake);
6283         if (retval)
6284                 return retval;
6285
6286         if (wake) {
6287                 pci_prepare_to_sleep(pdev);
6288         } else {
6289                 pci_wake_from_d3(pdev, false);
6290                 pci_set_power_state(pdev, PCI_D3hot);
6291         }
6292
6293         return 0;
6294 }
6295
6296 static int igb_resume(struct pci_dev *pdev)
6297 {
6298         struct net_device *netdev = pci_get_drvdata(pdev);
6299         struct igb_adapter *adapter = netdev_priv(netdev);
6300         struct e1000_hw *hw = &adapter->hw;
6301         int err;
6302
6303         pci_set_power_state(pdev, PCI_D0);
6304         pci_restore_state(pdev);
6305         pci_save_state(pdev);
6306
6307         err = pci_enable_device_mem(pdev);
6308         if (err) {
6309                 dev_err(&pdev->dev,
6310                         "igb: Cannot enable PCI device from suspend\n");
6311                 return err;
6312         }
6313         pci_set_master(pdev);
6314
6315         pci_enable_wake(pdev, PCI_D3hot, 0);
6316         pci_enable_wake(pdev, PCI_D3cold, 0);
6317
6318         if (igb_init_interrupt_scheme(adapter)) {
6319                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6320                 return -ENOMEM;
6321         }
6322
6323         igb_reset(adapter);
6324
6325         /* let the f/w know that the h/w is now under the control of the
6326          * driver. */
6327         igb_get_hw_control(adapter);
6328
6329         wr32(E1000_WUS, ~0);
6330
6331         if (netif_running(netdev)) {
6332                 err = igb_open(netdev);
6333                 if (err)
6334                         return err;
6335         }
6336
6337         netif_device_attach(netdev);
6338
6339         return 0;
6340 }
6341 #endif
6342
6343 static void igb_shutdown(struct pci_dev *pdev)
6344 {
6345         bool wake;
6346
6347         __igb_shutdown(pdev, &wake);
6348
6349         if (system_state == SYSTEM_POWER_OFF) {
6350                 pci_wake_from_d3(pdev, wake);
6351                 pci_set_power_state(pdev, PCI_D3hot);
6352         }
6353 }
6354
6355 #ifdef CONFIG_NET_POLL_CONTROLLER
6356 /*
6357  * Polling 'interrupt' - used by things like netconsole to send skbs
6358  * without having to re-enable interrupts. It's not called while
6359  * the interrupt routine is executing.
6360  */
6361 static void igb_netpoll(struct net_device *netdev)
6362 {
6363         struct igb_adapter *adapter = netdev_priv(netdev);
6364         struct e1000_hw *hw = &adapter->hw;
6365         int i;
6366
6367         if (!adapter->msix_entries) {
6368                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6369                 igb_irq_disable(adapter);
6370                 napi_schedule(&q_vector->napi);
6371                 return;
6372         }
6373
6374         for (i = 0; i < adapter->num_q_vectors; i++) {
6375                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6376                 wr32(E1000_EIMC, q_vector->eims_value);
6377                 napi_schedule(&q_vector->napi);
6378         }
6379 }
6380 #endif /* CONFIG_NET_POLL_CONTROLLER */
6381
6382 /**
6383  * igb_io_error_detected - called when PCI error is detected
6384  * @pdev: Pointer to PCI device
6385  * @state: The current pci connection state
6386  *
6387  * This function is called after a PCI bus error affecting
6388  * this device has been detected.
6389  */
6390 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6391                                               pci_channel_state_t state)
6392 {
6393         struct net_device *netdev = pci_get_drvdata(pdev);
6394         struct igb_adapter *adapter = netdev_priv(netdev);
6395
6396         netif_device_detach(netdev);
6397
6398         if (state == pci_channel_io_perm_failure)
6399                 return PCI_ERS_RESULT_DISCONNECT;
6400
6401         if (netif_running(netdev))
6402                 igb_down(adapter);
6403         pci_disable_device(pdev);
6404
6405         /* Request a slot reset. */
6406         return PCI_ERS_RESULT_NEED_RESET;
6407 }
6408
6409 /**
6410  * igb_io_slot_reset - called after the pci bus has been reset.
6411  * @pdev: Pointer to PCI device
6412  *
6413  * Restart the card from scratch, as if from a cold-boot. Implementation
6414  * resembles the first-half of the igb_resume routine.
6415  */
6416 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6417 {
6418         struct net_device *netdev = pci_get_drvdata(pdev);
6419         struct igb_adapter *adapter = netdev_priv(netdev);
6420         struct e1000_hw *hw = &adapter->hw;
6421         pci_ers_result_t result;
6422         int err;
6423
6424         if (pci_enable_device_mem(pdev)) {
6425                 dev_err(&pdev->dev,
6426                         "Cannot re-enable PCI device after reset.\n");
6427                 result = PCI_ERS_RESULT_DISCONNECT;
6428         } else {
6429                 pci_set_master(pdev);
6430                 pci_restore_state(pdev);
6431                 pci_save_state(pdev);
6432
6433                 pci_enable_wake(pdev, PCI_D3hot, 0);
6434                 pci_enable_wake(pdev, PCI_D3cold, 0);
6435
6436                 igb_reset(adapter);
6437                 wr32(E1000_WUS, ~0);
6438                 result = PCI_ERS_RESULT_RECOVERED;
6439         }
6440
6441         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6442         if (err) {
6443                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6444                         "failed 0x%0x\n", err);
6445                 /* non-fatal, continue */
6446         }
6447
6448         return result;
6449 }
6450
6451 /**
6452  * igb_io_resume - called when traffic can start flowing again.
6453  * @pdev: Pointer to PCI device
6454  *
6455  * This callback is called when the error recovery driver tells us that
6456  * its OK to resume normal operation. Implementation resembles the
6457  * second-half of the igb_resume routine.
6458  */
6459 static void igb_io_resume(struct pci_dev *pdev)
6460 {
6461         struct net_device *netdev = pci_get_drvdata(pdev);
6462         struct igb_adapter *adapter = netdev_priv(netdev);
6463
6464         if (netif_running(netdev)) {
6465                 if (igb_up(adapter)) {
6466                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6467                         return;
6468                 }
6469         }
6470
6471         netif_device_attach(netdev);
6472
6473         /* let the f/w know that the h/w is now under the control of the
6474          * driver. */
6475         igb_get_hw_control(adapter);
6476 }
6477
6478 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6479                              u8 qsel)
6480 {
6481         u32 rar_low, rar_high;
6482         struct e1000_hw *hw = &adapter->hw;
6483
6484         /* HW expects these in little endian so we reverse the byte order
6485          * from network order (big endian) to little endian
6486          */
6487         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6488                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6489         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6490
6491         /* Indicate to hardware the Address is Valid. */
6492         rar_high |= E1000_RAH_AV;
6493
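             /* 82575 encodes the pool number directly in the RAH pool field;
              * later MACs treat the field as a one-bit-per-pool mask */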
6494         if (hw->mac.type == e1000_82575)
6495                 rar_high |= E1000_RAH_POOL_1 * qsel;
6496         else
6497                 rar_high |= E1000_RAH_POOL_1 << qsel;
6498
6499         wr32(E1000_RAL(index), rar_low);
6500         wrfl();
6501         wr32(E1000_RAH(index), rar_high);
6502         wrfl();
6503 }
6504
6505 static int igb_set_vf_mac(struct igb_adapter *adapter,
6506                           int vf, unsigned char *mac_addr)
6507 {
6508         struct e1000_hw *hw = &adapter->hw;
6509         /* VF MAC addresses start at the end of the receive addresses and
6510          * move towards the first; as a result a collision should not be possible */
6511         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6512
6513         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6514
6515         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6516
6517         return 0;
6518 }
6519
6520 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6521 {
6522         struct igb_adapter *adapter = netdev_priv(netdev);
6523         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6524                 return -EINVAL;
6525         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6526         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6527         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6528                                       " change effective.\n");
6529         if (test_bit(__IGB_DOWN, &adapter->state)) {
6530                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6531                          " but the PF device is not up.\n");
6532                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6533                          " attempting to use the VF device.\n");
6534         }
6535         return igb_set_vf_mac(adapter, vf, mac);
6536 }
6537
6538 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6539 {
6540         return -EOPNOTSUPP;
6541 }
6542
6543 static int igb_ndo_get_vf_config(struct net_device *netdev,
6544                                  int vf, struct ifla_vf_info *ivi)
6545 {
6546         struct igb_adapter *adapter = netdev_priv(netdev);
6547         if (vf >= adapter->vfs_allocated_count)
6548                 return -EINVAL;
6549         ivi->vf = vf;
6550         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6551         ivi->tx_rate = 0;
6552         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6553         ivi->qos = adapter->vf_data[vf].pf_qos;
6554         return 0;
6555 }
6556
6557 static void igb_vmm_control(struct igb_adapter *adapter)
6558 {
6559         struct e1000_hw *hw = &adapter->hw;
6560         u32 reg;
6561
6562         switch (hw->mac.type) {
6563         case e1000_82575:
6564         default:
6565                 /* replication is not supported for 82575 */
6566                 return;
6567         case e1000_82576:
6568                 /* notify HW that the MAC is adding vlan tags */
6569                 reg = rd32(E1000_DTXCTL);
6570                 reg |= E1000_DTXCTL_VLAN_ADDED;
6571                 wr32(E1000_DTXCTL, reg);
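                     /* fall through - 82576 also needs the RPLOLR setting below */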
6572         case e1000_82580:
6573                 /* enable replication vlan tag stripping */
6574                 reg = rd32(E1000_RPLOLR);
6575                 reg |= E1000_RPLOLR_STRVLAN;
6576                 wr32(E1000_RPLOLR, reg);
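                     /* fall through */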
6577         case e1000_i350:
6578                 /* none of the above registers are supported by i350 */
6579                 break;
6580         }
6581
6582         if (adapter->vfs_allocated_count) {
6583                 igb_vmdq_set_loopback_pf(hw, true);
6584                 igb_vmdq_set_replication_pf(hw, true);
6585         } else {
6586                 igb_vmdq_set_loopback_pf(hw, false);
6587                 igb_vmdq_set_replication_pf(hw, false);
6588         }
6589 }
6590
6591 /* igb_main.c */