igb: Update Intel copyright notice for driver source.
[linux-block.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
90         /* required last entry */
91         {0, }
92 };
93
94 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
95
96 void igb_reset(struct igb_adapter *);
97 static int igb_setup_all_tx_resources(struct igb_adapter *);
98 static int igb_setup_all_rx_resources(struct igb_adapter *);
99 static void igb_free_all_tx_resources(struct igb_adapter *);
100 static void igb_free_all_rx_resources(struct igb_adapter *);
101 static void igb_setup_mrqc(struct igb_adapter *);
102 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
103 static void __devexit igb_remove(struct pci_dev *pdev);
104 static int igb_sw_init(struct igb_adapter *);
105 static int igb_open(struct net_device *);
106 static int igb_close(struct net_device *);
107 static void igb_configure_tx(struct igb_adapter *);
108 static void igb_configure_rx(struct igb_adapter *);
109 static void igb_clean_all_tx_rings(struct igb_adapter *);
110 static void igb_clean_all_rx_rings(struct igb_adapter *);
111 static void igb_clean_tx_ring(struct igb_ring *);
112 static void igb_clean_rx_ring(struct igb_ring *);
113 static void igb_set_rx_mode(struct net_device *);
114 static void igb_update_phy_info(unsigned long);
115 static void igb_watchdog(unsigned long);
116 static void igb_watchdog_task(struct work_struct *);
117 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
118 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
119                                                  struct rtnl_link_stats64 *stats);
120 static int igb_change_mtu(struct net_device *, int);
121 static int igb_set_mac(struct net_device *, void *);
122 static void igb_set_uta(struct igb_adapter *adapter);
123 static irqreturn_t igb_intr(int irq, void *);
124 static irqreturn_t igb_intr_msi(int irq, void *);
125 static irqreturn_t igb_msix_other(int irq, void *);
126 static irqreturn_t igb_msix_ring(int irq, void *);
127 #ifdef CONFIG_IGB_DCA
128 static void igb_update_dca(struct igb_q_vector *);
129 static void igb_setup_dca(struct igb_adapter *);
130 #endif /* CONFIG_IGB_DCA */
131 static bool igb_clean_tx_irq(struct igb_q_vector *);
132 static int igb_poll(struct napi_struct *, int);
133 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
134 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
135 static void igb_tx_timeout(struct net_device *);
136 static void igb_reset_task(struct work_struct *);
137 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
138 static void igb_vlan_rx_add_vid(struct net_device *, u16);
139 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
140 static void igb_restore_vlan(struct igb_adapter *);
141 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
142 static void igb_ping_all_vfs(struct igb_adapter *);
143 static void igb_msg_task(struct igb_adapter *);
144 static void igb_vmm_control(struct igb_adapter *);
145 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
146 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
147 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
148 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
149                                int vf, u16 vlan, u8 qos);
150 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
151 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
152                                  struct ifla_vf_info *ivi);
153 static void igb_check_vf_rate_limit(struct igb_adapter *);
154
155 #ifdef CONFIG_PM
156 static int igb_suspend(struct pci_dev *, pm_message_t);
157 static int igb_resume(struct pci_dev *);
158 #endif
159 static void igb_shutdown(struct pci_dev *);
160 #ifdef CONFIG_IGB_DCA
161 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
162 static struct notifier_block dca_notifier = {
163         .notifier_call  = igb_notify_dca,
164         .next           = NULL,
165         .priority       = 0
166 };
167 #endif
168 #ifdef CONFIG_NET_POLL_CONTROLLER
169 /* for netdump / net console */
170 static void igb_netpoll(struct net_device *);
171 #endif
172 #ifdef CONFIG_PCI_IOV
173 static unsigned int max_vfs = 0;
174 module_param(max_vfs, uint, 0);
175 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
176                  "per physical function");
177 #endif /* CONFIG_PCI_IOV */
178
179 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
180                      pci_channel_state_t);
181 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
182 static void igb_io_resume(struct pci_dev *);
183
184 static struct pci_error_handlers igb_err_handler = {
185         .error_detected = igb_io_error_detected,
186         .slot_reset = igb_io_slot_reset,
187         .resume = igb_io_resume,
188 };
189
190
191 static struct pci_driver igb_driver = {
192         .name     = igb_driver_name,
193         .id_table = igb_pci_tbl,
194         .probe    = igb_probe,
195         .remove   = __devexit_p(igb_remove),
196 #ifdef CONFIG_PM
197         /* Power Management Hooks */
198         .suspend  = igb_suspend,
199         .resume   = igb_resume,
200 #endif
201         .shutdown = igb_shutdown,
202         .err_handler = &igb_err_handler
203 };
204
205 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
206 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
207 MODULE_LICENSE("GPL");
208 MODULE_VERSION(DRV_VERSION);
209
210 struct igb_reg_info {
211         u32 ofs;
212         char *name;
213 };
214
215 static const struct igb_reg_info igb_reg_info_tbl[] = {
216
217         /* General Registers */
218         {E1000_CTRL, "CTRL"},
219         {E1000_STATUS, "STATUS"},
220         {E1000_CTRL_EXT, "CTRL_EXT"},
221
222         /* Interrupt Registers */
223         {E1000_ICR, "ICR"},
224
225         /* RX Registers */
226         {E1000_RCTL, "RCTL"},
227         {E1000_RDLEN(0), "RDLEN"},
228         {E1000_RDH(0), "RDH"},
229         {E1000_RDT(0), "RDT"},
230         {E1000_RXDCTL(0), "RXDCTL"},
231         {E1000_RDBAL(0), "RDBAL"},
232         {E1000_RDBAH(0), "RDBAH"},
233
234         /* TX Registers */
235         {E1000_TCTL, "TCTL"},
236         {E1000_TDBAL(0), "TDBAL"},
237         {E1000_TDBAH(0), "TDBAH"},
238         {E1000_TDLEN(0), "TDLEN"},
239         {E1000_TDH(0), "TDH"},
240         {E1000_TDT(0), "TDT"},
241         {E1000_TXDCTL(0), "TXDCTL"},
242         {E1000_TDFH, "TDFH"},
243         {E1000_TDFT, "TDFT"},
244         {E1000_TDFHS, "TDFHS"},
245         {E1000_TDFPC, "TDFPC"},
246
247         /* List Terminator */
248         {}
249 };
250
251 /*
252  * igb_regdump - register printout routine
253  */
254 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
255 {
256         int n = 0;
257         char rname[16];
258         u32 regs[8];
259
260         switch (reginfo->ofs) {
261         case E1000_RDLEN(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDLEN(n));
264                 break;
265         case E1000_RDH(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDH(n));
268                 break;
269         case E1000_RDT(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RDT(n));
272                 break;
273         case E1000_RXDCTL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RXDCTL(n));
276                 break;
277         case E1000_RDBAL(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAL(n));
280                 break;
281         case E1000_RDBAH(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_RDBAH(n));
284                 break;
285         case E1000_TDBAL(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_RDBAL(n));
288                 break;
289         case E1000_TDBAH(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDBAH(n));
292                 break;
293         case E1000_TDLEN(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDLEN(n));
296                 break;
297         case E1000_TDH(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDH(n));
300                 break;
301         case E1000_TDT(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TDT(n));
304                 break;
305         case E1000_TXDCTL(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_TXDCTL(n));
308                 break;
309         default:
310                 printk(KERN_INFO "%-15s %08x\n",
311                         reginfo->name, rd32(reginfo->ofs));
312                 return;
313         }
314
315         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
316         printk(KERN_INFO "%-15s ", rname);
317         for (n = 0; n < 4; n++)
318                 printk(KERN_CONT "%08x ", regs[n]);
319         printk(KERN_CONT "\n");
320 }
321
322 /*
323  * igb_dump - Print registers, tx-rings and rx-rings
324  */
325 static void igb_dump(struct igb_adapter *adapter)
326 {
327         struct net_device *netdev = adapter->netdev;
328         struct e1000_hw *hw = &adapter->hw;
329         struct igb_reg_info *reginfo;
330         int n = 0;
331         struct igb_ring *tx_ring;
332         union e1000_adv_tx_desc *tx_desc;
333         struct my_u0 { u64 a; u64 b; } *u0;
334         struct igb_buffer *buffer_info;
335         struct igb_ring *rx_ring;
336         union e1000_adv_rx_desc *rx_desc;
337         u32 staterr;
338         int i = 0;
339
340         if (!netif_msg_hw(adapter))
341                 return;
342
343         /* Print netdevice Info */
344         if (netdev) {
345                 dev_info(&adapter->pdev->dev, "Net device Info\n");
346                 printk(KERN_INFO "Device Name     state            "
347                         "trans_start      last_rx\n");
348                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
349                 netdev->name,
350                 netdev->state,
351                 netdev->trans_start,
352                 netdev->last_rx);
353         }
354
355         /* Print Registers */
356         dev_info(&adapter->pdev->dev, "Register Dump\n");
357         printk(KERN_INFO " Register Name   Value\n");
358         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
359              reginfo->name; reginfo++) {
360                 igb_regdump(hw, reginfo);
361         }
362
363         /* Print TX Ring Summary */
364         if (!netdev || !netif_running(netdev))
365                 goto exit;
366
367         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
368         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
369                 " leng ntw timestamp\n");
370         for (n = 0; n < adapter->num_tx_queues; n++) {
371                 tx_ring = adapter->tx_ring[n];
372                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
373                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
374                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
375                            (u64)buffer_info->dma,
376                            buffer_info->length,
377                            buffer_info->next_to_watch,
378                            (u64)buffer_info->time_stamp);
379         }
380
381         /* Print TX Rings */
382         if (!netif_msg_tx_done(adapter))
383                 goto rx_ring_summary;
384
385         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
386
387         /* Transmit Descriptor Formats
388          *
389          * Advanced Transmit Descriptor
390          *   +--------------------------------------------------------------+
391          * 0 |         Buffer Address [63:0]                                |
392          *   +--------------------------------------------------------------+
393          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
394          *   +--------------------------------------------------------------+
395          *   63      46 45    40 39 38 36 35 32 31   24             15       0
396          */
397
398         for (n = 0; n < adapter->num_tx_queues; n++) {
399                 tx_ring = adapter->tx_ring[n];
400                 printk(KERN_INFO "------------------------------------\n");
401                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
402                 printk(KERN_INFO "------------------------------------\n");
403                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
404                         "[PlPOCIStDDM Ln] [bi->dma       ] "
405                         "leng  ntw timestamp        bi->skb\n");
406
407                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
408                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
409                         buffer_info = &tx_ring->buffer_info[i];
410                         u0 = (struct my_u0 *)tx_desc;
411                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
412                                 " %04X  %3X %016llX %p", i,
413                                 le64_to_cpu(u0->a),
414                                 le64_to_cpu(u0->b),
415                                 (u64)buffer_info->dma,
416                                 buffer_info->length,
417                                 buffer_info->next_to_watch,
418                                 (u64)buffer_info->time_stamp,
419                                 buffer_info->skb);
420                         if (i == tx_ring->next_to_use &&
421                                 i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC/U\n");
423                         else if (i == tx_ring->next_to_use)
424                                 printk(KERN_CONT " NTU\n");
425                         else if (i == tx_ring->next_to_clean)
426                                 printk(KERN_CONT " NTC\n");
427                         else
428                                 printk(KERN_CONT "\n");
429
430                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
431                                 print_hex_dump(KERN_INFO, "",
432                                         DUMP_PREFIX_ADDRESS,
433                                         16, 1, phys_to_virt(buffer_info->dma),
434                                         buffer_info->length, true);
435                 }
436         }
437
438         /* Print RX Rings Summary */
439 rx_ring_summary:
440         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
441         printk(KERN_INFO "Queue [NTU] [NTC]\n");
442         for (n = 0; n < adapter->num_rx_queues; n++) {
443                 rx_ring = adapter->rx_ring[n];
444                 printk(KERN_INFO " %5d %5X %5X\n", n,
445                            rx_ring->next_to_use, rx_ring->next_to_clean);
446         }
447
448         /* Print RX Rings */
449         if (!netif_msg_rx_status(adapter))
450                 goto exit;
451
452         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
453
454         /* Advanced Receive Descriptor (Read) Format
455          *    63                                           1        0
456          *    +-----------------------------------------------------+
457          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
458          *    +----------------------------------------------+------+
459          *  8 |       Header Buffer Address [63:1]           |  DD  |
460          *    +-----------------------------------------------------+
461          *
462          *
463          * Advanced Receive Descriptor (Write-Back) Format
464          *
465          *   63       48 47    32 31  30      21 20 17 16   4 3     0
466          *   +------------------------------------------------------+
467          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
468          *   | Checksum   Ident  |   |           |    | Type | Type |
469          *   +------------------------------------------------------+
470          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
471          *   +------------------------------------------------------+
472          *   63       48 47    32 31            20 19               0
473          */
474
475         for (n = 0; n < adapter->num_rx_queues; n++) {
476                 rx_ring = adapter->rx_ring[n];
477                 printk(KERN_INFO "------------------------------------\n");
478                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
479                 printk(KERN_INFO "------------------------------------\n");
480                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
481                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
482                         "<-- Adv Rx Read format\n");
483                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
484                         "[vl er S cks ln] ---------------- [bi->skb] "
485                         "<-- Adv Rx Write-Back format\n");
486
487                 for (i = 0; i < rx_ring->count; i++) {
488                         buffer_info = &rx_ring->buffer_info[i];
489                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
490                         u0 = (struct my_u0 *)rx_desc;
491                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
492                         if (staterr & E1000_RXD_STAT_DD) {
493                                 /* Descriptor Done */
494                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
495                                         "%016llX ---------------- %p", i,
496                                         le64_to_cpu(u0->a),
497                                         le64_to_cpu(u0->b),
498                                         buffer_info->skb);
499                         } else {
500                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
501                                         "%016llX %016llX %p", i,
502                                         le64_to_cpu(u0->a),
503                                         le64_to_cpu(u0->b),
504                                         (u64)buffer_info->dma,
505                                         buffer_info->skb);
506
507                                 if (netif_msg_pktdata(adapter)) {
508                                         print_hex_dump(KERN_INFO, "",
509                                                 DUMP_PREFIX_ADDRESS,
510                                                 16, 1,
511                                                 phys_to_virt(buffer_info->dma),
512                                                 rx_ring->rx_buffer_len, true);
513                                         if (rx_ring->rx_buffer_len
514                                                 < IGB_RXBUFFER_1024)
515                                                 print_hex_dump(KERN_INFO, "",
516                                                   DUMP_PREFIX_ADDRESS,
517                                                   16, 1,
518                                                   phys_to_virt(
519                                                     buffer_info->page_dma +
520                                                     buffer_info->page_offset),
521                                                   PAGE_SIZE/2, true);
522                                 }
523                         }
524
525                         if (i == rx_ring->next_to_use)
526                                 printk(KERN_CONT " NTU\n");
527                         else if (i == rx_ring->next_to_clean)
528                                 printk(KERN_CONT " NTC\n");
529                         else
530                                 printk(KERN_CONT "\n");
531
532                 }
533         }
534
535 exit:
536         return;
537 }
538
539
540 /**
541  * igb_read_clock - read raw cycle counter (to be used by time counter)
542  */
543 static cycle_t igb_read_clock(const struct cyclecounter *tc)
544 {
545         struct igb_adapter *adapter =
546                 container_of(tc, struct igb_adapter, cycles);
547         struct e1000_hw *hw = &adapter->hw;
548         u64 stamp = 0;
549         int shift = 0;
550
551         /*
552          * The timestamp latches on lowest register read. For the 82580
553          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
554          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
555          */
556         if (hw->mac.type == e1000_82580) {
557                 stamp = rd32(E1000_SYSTIMR) >> 8;
558                 shift = IGB_82580_TSYNC_SHIFT;
559         }
560
561         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
562         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
563         return stamp;
564 }
565
566 /**
567  * igb_get_hw_dev - return device
568  * used by hardware layer to print debugging information
569  **/
570 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
571 {
572         struct igb_adapter *adapter = hw->back;
573         return adapter->netdev;
574 }
575
576 /**
577  * igb_init_module - Driver Registration Routine
578  *
579  * igb_init_module is the first routine called when the driver is
580  * loaded. All it does is register with the PCI subsystem.
581  **/
582 static int __init igb_init_module(void)
583 {
584         int ret;
585         printk(KERN_INFO "%s - version %s\n",
586                igb_driver_string, igb_driver_version);
587
588         printk(KERN_INFO "%s\n", igb_copyright);
589
590 #ifdef CONFIG_IGB_DCA
591         dca_register_notify(&dca_notifier);
592 #endif
593         ret = pci_register_driver(&igb_driver);
594         return ret;
595 }
596
597 module_init(igb_init_module);
598
599 /**
600  * igb_exit_module - Driver Exit Cleanup Routine
601  *
602  * igb_exit_module is called just before the driver is removed
603  * from memory.
604  **/
605 static void __exit igb_exit_module(void)
606 {
607 #ifdef CONFIG_IGB_DCA
608         dca_unregister_notify(&dca_notifier);
609 #endif
610         pci_unregister_driver(&igb_driver);
611 }
612
613 module_exit(igb_exit_module);
614
615 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
616 /**
617  * igb_cache_ring_register - Descriptor ring to register mapping
618  * @adapter: board private structure to initialize
619  *
620  * Once we know the feature-set enabled for the device, we'll cache
621  * the register offset the descriptor ring is assigned to.
622  **/
623 static void igb_cache_ring_register(struct igb_adapter *adapter)
624 {
625         int i = 0, j = 0;
626         u32 rbase_offset = adapter->vfs_allocated_count;
627
628         switch (adapter->hw.mac.type) {
629         case e1000_82576:
630                 /* The queues are allocated for virtualization such that VF 0
631                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
632                  * In order to avoid collision we start at the first free queue
633                  * and continue consuming queues in the same sequence
634                  */
635                 if (adapter->vfs_allocated_count) {
636                         for (; i < adapter->rss_queues; i++)
637                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
638                                                                Q_IDX_82576(i);
639                 }
640         case e1000_82575:
641         case e1000_82580:
642         case e1000_i350:
643         default:
644                 for (; i < adapter->num_rx_queues; i++)
645                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
646                 for (; j < adapter->num_tx_queues; j++)
647                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
648                 break;
649         }
650 }
651
652 static void igb_free_queues(struct igb_adapter *adapter)
653 {
654         int i;
655
656         for (i = 0; i < adapter->num_tx_queues; i++) {
657                 kfree(adapter->tx_ring[i]);
658                 adapter->tx_ring[i] = NULL;
659         }
660         for (i = 0; i < adapter->num_rx_queues; i++) {
661                 kfree(adapter->rx_ring[i]);
662                 adapter->rx_ring[i] = NULL;
663         }
664         adapter->num_rx_queues = 0;
665         adapter->num_tx_queues = 0;
666 }
667
668 /**
669  * igb_alloc_queues - Allocate memory for all rings
670  * @adapter: board private structure to initialize
671  *
672  * We allocate one ring per queue at run-time since we don't know the
673  * number of queues at compile-time.
674  **/
675 static int igb_alloc_queues(struct igb_adapter *adapter)
676 {
677         struct igb_ring *ring;
678         int i;
679
680         for (i = 0; i < adapter->num_tx_queues; i++) {
681                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
682                 if (!ring)
683                         goto err;
684                 ring->count = adapter->tx_ring_count;
685                 ring->queue_index = i;
686                 ring->dev = &adapter->pdev->dev;
687                 ring->netdev = adapter->netdev;
688                 /* For 82575, context index must be unique per ring. */
689                 if (adapter->hw.mac.type == e1000_82575)
690                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
691                 adapter->tx_ring[i] = ring;
692         }
693
694         for (i = 0; i < adapter->num_rx_queues; i++) {
695                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
696                 if (!ring)
697                         goto err;
698                 ring->count = adapter->rx_ring_count;
699                 ring->queue_index = i;
700                 ring->dev = &adapter->pdev->dev;
701                 ring->netdev = adapter->netdev;
702                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
703                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
704                 /* set flag indicating ring supports SCTP checksum offload */
705                 if (adapter->hw.mac.type >= e1000_82576)
706                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
707                 adapter->rx_ring[i] = ring;
708         }
709
710         igb_cache_ring_register(adapter);
711
712         return 0;
713
714 err:
715         igb_free_queues(adapter);
716
717         return -ENOMEM;
718 }
719
720 #define IGB_N0_QUEUE -1
721 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
722 {
723         u32 msixbm = 0;
724         struct igb_adapter *adapter = q_vector->adapter;
725         struct e1000_hw *hw = &adapter->hw;
726         u32 ivar, index;
727         int rx_queue = IGB_N0_QUEUE;
728         int tx_queue = IGB_N0_QUEUE;
729
730         if (q_vector->rx_ring)
731                 rx_queue = q_vector->rx_ring->reg_idx;
732         if (q_vector->tx_ring)
733                 tx_queue = q_vector->tx_ring->reg_idx;
734
735         switch (hw->mac.type) {
736         case e1000_82575:
737                 /* The 82575 assigns vectors using a bitmask, which matches the
738                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
739                    or more queues to a vector, we write the appropriate bits
740                    into the MSIXBM register for that vector. */
741                 if (rx_queue > IGB_N0_QUEUE)
742                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
743                 if (tx_queue > IGB_N0_QUEUE)
744                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
745                 if (!adapter->msix_entries && msix_vector == 0)
746                         msixbm |= E1000_EIMS_OTHER;
747                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
748                 q_vector->eims_value = msixbm;
749                 break;
750         case e1000_82576:
751                 /* 82576 uses a table-based method for assigning vectors.
752                    Each queue has a single entry in the table to which we write
753                    a vector number along with a "valid" bit.  Sadly, the layout
754                    of the table is somewhat counterintuitive. */
755                 if (rx_queue > IGB_N0_QUEUE) {
756                         index = (rx_queue & 0x7);
757                         ivar = array_rd32(E1000_IVAR0, index);
758                         if (rx_queue < 8) {
759                                 /* vector goes into low byte of register */
760                                 ivar = ivar & 0xFFFFFF00;
761                                 ivar |= msix_vector | E1000_IVAR_VALID;
762                         } else {
763                                 /* vector goes into third byte of register */
764                                 ivar = ivar & 0xFF00FFFF;
765                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
766                         }
767                         array_wr32(E1000_IVAR0, index, ivar);
768                 }
769                 if (tx_queue > IGB_N0_QUEUE) {
770                         index = (tx_queue & 0x7);
771                         ivar = array_rd32(E1000_IVAR0, index);
772                         if (tx_queue < 8) {
773                                 /* vector goes into second byte of register */
774                                 ivar = ivar & 0xFFFF00FF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
776                         } else {
777                                 /* vector goes into high byte of register */
778                                 ivar = ivar & 0x00FFFFFF;
779                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
780                         }
781                         array_wr32(E1000_IVAR0, index, ivar);
782                 }
783                 q_vector->eims_value = 1 << msix_vector;
784                 break;
785         case e1000_82580:
786         case e1000_i350:
787                 /* 82580 uses the same table-based approach as 82576 but has fewer
788                    entries as a result we carry over for queues greater than 4. */
789                 if (rx_queue > IGB_N0_QUEUE) {
790                         index = (rx_queue >> 1);
791                         ivar = array_rd32(E1000_IVAR0, index);
792                         if (rx_queue & 0x1) {
793                                 /* vector goes into third byte of register */
794                                 ivar = ivar & 0xFF00FFFF;
795                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
796                         } else {
797                                 /* vector goes into low byte of register */
798                                 ivar = ivar & 0xFFFFFF00;
799                                 ivar |= msix_vector | E1000_IVAR_VALID;
800                         }
801                         array_wr32(E1000_IVAR0, index, ivar);
802                 }
803                 if (tx_queue > IGB_N0_QUEUE) {
804                         index = (tx_queue >> 1);
805                         ivar = array_rd32(E1000_IVAR0, index);
806                         if (tx_queue & 0x1) {
807                                 /* vector goes into high byte of register */
808                                 ivar = ivar & 0x00FFFFFF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
810                         } else {
811                                 /* vector goes into second byte of register */
812                                 ivar = ivar & 0xFFFF00FF;
813                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
814                         }
815                         array_wr32(E1000_IVAR0, index, ivar);
816                 }
817                 q_vector->eims_value = 1 << msix_vector;
818                 break;
819         default:
820                 BUG();
821                 break;
822         }
823
824         /* add q_vector eims value to global eims_enable_mask */
825         adapter->eims_enable_mask |= q_vector->eims_value;
826
827         /* configure q_vector to set itr on first interrupt */
828         q_vector->set_itr = 1;
829 }
830
831 /**
832  * igb_configure_msix - Configure MSI-X hardware
833  *
834  * igb_configure_msix sets up the hardware to properly
835  * generate MSI-X interrupts.
836  **/
837 static void igb_configure_msix(struct igb_adapter *adapter)
838 {
839         u32 tmp;
840         int i, vector = 0;
841         struct e1000_hw *hw = &adapter->hw;
842
843         adapter->eims_enable_mask = 0;
844
845         /* set vector for other causes, i.e. link changes */
846         switch (hw->mac.type) {
847         case e1000_82575:
848                 tmp = rd32(E1000_CTRL_EXT);
849                 /* enable MSI-X PBA support*/
850                 tmp |= E1000_CTRL_EXT_PBA_CLR;
851
852                 /* Auto-Mask interrupts upon ICR read. */
853                 tmp |= E1000_CTRL_EXT_EIAME;
854                 tmp |= E1000_CTRL_EXT_IRCA;
855
856                 wr32(E1000_CTRL_EXT, tmp);
857
858                 /* enable msix_other interrupt */
859                 array_wr32(E1000_MSIXBM(0), vector++,
860                                       E1000_EIMS_OTHER);
861                 adapter->eims_other = E1000_EIMS_OTHER;
862
863                 break;
864
865         case e1000_82576:
866         case e1000_82580:
867         case e1000_i350:
868                 /* Turn on MSI-X capability first, or our settings
869                  * won't stick.  And it will take days to debug. */
870                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
871                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
872                                 E1000_GPIE_NSICR);
873
874                 /* enable msix_other interrupt */
875                 adapter->eims_other = 1 << vector;
876                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
877
878                 wr32(E1000_IVAR_MISC, tmp);
879                 break;
880         default:
881                 /* do nothing, since nothing else supports MSI-X */
882                 break;
883         } /* switch (hw->mac.type) */
884
885         adapter->eims_enable_mask |= adapter->eims_other;
886
887         for (i = 0; i < adapter->num_q_vectors; i++)
888                 igb_assign_vector(adapter->q_vector[i], vector++);
889
890         wrfl();
891 }
892
893 /**
894  * igb_request_msix - Initialize MSI-X interrupts
895  *
896  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
897  * kernel.
898  **/
899 static int igb_request_msix(struct igb_adapter *adapter)
900 {
901         struct net_device *netdev = adapter->netdev;
902         struct e1000_hw *hw = &adapter->hw;
903         int i, err = 0, vector = 0;
904
905         err = request_irq(adapter->msix_entries[vector].vector,
906                           igb_msix_other, 0, netdev->name, adapter);
907         if (err)
908                 goto out;
909         vector++;
910
911         for (i = 0; i < adapter->num_q_vectors; i++) {
912                 struct igb_q_vector *q_vector = adapter->q_vector[i];
913
914                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
915
916                 if (q_vector->rx_ring && q_vector->tx_ring)
917                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
918                                 q_vector->rx_ring->queue_index);
919                 else if (q_vector->tx_ring)
920                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
921                                 q_vector->tx_ring->queue_index);
922                 else if (q_vector->rx_ring)
923                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
924                                 q_vector->rx_ring->queue_index);
925                 else
926                         sprintf(q_vector->name, "%s-unused", netdev->name);
927
928                 err = request_irq(adapter->msix_entries[vector].vector,
929                                   igb_msix_ring, 0, q_vector->name,
930                                   q_vector);
931                 if (err)
932                         goto out;
933                 vector++;
934         }
935
936         igb_configure_msix(adapter);
937         return 0;
938 out:
939         return err;
940 }
941
942 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
943 {
944         if (adapter->msix_entries) {
945                 pci_disable_msix(adapter->pdev);
946                 kfree(adapter->msix_entries);
947                 adapter->msix_entries = NULL;
948         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
949                 pci_disable_msi(adapter->pdev);
950         }
951 }
952
953 /**
954  * igb_free_q_vectors - Free memory allocated for interrupt vectors
955  * @adapter: board private structure to initialize
956  *
957  * This function frees the memory allocated to the q_vectors.  In addition if
958  * NAPI is enabled it will delete any references to the NAPI struct prior
959  * to freeing the q_vector.
960  **/
961 static void igb_free_q_vectors(struct igb_adapter *adapter)
962 {
963         int v_idx;
964
965         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
966                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
967                 adapter->q_vector[v_idx] = NULL;
968                 if (!q_vector)
969                         continue;
970                 netif_napi_del(&q_vector->napi);
971                 kfree(q_vector);
972         }
973         adapter->num_q_vectors = 0;
974 }
975
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears down, in this order: the queues, the q_vectors, and finally the
 * MSI-X/MSI capability of the device.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* nothing is left that could use MSI-X or MSI */
	igb_reset_interrupt_capability(adapter);
}
988
989 /**
990  * igb_set_interrupt_capability - set MSI or MSI-X if supported
991  *
992  * Attempt to configure interrupts using the best available
993  * capabilities of the hardware and kernel.
994  **/
995 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
996 {
997         int err;
998         int numvecs, i;
999
1000         /* Number of supported queues. */
1001         adapter->num_rx_queues = adapter->rss_queues;
1002         if (adapter->vfs_allocated_count)
1003                 adapter->num_tx_queues = 1;
1004         else
1005                 adapter->num_tx_queues = adapter->rss_queues;
1006
1007         /* start with one vector for every rx queue */
1008         numvecs = adapter->num_rx_queues;
1009
1010         /* if tx handler is separate add 1 for every tx queue */
1011         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1012                 numvecs += adapter->num_tx_queues;
1013
1014         /* store the number of vectors reserved for queues */
1015         adapter->num_q_vectors = numvecs;
1016
1017         /* add 1 vector for link status interrupts */
1018         numvecs++;
1019         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1020                                         GFP_KERNEL);
1021         if (!adapter->msix_entries)
1022                 goto msi_only;
1023
1024         for (i = 0; i < numvecs; i++)
1025                 adapter->msix_entries[i].entry = i;
1026
1027         err = pci_enable_msix(adapter->pdev,
1028                               adapter->msix_entries,
1029                               numvecs);
1030         if (err == 0)
1031                 goto out;
1032
1033         igb_reset_interrupt_capability(adapter);
1034
1035         /* If we can't do MSI-X, try MSI */
1036 msi_only:
1037 #ifdef CONFIG_PCI_IOV
1038         /* disable SR-IOV for non MSI-X configurations */
1039         if (adapter->vf_data) {
1040                 struct e1000_hw *hw = &adapter->hw;
1041                 /* disable iov and allow time for transactions to clear */
1042                 pci_disable_sriov(adapter->pdev);
1043                 msleep(500);
1044
1045                 kfree(adapter->vf_data);
1046                 adapter->vf_data = NULL;
1047                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1048                 msleep(100);
1049                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1050         }
1051 #endif
1052         adapter->vfs_allocated_count = 0;
1053         adapter->rss_queues = 1;
1054         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1055         adapter->num_rx_queues = 1;
1056         adapter->num_tx_queues = 1;
1057         adapter->num_q_vectors = 1;
1058         if (!pci_enable_msi(adapter->pdev))
1059                 adapter->flags |= IGB_FLAG_HAS_MSI;
1060 out:
1061         /* Notify the stack of the (possibly) reduced queue counts. */
1062         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1063         return netif_set_real_num_rx_queues(adapter->netdev,
1064                                             adapter->num_rx_queues);
1065 }
1066
1067 /**
1068  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1069  * @adapter: board private structure to initialize
1070  *
1071  * We allocate one q_vector per queue interrupt.  If allocation fails we
1072  * return -ENOMEM.
1073  **/
1074 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1075 {
1076         struct igb_q_vector *q_vector;
1077         struct e1000_hw *hw = &adapter->hw;
1078         int v_idx;
1079
1080         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1081                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1082                 if (!q_vector)
1083                         goto err_out;
1084                 q_vector->adapter = adapter;
1085                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1086                 q_vector->itr_val = IGB_START_ITR;
1087                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1088                 adapter->q_vector[v_idx] = q_vector;
1089         }
1090         return 0;
1091
1092 err_out:
1093         igb_free_q_vectors(adapter);
1094         return -ENOMEM;
1095 }
1096
1097 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1098                                       int ring_idx, int v_idx)
1099 {
1100         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1101
1102         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1103         q_vector->rx_ring->q_vector = q_vector;
1104         q_vector->itr_val = adapter->rx_itr_setting;
1105         if (q_vector->itr_val && q_vector->itr_val <= 3)
1106                 q_vector->itr_val = IGB_START_ITR;
1107 }
1108
1109 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1110                                       int ring_idx, int v_idx)
1111 {
1112         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1113
1114         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1115         q_vector->tx_ring->q_vector = q_vector;
1116         q_vector->itr_val = adapter->tx_itr_setting;
1117         if (q_vector->itr_val && q_vector->itr_val <= 3)
1118                 q_vector->itr_val = IGB_START_ITR;
1119 }
1120
1121 /**
1122  * igb_map_ring_to_vector - maps allocated queues to vectors
1123  *
1124  * This function maps the recently allocated queues to vectors.
1125  **/
1126 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1127 {
1128         int i;
1129         int v_idx = 0;
1130
1131         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1132             (adapter->num_q_vectors < adapter->num_tx_queues))
1133                 return -ENOMEM;
1134
1135         if (adapter->num_q_vectors >=
1136             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1137                 for (i = 0; i < adapter->num_rx_queues; i++)
1138                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1139                 for (i = 0; i < adapter->num_tx_queues; i++)
1140                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1141         } else {
1142                 for (i = 0; i < adapter->num_rx_queues; i++) {
1143                         if (i < adapter->num_tx_queues)
1144                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1145                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146                 }
1147                 for (; i < adapter->num_tx_queues; i++)
1148                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149         }
1150         return 0;
1151 }
1152
1153 /**
1154  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1155  *
1156  * This function initializes the interrupts and allocates all of the queues.
1157  **/
1158 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1159 {
1160         struct pci_dev *pdev = adapter->pdev;
1161         int err;
1162
1163         err = igb_set_interrupt_capability(adapter);
1164         if (err)
1165                 return err;
1166
1167         err = igb_alloc_q_vectors(adapter);
1168         if (err) {
1169                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1170                 goto err_alloc_q_vectors;
1171         }
1172
1173         err = igb_alloc_queues(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1176                 goto err_alloc_queues;
1177         }
1178
1179         err = igb_map_ring_to_vector(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1182                 goto err_map_queues;
1183         }
1184
1185
1186         return 0;
1187 err_map_queues:
1188         igb_free_queues(adapter);
1189 err_alloc_queues:
1190         igb_free_q_vectors(adapter);
1191 err_alloc_q_vectors:
1192         igb_reset_interrupt_capability(adapter);
1193         return err;
1194 }
1195
1196 /**
1197  * igb_request_irq - initialize interrupts
1198  *
1199  * Attempts to configure interrupts using the best available
1200  * capabilities of the hardware and kernel.
1201  **/
1202 static int igb_request_irq(struct igb_adapter *adapter)
1203 {
1204         struct net_device *netdev = adapter->netdev;
1205         struct pci_dev *pdev = adapter->pdev;
1206         int err = 0;
1207
1208         if (adapter->msix_entries) {
1209                 err = igb_request_msix(adapter);
1210                 if (!err)
1211                         goto request_done;
1212                 /* fall back to MSI */
1213                 igb_clear_interrupt_scheme(adapter);
1214                 if (!pci_enable_msi(adapter->pdev))
1215                         adapter->flags |= IGB_FLAG_HAS_MSI;
1216                 igb_free_all_tx_resources(adapter);
1217                 igb_free_all_rx_resources(adapter);
1218                 adapter->num_tx_queues = 1;
1219                 adapter->num_rx_queues = 1;
1220                 adapter->num_q_vectors = 1;
1221                 err = igb_alloc_q_vectors(adapter);
1222                 if (err) {
1223                         dev_err(&pdev->dev,
1224                                 "Unable to allocate memory for vectors\n");
1225                         goto request_done;
1226                 }
1227                 err = igb_alloc_queues(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for queues\n");
1231                         igb_free_q_vectors(adapter);
1232                         goto request_done;
1233                 }
1234                 igb_setup_all_tx_resources(adapter);
1235                 igb_setup_all_rx_resources(adapter);
1236         } else {
1237                 igb_assign_vector(adapter->q_vector[0], 0);
1238         }
1239
1240         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1241                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1242                                   netdev->name, adapter);
1243                 if (!err)
1244                         goto request_done;
1245
1246                 /* fall back to legacy interrupts */
1247                 igb_reset_interrupt_capability(adapter);
1248                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1249         }
1250
1251         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1252                           netdev->name, adapter);
1253
1254         if (err)
1255                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1256                         err);
1257
1258 request_done:
1259         return err;
1260 }
1261
1262 static void igb_free_irq(struct igb_adapter *adapter)
1263 {
1264         if (adapter->msix_entries) {
1265                 int vector = 0, i;
1266
1267                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1268
1269                 for (i = 0; i < adapter->num_q_vectors; i++) {
1270                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1271                         free_irq(adapter->msix_entries[vector++].vector,
1272                                  q_vector);
1273                 }
1274         } else {
1275                 free_irq(adapter->pdev->irq, adapter);
1276         }
1277 }
1278
1279 /**
1280  * igb_irq_disable - Mask off interrupt generation on the NIC
1281  * @adapter: board private structure
1282  **/
1283 static void igb_irq_disable(struct igb_adapter *adapter)
1284 {
1285         struct e1000_hw *hw = &adapter->hw;
1286
1287         /*
1288          * we need to be careful when disabling interrupts.  The VFs are also
1289          * mapped into these registers and so clearing the bits can cause
1290          * issues on the VF drivers so we only need to clear what we set
1291          */
1292         if (adapter->msix_entries) {
1293                 u32 regval = rd32(E1000_EIAM);
1294                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1295                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1296                 regval = rd32(E1000_EIAC);
1297                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1298         }
1299
1300         wr32(E1000_IAM, 0);
1301         wr32(E1000_IMC, ~0);
1302         wrfl();
1303         if (adapter->msix_entries) {
1304                 int i;
1305                 for (i = 0; i < adapter->num_q_vectors; i++)
1306                         synchronize_irq(adapter->msix_entries[i].vector);
1307         } else {
1308                 synchronize_irq(adapter->pdev->irq);
1309         }
1310 }
1311
1312 /**
1313  * igb_irq_enable - Enable default interrupt generation settings
1314  * @adapter: board private structure
1315  **/
1316 static void igb_irq_enable(struct igb_adapter *adapter)
1317 {
1318         struct e1000_hw *hw = &adapter->hw;
1319
1320         if (adapter->msix_entries) {
1321                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1322                 u32 regval = rd32(E1000_EIAC);
1323                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1324                 regval = rd32(E1000_EIAM);
1325                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1326                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1327                 if (adapter->vfs_allocated_count) {
1328                         wr32(E1000_MBVFIMR, 0xFF);
1329                         ims |= E1000_IMS_VMMB;
1330                 }
1331                 if (adapter->hw.mac.type == e1000_82580)
1332                         ims |= E1000_IMS_DRSTA;
1333
1334                 wr32(E1000_IMS, ims);
1335         } else {
1336                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1337                                 E1000_IMS_DRSTA);
1338                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1339                                 E1000_IMS_DRSTA);
1340         }
1341 }
1342
1343 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1344 {
1345         struct e1000_hw *hw = &adapter->hw;
1346         u16 vid = adapter->hw.mng_cookie.vlan_id;
1347         u16 old_vid = adapter->mng_vlan_id;
1348
1349         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1350                 /* add VID to filter table */
1351                 igb_vfta_set(hw, vid, true);
1352                 adapter->mng_vlan_id = vid;
1353         } else {
1354                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1355         }
1356
1357         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1358             (vid != old_vid) &&
1359             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1360                 /* remove VID from filter table */
1361                 igb_vfta_set(hw, old_vid, false);
1362         }
1363 }
1364
1365 /**
1366  * igb_release_hw_control - release control of the h/w to f/w
1367  * @adapter: address of board private structure
1368  *
1369  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1370  * For ASF and Pass Through versions of f/w this means that the
1371  * driver is no longer loaded.
1372  *
1373  **/
1374 static void igb_release_hw_control(struct igb_adapter *adapter)
1375 {
1376         struct e1000_hw *hw = &adapter->hw;
1377         u32 ctrl_ext;
1378
1379         /* Let firmware take over control of h/w */
1380         ctrl_ext = rd32(E1000_CTRL_EXT);
1381         wr32(E1000_CTRL_EXT,
1382                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1383 }
1384
1385 /**
1386  * igb_get_hw_control - get control of the h/w from f/w
1387  * @adapter: address of board private structure
1388  *
1389  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1390  * For ASF and Pass Through versions of f/w this means that
1391  * the driver is loaded.
1392  *
1393  **/
1394 static void igb_get_hw_control(struct igb_adapter *adapter)
1395 {
1396         struct e1000_hw *hw = &adapter->hw;
1397         u32 ctrl_ext;
1398
1399         /* Let firmware know the driver has taken over */
1400         ctrl_ext = rd32(E1000_CTRL_EXT);
1401         wr32(E1000_CTRL_EXT,
1402                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1403 }
1404
1405 /**
1406  * igb_configure - configure the hardware for RX and TX
1407  * @adapter: private board structure
1408  **/
1409 static void igb_configure(struct igb_adapter *adapter)
1410 {
1411         struct net_device *netdev = adapter->netdev;
1412         int i;
1413
1414         igb_get_hw_control(adapter);
1415         igb_set_rx_mode(netdev);
1416
1417         igb_restore_vlan(adapter);
1418
1419         igb_setup_tctl(adapter);
1420         igb_setup_mrqc(adapter);
1421         igb_setup_rctl(adapter);
1422
1423         igb_configure_tx(adapter);
1424         igb_configure_rx(adapter);
1425
1426         igb_rx_fifo_flush_82575(&adapter->hw);
1427
1428         /* call igb_desc_unused which always leaves
1429          * at least 1 descriptor unused to make sure
1430          * next_to_use != next_to_clean */
1431         for (i = 0; i < adapter->num_rx_queues; i++) {
1432                 struct igb_ring *ring = adapter->rx_ring[i];
1433                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1434         }
1435 }
1436
1437 /**
1438  * igb_power_up_link - Power up the phy/serdes link
1439  * @adapter: address of board private structure
1440  **/
1441 void igb_power_up_link(struct igb_adapter *adapter)
1442 {
1443         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1444                 igb_power_up_phy_copper(&adapter->hw);
1445         else
1446                 igb_power_up_serdes_link_82575(&adapter->hw);
1447 }
1448
1449 /**
1450  * igb_power_down_link - Power down the phy/serdes link
1451  * @adapter: address of board private structure
1452  */
1453 static void igb_power_down_link(struct igb_adapter *adapter)
1454 {
1455         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456                 igb_power_down_phy_copper_82575(&adapter->hw);
1457         else
1458                 igb_shutdown_serdes_link_82575(&adapter->hw);
1459 }
1460
1461 /**
1462  * igb_up - Open the interface and prepare it to handle traffic
1463  * @adapter: board private structure
1464  **/
1465 int igb_up(struct igb_adapter *adapter)
1466 {
1467         struct e1000_hw *hw = &adapter->hw;
1468         int i;
1469
1470         /* hardware has been reset, we need to reload some things */
1471         igb_configure(adapter);
1472
1473         clear_bit(__IGB_DOWN, &adapter->state);
1474
1475         for (i = 0; i < adapter->num_q_vectors; i++) {
1476                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1477                 napi_enable(&q_vector->napi);
1478         }
1479         if (adapter->msix_entries)
1480                 igb_configure_msix(adapter);
1481         else
1482                 igb_assign_vector(adapter->q_vector[0], 0);
1483
1484         /* Clear any pending interrupts. */
1485         rd32(E1000_ICR);
1486         igb_irq_enable(adapter);
1487
1488         /* notify VFs that reset has been completed */
1489         if (adapter->vfs_allocated_count) {
1490                 u32 reg_data = rd32(E1000_CTRL_EXT);
1491                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1492                 wr32(E1000_CTRL_EXT, reg_data);
1493         }
1494
1495         netif_tx_start_all_queues(adapter->netdev);
1496
1497         /* start the watchdog. */
1498         hw->mac.get_link_status = 1;
1499         schedule_work(&adapter->watchdog_task);
1500
1501         return 0;
1502 }
1503
1504 void igb_down(struct igb_adapter *adapter)
1505 {
1506         struct net_device *netdev = adapter->netdev;
1507         struct e1000_hw *hw = &adapter->hw;
1508         u32 tctl, rctl;
1509         int i;
1510
1511         /* signal that we're down so the interrupt handler does not
1512          * reschedule our watchdog timer */
1513         set_bit(__IGB_DOWN, &adapter->state);
1514
1515         /* disable receives in the hardware */
1516         rctl = rd32(E1000_RCTL);
1517         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1518         /* flush and sleep below */
1519
1520         netif_tx_stop_all_queues(netdev);
1521
1522         /* disable transmits in the hardware */
1523         tctl = rd32(E1000_TCTL);
1524         tctl &= ~E1000_TCTL_EN;
1525         wr32(E1000_TCTL, tctl);
1526         /* flush both disables and wait for them to finish */
1527         wrfl();
1528         msleep(10);
1529
1530         for (i = 0; i < adapter->num_q_vectors; i++) {
1531                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1532                 napi_disable(&q_vector->napi);
1533         }
1534
1535         igb_irq_disable(adapter);
1536
1537         del_timer_sync(&adapter->watchdog_timer);
1538         del_timer_sync(&adapter->phy_info_timer);
1539
1540         netif_carrier_off(netdev);
1541
1542         /* record the stats before reset*/
1543         spin_lock(&adapter->stats64_lock);
1544         igb_update_stats(adapter, &adapter->stats64);
1545         spin_unlock(&adapter->stats64_lock);
1546
1547         adapter->link_speed = 0;
1548         adapter->link_duplex = 0;
1549
1550         if (!pci_channel_offline(adapter->pdev))
1551                 igb_reset(adapter);
1552         igb_clean_all_tx_rings(adapter);
1553         igb_clean_all_rx_rings(adapter);
1554 #ifdef CONFIG_IGB_DCA
1555
1556         /* since we reset the hardware DCA settings were cleared */
1557         igb_setup_dca(adapter);
1558 #endif
1559 }
1560
1561 void igb_reinit_locked(struct igb_adapter *adapter)
1562 {
1563         WARN_ON(in_interrupt());
1564         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1565                 msleep(1);
1566         igb_down(adapter);
1567         igb_up(adapter);
1568         clear_bit(__IGB_RESETTING, &adapter->state);
1569 }
1570
1571 void igb_reset(struct igb_adapter *adapter)
1572 {
1573         struct pci_dev *pdev = adapter->pdev;
1574         struct e1000_hw *hw = &adapter->hw;
1575         struct e1000_mac_info *mac = &hw->mac;
1576         struct e1000_fc_info *fc = &hw->fc;
1577         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1578         u16 hwm;
1579
1580         /* Repartition Pba for greater than 9k mtu
1581          * To take effect CTRL.RST is required.
1582          */
1583         switch (mac->type) {
1584         case e1000_i350:
1585         case e1000_82580:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba = igb_rxpbs_adjust_82580(pba);
1588                 break;
1589         case e1000_82576:
1590                 pba = rd32(E1000_RXPBS);
1591                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1592                 break;
1593         case e1000_82575:
1594         default:
1595                 pba = E1000_PBA_34K;
1596                 break;
1597         }
1598
1599         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1600             (mac->type < e1000_82576)) {
1601                 /* adjust PBA for jumbo frames */
1602                 wr32(E1000_PBA, pba);
1603
1604                 /* To maintain wire speed transmits, the Tx FIFO should be
1605                  * large enough to accommodate two full transmit packets,
1606                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1607                  * the Rx FIFO should be large enough to accommodate at least
1608                  * one full receive packet and is similarly rounded up and
1609                  * expressed in KB. */
1610                 pba = rd32(E1000_PBA);
1611                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1612                 tx_space = pba >> 16;
1613                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1614                 pba &= 0xffff;
1615                 /* the tx fifo also stores 16 bytes of information about the tx
1616                  * but don't include ethernet FCS because hardware appends it */
1617                 min_tx_space = (adapter->max_frame_size +
1618                                 sizeof(union e1000_adv_tx_desc) -
1619                                 ETH_FCS_LEN) * 2;
1620                 min_tx_space = ALIGN(min_tx_space, 1024);
1621                 min_tx_space >>= 10;
1622                 /* software strips receive CRC, so leave room for it */
1623                 min_rx_space = adapter->max_frame_size;
1624                 min_rx_space = ALIGN(min_rx_space, 1024);
1625                 min_rx_space >>= 10;
1626
1627                 /* If current Tx allocation is less than the min Tx FIFO size,
1628                  * and the min Tx FIFO size is less than the current Rx FIFO
1629                  * allocation, take space away from current Rx allocation */
1630                 if (tx_space < min_tx_space &&
1631                     ((min_tx_space - tx_space) < pba)) {
1632                         pba = pba - (min_tx_space - tx_space);
1633
1634                         /* if short on rx space, rx wins and must trump tx
1635                          * adjustment */
1636                         if (pba < min_rx_space)
1637                                 pba = min_rx_space;
1638                 }
1639                 wr32(E1000_PBA, pba);
1640         }
1641
1642         /* flow control settings */
1643         /* The high water mark must be low enough to fit one full frame
1644          * (or the size used for early receive) above it in the Rx FIFO.
1645          * Set it to the lower of:
1646          * - 90% of the Rx FIFO size, or
1647          * - the full Rx FIFO size minus one full frame */
1648         hwm = min(((pba << 10) * 9 / 10),
1649                         ((pba << 10) - 2 * adapter->max_frame_size));
1650
1651         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1652         fc->low_water = fc->high_water - 16;
1653         fc->pause_time = 0xFFFF;
1654         fc->send_xon = 1;
1655         fc->current_mode = fc->requested_mode;
1656
1657         /* disable receive for all VFs and wait one second */
1658         if (adapter->vfs_allocated_count) {
1659                 int i;
1660                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1661                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1662
1663                 /* ping all the active vfs to let them know we are going down */
1664                 igb_ping_all_vfs(adapter);
1665
1666                 /* disable transmits and receives */
1667                 wr32(E1000_VFRE, 0);
1668                 wr32(E1000_VFTE, 0);
1669         }
1670
1671         /* Allow time for pending master requests to run */
1672         hw->mac.ops.reset_hw(hw);
1673         wr32(E1000_WUC, 0);
1674
1675         if (hw->mac.ops.init_hw(hw))
1676                 dev_err(&pdev->dev, "Hardware Error\n");
1677
1678         if (hw->mac.type == e1000_82580) {
1679                 u32 reg = rd32(E1000_PCIEMISC);
1680                 wr32(E1000_PCIEMISC,
1681                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1682         }
1683         if (!netif_running(adapter->netdev))
1684                 igb_power_down_link(adapter);
1685
1686         igb_update_mng_vlan(adapter);
1687
1688         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1689         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1690
1691         igb_get_phy_info(hw);
1692 }
1693
1694 static const struct net_device_ops igb_netdev_ops = {
1695         .ndo_open               = igb_open,
1696         .ndo_stop               = igb_close,
1697         .ndo_start_xmit         = igb_xmit_frame_adv,
1698         .ndo_get_stats64        = igb_get_stats64,
1699         .ndo_set_rx_mode        = igb_set_rx_mode,
1700         .ndo_set_multicast_list = igb_set_rx_mode,
1701         .ndo_set_mac_address    = igb_set_mac,
1702         .ndo_change_mtu         = igb_change_mtu,
1703         .ndo_do_ioctl           = igb_ioctl,
1704         .ndo_tx_timeout         = igb_tx_timeout,
1705         .ndo_validate_addr      = eth_validate_addr,
1706         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1707         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1708         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1709         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1710         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1711         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1712         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1713 #ifdef CONFIG_NET_POLL_CONTROLLER
1714         .ndo_poll_controller    = igb_netpoll,
1715 #endif
1716 };
1717
1718 /**
1719  * igb_probe - Device Initialization Routine
1720  * @pdev: PCI device information struct
1721  * @ent: entry in igb_pci_tbl
1722  *
1723  * Returns 0 on success, negative on failure
1724  *
1725  * igb_probe initializes an adapter identified by a pci_dev structure.
1726  * The OS initialization, configuring of the adapter private structure,
1727  * and a hardware reset occur.
      *
      * Order of work as visible in the body: enable the PCI device, choose
      * a DMA mask, claim the memory BARs, allocate the multiqueue netdev,
      * map BAR 0, copy the per-MAC operation tables selected through
      * @ent->driver_data, run igb_sw_init(), advertise netdev features,
      * reset the MAC, validate the NVM checksum and read the MAC address,
      * set up timers and work items, derive the Wake-on-LAN default from
      * the EEPROM, call igb_reset(), take hardware control from firmware
      * and register the netdev.  Failures unwind through the err_* labels
      * at the end of the function.
1728  **/
1729 static int __devinit igb_probe(struct pci_dev *pdev,
1730                                const struct pci_device_id *ent)
1731 {
1732         struct net_device *netdev;
1733         struct igb_adapter *adapter;
1734         struct e1000_hw *hw;
1735         u16 eeprom_data = 0;
1736         s32 ret_val;
1737         static int global_quad_port_a; /* global quad port a indication */
1738         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1739         unsigned long mmio_start, mmio_len;
1740         int err, pci_using_dac;
1741         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1742         u8 part_str[E1000_PBANUM_LENGTH];
1743
1744         /* Catch broken hardware that put the wrong VF device ID in
1745          * the PCIe SR-IOV capability.
1746          */
1747         if (pdev->is_virtfn) {
1748                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1749                      pci_name(pdev), pdev->vendor, pdev->device);
1750                 return -EINVAL;
1751         }
1752
1753         err = pci_enable_device_mem(pdev);
1754         if (err)
1755                 return err;
1756
             /* Prefer 64-bit DMA; pci_using_dac is set only when both the
              * streaming and the coherent 64-bit masks are accepted.
              * NOTE(review): in the 32-bit fallback the coherent mask is only
              * attempted when the streaming 32-bit mask was rejected, and a
              * rejected 64-bit coherent mask does not trigger the fallback at
              * all - confirm this is the intended policy. */
1757         pci_using_dac = 0;
1758         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1759         if (!err) {
1760                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1761                 if (!err)
1762                         pci_using_dac = 1;
1763         } else {
1764                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1765                 if (err) {
1766                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1767                         if (err) {
1768                                 dev_err(&pdev->dev, "No usable DMA "
1769                                         "configuration, aborting\n");
1770                                 goto err_dma;
1771                         }
1772                 }
1773         }
1774
             /* claim only the memory BARs of the device */
1775         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1776                                            IORESOURCE_MEM),
1777                                            igb_driver_name);
1778         if (err)
1779                 goto err_pci_reg;
1780
1781         pci_enable_pcie_error_reporting(pdev);
1782
1783         pci_set_master(pdev);
1784         pci_save_state(pdev);
1785
             /* preset err so the goto below reports the allocation failure */
1786         err = -ENOMEM;
1787         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1788                                    IGB_ABS_MAX_TX_QUEUES);
1789         if (!netdev)
1790                 goto err_alloc_etherdev;
1791
1792         SET_NETDEV_DEV(netdev, &pdev->dev);
1793
             /* cross-link pci_dev, net_device, adapter and hw structures */
1794         pci_set_drvdata(pdev, netdev);
1795         adapter = netdev_priv(netdev);
1796         adapter->netdev = netdev;
1797         adapter->pdev = pdev;
1798         hw = &adapter->hw;
1799         hw->back = adapter;
1800         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1801
             /* map BAR 0, which holds the device registers */
1802         mmio_start = pci_resource_start(pdev, 0);
1803         mmio_len = pci_resource_len(pdev, 0);
1804
1805         err = -EIO;
1806         hw->hw_addr = ioremap(mmio_start, mmio_len);
1807         if (!hw->hw_addr)
1808                 goto err_ioremap;
1809
1810         netdev->netdev_ops = &igb_netdev_ops;
1811         igb_set_ethtool_ops(netdev);
1812         netdev->watchdog_timeo = 5 * HZ;
1813
             /* temporary name taken from the PCI address; it is replaced
              * by "eth%d" right before register_netdev() below */
1814         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1815
1816         netdev->mem_start = mmio_start;
1817         netdev->mem_end = mmio_start + mmio_len;
1818
1819         /* PCI config space info */
1820         hw->vendor_id = pdev->vendor;
1821         hw->device_id = pdev->device;
1822         hw->revision_id = pdev->revision;
1823         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1824         hw->subsystem_device_id = pdev->subsystem_device;
1825
1826         /* Copy the default MAC, PHY and NVM function pointers */
1827         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1828         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1829         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1830         /* Initialize skew-specific constants */
             /* NOTE(review): a failure here jumps to err_sw_init, which calls
              * igb_clear_interrupt_scheme() although igb_sw_init() has not
              * run yet - confirm that is safe on a freshly allocated
              * adapter. */
1831         err = ei->get_invariants(hw);
1832         if (err)
1833                 goto err_sw_init;
1834
1835         /* setup the private structure */
1836         err = igb_sw_init(adapter);
1837         if (err)
1838                 goto err_sw_init;
1839
1840         igb_get_bus_info_pcie(hw);
1841
1842         hw->phy.autoneg_wait_to_complete = false;
1843
1844         /* Copper options */
1845         if (hw->phy.media_type == e1000_media_type_copper) {
1846                 hw->phy.mdix = AUTO_ALL_MODES;
1847                 hw->phy.disable_polarity_correction = false;
1848                 hw->phy.ms_type = e1000_ms_hw_default;
1849         }
1850
1851         if (igb_check_reset_block(hw))
1852                 dev_info(&pdev->dev,
1853                         "PHY reset is blocked due to SOL/IDER session.\n");
1854
             /* offloads advertised on the device itself ... */
1855         netdev->features = NETIF_F_SG |
1856                            NETIF_F_IP_CSUM |
1857                            NETIF_F_HW_VLAN_TX |
1858                            NETIF_F_HW_VLAN_RX |
1859                            NETIF_F_HW_VLAN_FILTER;
1860
1861         netdev->features |= NETIF_F_IPV6_CSUM;
1862         netdev->features |= NETIF_F_TSO;
1863         netdev->features |= NETIF_F_TSO6;
1864         netdev->features |= NETIF_F_GRO;
1865
             /* ... and the subset advertised through vlan_features */
1866         netdev->vlan_features |= NETIF_F_TSO;
1867         netdev->vlan_features |= NETIF_F_TSO6;
1868         netdev->vlan_features |= NETIF_F_IP_CSUM;
1869         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1870         netdev->vlan_features |= NETIF_F_SG;
1871
             /* high-memory DMA only when the 64-bit masks were accepted */
1872         if (pci_using_dac) {
1873                 netdev->features |= NETIF_F_HIGHDMA;
1874                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1875         }
1876
             /* SCTP checksum offload for e1000_82576 and later MAC types */
1877         if (hw->mac.type >= e1000_82576)
1878                 netdev->features |= NETIF_F_SCTP_CSUM;
1879
1880         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1881
1882         /* before reading the NVM, reset the controller to put the device in a
1883          * known good starting state */
1884         hw->mac.ops.reset_hw(hw);
1885
1886         /* make sure the NVM is good */
1887         if (igb_validate_nvm_checksum(hw) < 0) {
1888                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1889                 err = -EIO;
1890                 goto err_eeprom;
1891         }
1892
1893         /* copy the MAC address out of the NVM */
             /* a read error is only logged; the address validity check
              * below is what actually aborts the probe */
1894         if (hw->mac.ops.read_mac_addr(hw))
1895                 dev_err(&pdev->dev, "NVM Read Error\n");
1896
1897         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1898         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1899
1900         if (!is_valid_ether_addr(netdev->perm_addr)) {
1901                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1902                 err = -EIO;
1903                 goto err_eeprom;
1904         }
1905
             /* timers and work items are only initialised here, not started */
1906         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1907                     (unsigned long) adapter);
1908         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1909                     (unsigned long) adapter);
1910
1911         INIT_WORK(&adapter->reset_task, igb_reset_task);
1912         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1913
1914         /* Initialize link properties that are user-changeable */
1915         adapter->fc_autoneg = true;
1916         hw->mac.autoneg = true;
             /* NOTE(review): 0x2f is presumably a bitmask of advertised
              * speed/duplex modes - confirm against the PHY definitions */
1917         hw->phy.autoneg_advertised = 0x2f;
1918
1919         hw->fc.requested_mode = e1000_fc_default;
1920         hw->fc.current_mode = e1000_fc_default;
1921
1922         igb_validate_mdi_setting(hw);
1923
1924         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1925          * enable the ACPI Magic Packet filter
1926          */
1927
             /* Pick the NVM word for this PCI function.  eeprom_data keeps
              * its initial 0 when no branch matches, and the return value
              * of the NVM read is not checked. */
1928         if (hw->bus.func == 0)
1929                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1930         else if (hw->mac.type == e1000_82580)
1931                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1932                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1933                                  &eeprom_data);
1934         else if (hw->bus.func == 1)
1935                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1936
1937         if (eeprom_data & eeprom_apme_mask)
1938                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1939
1940         /* now that we have the eeprom settings, apply the special cases where
1941          * the eeprom may be wrong or the board simply won't support wake on
1942          * lan on a particular port */
1943         switch (pdev->device) {
1944         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1945                 adapter->eeprom_wol = 0;
1946                 break;
1947         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1948         case E1000_DEV_ID_82576_FIBER:
1949         case E1000_DEV_ID_82576_SERDES:
1950                 /* Wake events only supported on port A for dual fiber
1951                  * regardless of eeprom setting */
1952                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1953                         adapter->eeprom_wol = 0;
1954                 break;
1955         case E1000_DEV_ID_82576_QUAD_COPPER:
1956         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
                     /* global_quad_port_a is function-static, so it is
                      * shared by every probe call: it counts the quad-port
                      * functions probed so far and wraps back to 0 after
                      * four of them */
1957                 /* if quad port adapter, disable WoL on all but port A */
1958                 if (global_quad_port_a != 0)
1959                         adapter->eeprom_wol = 0;
1960                 else
1961                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1962                 /* Reset for multiple quad port adapters */
1963                 if (++global_quad_port_a == 4)
1964                         global_quad_port_a = 0;
1965                 break;
1966         }
1967
1968         /* initialize the wol settings based on the eeprom settings */
1969         adapter->wol = adapter->eeprom_wol;
1970         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1971
1972         /* reset the hardware with the new settings */
1973         igb_reset(adapter);
1974
1975         /* let the f/w know that the h/w is now under the control of the
1976          * driver. */
1977         igb_get_hw_control(adapter);
1978
             /* replace the temporary PCI-address name with a name template
              * before registering the netdev */
1979         strcpy(netdev->name, "eth%d");
1980         err = register_netdev(netdev);
1981         if (err)
1982                 goto err_register;
1983
1984         /* carrier off reporting is important to ethtool even BEFORE open */
1985         netif_carrier_off(netdev);
1986
1987 #ifdef CONFIG_IGB_DCA
             /* DCA is optional: a failed registration is silently ignored */
1988         if (dca_add_requester(&pdev->dev) == 0) {
1989                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1990                 dev_info(&pdev->dev, "DCA enabled\n");
1991                 igb_setup_dca(adapter);
1992         }
1993
1994 #endif
1995         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1996         /* print bus type/speed/width info */
1997         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1998                  netdev->name,
1999                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2000                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2001                                                             "unknown"),
2002                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2003                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2004                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2005                    "unknown"),
2006                  netdev->dev_addr);
2007
             /* the part number is informational only: fall back to a
              * placeholder string when it cannot be read */
2008         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2009         if (ret_val)
2010                 strcpy(part_str, "Unknown");
2011         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2012         dev_info(&pdev->dev,
2013                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2014                 adapter->msix_entries ? "MSI-X" :
2015                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2016                 adapter->num_rx_queues, adapter->num_tx_queues);
2017
2018         return 0;
2019
             /* Error unwinding.  The labels fall through, so entering at one
              * label also runs the cleanup of every label below it.
              * NOTE(review): no label calls
              * pci_disable_pcie_error_reporting() or undoes what
              * igb_sw_init() may have set up for VFs - confirm whether that
              * is needed. */
2020 err_register:
2021         igb_release_hw_control(adapter);
2022 err_eeprom:
2023         if (!igb_check_reset_block(hw))
2024                 igb_reset_phy(hw);
2025
2026         if (hw->flash_address)
2027                 iounmap(hw->flash_address);
2028 err_sw_init:
2029         igb_clear_interrupt_scheme(adapter);
2030         iounmap(hw->hw_addr);
2031 err_ioremap:
2032         free_netdev(netdev);
2033 err_alloc_etherdev:
2034         pci_release_selected_regions(pdev,
2035                                      pci_select_bars(pdev, IORESOURCE_MEM));
2036 err_pci_reg:
2037 err_dma:
2038         pci_disable_device(pdev);
2039         return err;
2040 }
2041
2042 /**
2043  * igb_remove - Device Removal Routine
2044  * @pdev: PCI device information struct
2045  *
2046  * igb_remove is called by the PCI subsystem to alert the driver
2047  * that it should release a PCI device.  The could be caused by a
2048  * Hot-Plug event, or because the driver is going to be removed from
2049  * memory.
2050  **/
2051 static void __devexit igb_remove(struct pci_dev *pdev)
2052 {
2053         struct net_device *netdev = pci_get_drvdata(pdev);
2054         struct igb_adapter *adapter = netdev_priv(netdev);
2055         struct e1000_hw *hw = &adapter->hw;
2056
2057         /*
2058          * The watchdog timer may be rescheduled, so explicitly
2059          * disable watchdog from being rescheduled.
2060          */
2061         set_bit(__IGB_DOWN, &adapter->state);
2062         del_timer_sync(&adapter->watchdog_timer);
2063         del_timer_sync(&adapter->phy_info_timer);
2064
2065         cancel_work_sync(&adapter->reset_task);
2066         cancel_work_sync(&adapter->watchdog_task);
2067
2068 #ifdef CONFIG_IGB_DCA
2069         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2070                 dev_info(&pdev->dev, "DCA disabled\n");
2071                 dca_remove_requester(&pdev->dev);
2072                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2073                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2074         }
2075 #endif
2076
2077         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2078          * would have already happened in close and is redundant. */
2079         igb_release_hw_control(adapter);
2080
2081         unregister_netdev(netdev);
2082
2083         igb_clear_interrupt_scheme(adapter);
2084
2085 #ifdef CONFIG_PCI_IOV
2086         /* reclaim resources allocated to VFs */
2087         if (adapter->vf_data) {
2088                 /* disable iov and allow time for transactions to clear */
2089                 pci_disable_sriov(pdev);
2090                 msleep(500);
2091
2092                 kfree(adapter->vf_data);
2093                 adapter->vf_data = NULL;
2094                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2095                 msleep(100);
2096                 dev_info(&pdev->dev, "IOV Disabled\n");
2097         }
2098 #endif
2099
2100         iounmap(hw->hw_addr);
2101         if (hw->flash_address)
2102                 iounmap(hw->flash_address);
2103         pci_release_selected_regions(pdev,
2104                                      pci_select_bars(pdev, IORESOURCE_MEM));
2105
2106         free_netdev(netdev);
2107
2108         pci_disable_pcie_error_reporting(pdev);
2109
2110         pci_disable_device(pdev);
2111 }
2112
2113 /**
2114  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2115  * @adapter: board private structure to initialize
2116  *
2117  * This function initializes the vf specific data storage and then attempts to
2118  * allocate the VFs.  The reason for ordering it this way is because it is much
2119  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2120  * the memory for the VFs.
2121  **/
2122 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2123 {
2124 #ifdef CONFIG_PCI_IOV
2125         struct pci_dev *pdev = adapter->pdev;
2126
2127         if (adapter->vfs_allocated_count) {
2128                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2129                                            sizeof(struct vf_data_storage),
2130                                            GFP_KERNEL);
2131                 /* if allocation failed then we do not support SR-IOV */
2132                 if (!adapter->vf_data) {
2133                         adapter->vfs_allocated_count = 0;
2134                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2135                                 "Data Storage\n");
2136                 }
2137         }
2138
2139         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2140                 kfree(adapter->vf_data);
2141                 adapter->vf_data = NULL;
2142 #endif /* CONFIG_PCI_IOV */
2143                 adapter->vfs_allocated_count = 0;
2144 #ifdef CONFIG_PCI_IOV
2145         } else {
2146                 unsigned char mac_addr[ETH_ALEN];
2147                 int i;
2148                 dev_info(&pdev->dev, "%d vfs allocated\n",
2149                          adapter->vfs_allocated_count);
2150                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2151                         random_ether_addr(mac_addr);
2152                         igb_set_vf_mac(adapter, i, mac_addr);
2153                 }
2154         }
2155 #endif /* CONFIG_PCI_IOV */
2156 }
2157
2158
2159 /**
2160  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2161  * @adapter: board private structure to initialize
2162  *
2163  * igb_init_hw_timer initializes the function pointer and values for the hw
2164  * timer found in hardware.
2165  **/
2166 static void igb_init_hw_timer(struct igb_adapter *adapter)
2167 {
2168         struct e1000_hw *hw = &adapter->hw;
2169
2170         switch (hw->mac.type) {
2171         case e1000_i350:
2172         case e1000_82580:
2173                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2174                 adapter->cycles.read = igb_read_clock;
2175                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2176                 adapter->cycles.mult = 1;
2177                 /*
2178                  * The 82580 timesync updates the system timer every 8ns by 8ns
2179                  * and the value cannot be shifted.  Instead we need to shift
2180                  * the registers to generate a 64bit timer value.  As a result
2181                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2182                  * 24 in order to generate a larger value for synchronization.
2183                  */
2184                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2185                 /* disable system timer temporarily by setting bit 31 */
2186                 wr32(E1000_TSAUXC, 0x80000000);
2187                 wrfl();
2188
2189                 /* Set registers so that rollover occurs soon to test this. */
2190                 wr32(E1000_SYSTIMR, 0x00000000);
2191                 wr32(E1000_SYSTIML, 0x80000000);
2192                 wr32(E1000_SYSTIMH, 0x000000FF);
2193                 wrfl();
2194
2195                 /* enable system timer by clearing bit 31 */
2196                 wr32(E1000_TSAUXC, 0x0);
2197                 wrfl();
2198
2199                 timecounter_init(&adapter->clock,
2200                                  &adapter->cycles,
2201                                  ktime_to_ns(ktime_get_real()));
2202                 /*
2203                  * Synchronize our NIC clock against system wall clock. NIC
2204                  * time stamp reading requires ~3us per sample, each sample
2205                  * was pretty stable even under load => only require 10
2206                  * samples for each offset comparison.
2207                  */
2208                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2209                 adapter->compare.source = &adapter->clock;
2210                 adapter->compare.target = ktime_get_real;
2211                 adapter->compare.num_samples = 10;
2212                 timecompare_update(&adapter->compare, 0);
2213                 break;
2214         case e1000_82576:
2215                 /*
2216                  * Initialize hardware timer: we keep it running just in case
2217                  * that some program needs it later on.
2218                  */
2219                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2220                 adapter->cycles.read = igb_read_clock;
2221                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2222                 adapter->cycles.mult = 1;
2223                 /**
2224                  * Scale the NIC clock cycle by a large factor so that
2225                  * relatively small clock corrections can be added or
2226                  * substracted at each clock tick. The drawbacks of a large
2227                  * factor are a) that the clock register overflows more quickly
2228                  * (not such a big deal) and b) that the increment per tick has
2229                  * to fit into 24 bits.  As a result we need to use a shift of
2230                  * 19 so we can fit a value of 16 into the TIMINCA register.
2231                  */
2232                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2233                 wr32(E1000_TIMINCA,
2234                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2235                                 (16 << IGB_82576_TSYNC_SHIFT));
2236
2237                 /* Set registers so that rollover occurs soon to test this. */
2238                 wr32(E1000_SYSTIML, 0x00000000);
2239                 wr32(E1000_SYSTIMH, 0xFF800000);
2240                 wrfl();
2241
2242                 timecounter_init(&adapter->clock,
2243                                  &adapter->cycles,
2244                                  ktime_to_ns(ktime_get_real()));
2245                 /*
2246                  * Synchronize our NIC clock against system wall clock. NIC
2247                  * time stamp reading requires ~3us per sample, each sample
2248                  * was pretty stable even under load => only require 10
2249                  * samples for each offset comparison.
2250                  */
2251                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2252                 adapter->compare.source = &adapter->clock;
2253                 adapter->compare.target = ktime_get_real;
2254                 adapter->compare.num_samples = 10;
2255                 timecompare_update(&adapter->compare, 0);
2256                 break;
2257         case e1000_82575:
2258                 /* 82575 does not support timesync */
2259         default:
2260                 break;
2261         }
2262
2263 }
2264
2265 /**
2266  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2267  * @adapter: board private structure to initialize
2268  *
2269  * igb_sw_init initializes the Adapter private data structure.
2270  * Fields are initialized based on PCI device information and
2271  * OS network device settings (MTU size).
2272  **/
2273 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2274 {
2275         struct e1000_hw *hw = &adapter->hw;
2276         struct net_device *netdev = adapter->netdev;
2277         struct pci_dev *pdev = adapter->pdev;
2278
2279         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2280
2281         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2282         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2283         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2284         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2285
2286         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2287         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2288
2289         spin_lock_init(&adapter->stats64_lock);
2290 #ifdef CONFIG_PCI_IOV
2291         switch (hw->mac.type) {
2292         case e1000_82576:
2293         case e1000_i350:
2294                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2295                 break;
2296         default:
2297                 break;
2298         }
2299 #endif /* CONFIG_PCI_IOV */
2300         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2301
2302         /*
2303          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2304          * then we should combine the queues into a queue pair in order to
2305          * conserve interrupts due to limited supply
2306          */
2307         if ((adapter->rss_queues > 4) ||
2308             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2309                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2310
2311         /* This call may decrease the number of queues */
2312         if (igb_init_interrupt_scheme(adapter)) {
2313                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2314                 return -ENOMEM;
2315         }
2316
2317         igb_init_hw_timer(adapter);
2318         igb_probe_vfs(adapter);
2319
2320         /* Explicitly disable IRQ since the NIC can be in any state. */
2321         igb_irq_disable(adapter);
2322
2323         set_bit(__IGB_DOWN, &adapter->state);
2324         return 0;
2325 }
2326
2327 /**
2328  * igb_open - Called when a network interface is made active
2329  * @netdev: network interface device structure
2330  *
2331  * Returns 0 on success, negative value on failure
2332  *
2333  * The open entry point is called when a network interface is made
2334  * active by the system (IFF_UP).  At this point all resources needed
2335  * for transmit and receive operations are allocated, the interrupt
2336  * handler is registered with the OS, the watchdog timer is started,
2337  * and the stack is notified that the interface is ready.
2338  **/
2339 static int igb_open(struct net_device *netdev)
2340 {
2341         struct igb_adapter *adapter = netdev_priv(netdev);
2342         struct e1000_hw *hw = &adapter->hw;
2343         int err;
2344         int i;
2345
2346         /* disallow open during test */
2347         if (test_bit(__IGB_TESTING, &adapter->state))
2348                 return -EBUSY;
2349
2350         netif_carrier_off(netdev);
2351
2352         /* allocate transmit descriptors */
2353         err = igb_setup_all_tx_resources(adapter);
2354         if (err)
2355                 goto err_setup_tx;
2356
2357         /* allocate receive descriptors */
2358         err = igb_setup_all_rx_resources(adapter);
2359         if (err)
2360                 goto err_setup_rx;
2361
2362         igb_power_up_link(adapter);
2363
2364         /* before we allocate an interrupt, we must be ready to handle it.
2365          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2366          * as soon as we call pci_request_irq, so we have to setup our
2367          * clean_rx handler before we do so.  */
2368         igb_configure(adapter);
2369
2370         err = igb_request_irq(adapter);
2371         if (err)
2372                 goto err_req_irq;
2373
2374         /* From here on the code is the same as igb_up() */
2375         clear_bit(__IGB_DOWN, &adapter->state);
2376
2377         for (i = 0; i < adapter->num_q_vectors; i++) {
2378                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2379                 napi_enable(&q_vector->napi);
2380         }
2381
2382         /* Clear any pending interrupts. */
2383         rd32(E1000_ICR);
2384
2385         igb_irq_enable(adapter);
2386
2387         /* notify VFs that reset has been completed */
2388         if (adapter->vfs_allocated_count) {
2389                 u32 reg_data = rd32(E1000_CTRL_EXT);
2390                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2391                 wr32(E1000_CTRL_EXT, reg_data);
2392         }
2393
2394         netif_tx_start_all_queues(netdev);
2395
2396         /* start the watchdog. */
2397         hw->mac.get_link_status = 1;
2398         schedule_work(&adapter->watchdog_task);
2399
2400         return 0;
2401
2402 err_req_irq:
2403         igb_release_hw_control(adapter);
2404         igb_power_down_link(adapter);
2405         igb_free_all_rx_resources(adapter);
2406 err_setup_rx:
2407         igb_free_all_tx_resources(adapter);
2408 err_setup_tx:
2409         igb_reset(adapter);
2410
2411         return err;
2412 }
2413
2414 /**
2415  * igb_close - Disables a network interface
2416  * @netdev: network interface device structure
2417  *
2418  * Returns 0, this is not allowed to fail
2419  *
2420  * The close entry point is called when an interface is de-activated
2421  * by the OS.  The hardware is still under the driver's control, but
2422  * needs to be disabled.  A global MAC reset is issued to stop the
2423  * hardware, and all transmit and receive resources are freed.
2424  **/
2425 static int igb_close(struct net_device *netdev)
2426 {
2427         struct igb_adapter *adapter = netdev_priv(netdev);
2428
2429         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2430         igb_down(adapter);
2431
2432         igb_free_irq(adapter);
2433
2434         igb_free_all_tx_resources(adapter);
2435         igb_free_all_rx_resources(adapter);
2436
2437         return 0;
2438 }
2439
2440 /**
2441  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2442  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2443  *
2444  * Return 0 on success, negative on failure
2445  **/
2446 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2447 {
2448         struct device *dev = tx_ring->dev;
2449         int size;
2450
2451         size = sizeof(struct igb_buffer) * tx_ring->count;
2452         tx_ring->buffer_info = vzalloc(size);
2453         if (!tx_ring->buffer_info)
2454                 goto err;
2455
2456         /* round up to nearest 4K */
2457         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2458         tx_ring->size = ALIGN(tx_ring->size, 4096);
2459
2460         tx_ring->desc = dma_alloc_coherent(dev,
2461                                            tx_ring->size,
2462                                            &tx_ring->dma,
2463                                            GFP_KERNEL);
2464
2465         if (!tx_ring->desc)
2466                 goto err;
2467
2468         tx_ring->next_to_use = 0;
2469         tx_ring->next_to_clean = 0;
2470         return 0;
2471
2472 err:
2473         vfree(tx_ring->buffer_info);
2474         dev_err(dev,
2475                 "Unable to allocate memory for the transmit descriptor ring\n");
2476         return -ENOMEM;
2477 }
2478
2479 /**
2480  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2481  *                                (Descriptors) for all queues
2482  * @adapter: board private structure
2483  *
2484  * Return 0 on success, negative on failure
2485  **/
2486 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2487 {
2488         struct pci_dev *pdev = adapter->pdev;
2489         int i, err = 0;
2490
2491         for (i = 0; i < adapter->num_tx_queues; i++) {
2492                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2493                 if (err) {
2494                         dev_err(&pdev->dev,
2495                                 "Allocation for Tx Queue %u failed\n", i);
2496                         for (i--; i >= 0; i--)
2497                                 igb_free_tx_resources(adapter->tx_ring[i]);
2498                         break;
2499                 }
2500         }
2501
2502         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2503                 int r_idx = i % adapter->num_tx_queues;
2504                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2505         }
2506         return err;
2507 }
2508
2509 /**
2510  * igb_setup_tctl - configure the transmit control registers
2511  * @adapter: Board private structure
2512  **/
2513 void igb_setup_tctl(struct igb_adapter *adapter)
2514 {
2515         struct e1000_hw *hw = &adapter->hw;
2516         u32 tctl;
2517
2518         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2519         wr32(E1000_TXDCTL(0), 0);
2520
2521         /* Program the Transmit Control Register */
2522         tctl = rd32(E1000_TCTL);
2523         tctl &= ~E1000_TCTL_CT;
2524         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2525                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2526
2527         igb_config_collision_dist(hw);
2528
2529         /* Enable transmits */
2530         tctl |= E1000_TCTL_EN;
2531
2532         wr32(E1000_TCTL, tctl);
2533 }
2534
2535 /**
2536  * igb_configure_tx_ring - Configure transmit ring after Reset
2537  * @adapter: board private structure
2538  * @ring: tx ring to configure
2539  *
2540  * Configure a transmit ring after a reset.
2541  **/
2542 void igb_configure_tx_ring(struct igb_adapter *adapter,
2543                            struct igb_ring *ring)
2544 {
2545         struct e1000_hw *hw = &adapter->hw;
2546         u32 txdctl;
2547         u64 tdba = ring->dma;
2548         int reg_idx = ring->reg_idx;
2549
2550         /* disable the queue */
2551         txdctl = rd32(E1000_TXDCTL(reg_idx));
2552         wr32(E1000_TXDCTL(reg_idx),
2553                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2554         wrfl();
2555         mdelay(10);
2556
2557         wr32(E1000_TDLEN(reg_idx),
2558                         ring->count * sizeof(union e1000_adv_tx_desc));
2559         wr32(E1000_TDBAL(reg_idx),
2560                         tdba & 0x00000000ffffffffULL);
2561         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2562
2563         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2564         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2565         writel(0, ring->head);
2566         writel(0, ring->tail);
2567
2568         txdctl |= IGB_TX_PTHRESH;
2569         txdctl |= IGB_TX_HTHRESH << 8;
2570         txdctl |= IGB_TX_WTHRESH << 16;
2571
2572         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2573         wr32(E1000_TXDCTL(reg_idx), txdctl);
2574 }
2575
2576 /**
2577  * igb_configure_tx - Configure transmit Unit after Reset
2578  * @adapter: board private structure
2579  *
2580  * Configure the Tx unit of the MAC after a reset.
2581  **/
2582 static void igb_configure_tx(struct igb_adapter *adapter)
2583 {
2584         int i;
2585
2586         for (i = 0; i < adapter->num_tx_queues; i++)
2587                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2588 }
2589
2590 /**
2591  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2592  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2593  *
2594  * Returns 0 on success, negative on failure
2595  **/
2596 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2597 {
2598         struct device *dev = rx_ring->dev;
2599         int size, desc_len;
2600
2601         size = sizeof(struct igb_buffer) * rx_ring->count;
2602         rx_ring->buffer_info = vzalloc(size);
2603         if (!rx_ring->buffer_info)
2604                 goto err;
2605
2606         desc_len = sizeof(union e1000_adv_rx_desc);
2607
2608         /* Round up to nearest 4K */
2609         rx_ring->size = rx_ring->count * desc_len;
2610         rx_ring->size = ALIGN(rx_ring->size, 4096);
2611
2612         rx_ring->desc = dma_alloc_coherent(dev,
2613                                            rx_ring->size,
2614                                            &rx_ring->dma,
2615                                            GFP_KERNEL);
2616
2617         if (!rx_ring->desc)
2618                 goto err;
2619
2620         rx_ring->next_to_clean = 0;
2621         rx_ring->next_to_use = 0;
2622
2623         return 0;
2624
2625 err:
2626         vfree(rx_ring->buffer_info);
2627         rx_ring->buffer_info = NULL;
2628         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2629                 " ring\n");
2630         return -ENOMEM;
2631 }
2632
2633 /**
2634  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2635  *                                (Descriptors) for all queues
2636  * @adapter: board private structure
2637  *
2638  * Return 0 on success, negative on failure
2639  **/
2640 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2641 {
2642         struct pci_dev *pdev = adapter->pdev;
2643         int i, err = 0;
2644
2645         for (i = 0; i < adapter->num_rx_queues; i++) {
2646                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2647                 if (err) {
2648                         dev_err(&pdev->dev,
2649                                 "Allocation for Rx Queue %u failed\n", i);
2650                         for (i--; i >= 0; i--)
2651                                 igb_free_rx_resources(adapter->rx_ring[i]);
2652                         break;
2653                 }
2654         }
2655
2656         return err;
2657 }
2658
2659 /**
2660  * igb_setup_mrqc - configure the multiple receive queue control registers
2661  * @adapter: Board private structure
2662  **/
2663 static void igb_setup_mrqc(struct igb_adapter *adapter)
2664 {
2665         struct e1000_hw *hw = &adapter->hw;
2666         u32 mrqc, rxcsum;
2667         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2668         union e1000_reta {
2669                 u32 dword;
2670                 u8  bytes[4];
2671         } reta;
2672         static const u8 rsshash[40] = {
2673                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2674                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2675                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2676                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2677
2678         /* Fill out hash function seeds */
2679         for (j = 0; j < 10; j++) {
2680                 u32 rsskey = rsshash[(j * 4)];
2681                 rsskey |= rsshash[(j * 4) + 1] << 8;
2682                 rsskey |= rsshash[(j * 4) + 2] << 16;
2683                 rsskey |= rsshash[(j * 4) + 3] << 24;
2684                 array_wr32(E1000_RSSRK(0), j, rsskey);
2685         }
2686
2687         num_rx_queues = adapter->rss_queues;
2688
2689         if (adapter->vfs_allocated_count) {
2690                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2691                 switch (hw->mac.type) {
2692                 case e1000_i350:
2693                 case e1000_82580:
2694                         num_rx_queues = 1;
2695                         shift = 0;
2696                         break;
2697                 case e1000_82576:
2698                         shift = 3;
2699                         num_rx_queues = 2;
2700                         break;
2701                 case e1000_82575:
2702                         shift = 2;
2703                         shift2 = 6;
2704                 default:
2705                         break;
2706                 }
2707         } else {
2708                 if (hw->mac.type == e1000_82575)
2709                         shift = 6;
2710         }
2711
2712         for (j = 0; j < (32 * 4); j++) {
2713                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2714                 if (shift2)
2715                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2716                 if ((j & 3) == 3)
2717                         wr32(E1000_RETA(j >> 2), reta.dword);
2718         }
2719
2720         /*
2721          * Disable raw packet checksumming so that RSS hash is placed in
2722          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2723          * offloads as they are enabled by default
2724          */
2725         rxcsum = rd32(E1000_RXCSUM);
2726         rxcsum |= E1000_RXCSUM_PCSD;
2727
2728         if (adapter->hw.mac.type >= e1000_82576)
2729                 /* Enable Receive Checksum Offload for SCTP */
2730                 rxcsum |= E1000_RXCSUM_CRCOFL;
2731
2732         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2733         wr32(E1000_RXCSUM, rxcsum);
2734
2735         /* If VMDq is enabled then we set the appropriate mode for that, else
2736          * we default to RSS so that an RSS hash is calculated per packet even
2737          * if we are only using one queue */
2738         if (adapter->vfs_allocated_count) {
2739                 if (hw->mac.type > e1000_82575) {
2740                         /* Set the default pool for the PF's first queue */
2741                         u32 vtctl = rd32(E1000_VT_CTL);
2742                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2743                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2744                         vtctl |= adapter->vfs_allocated_count <<
2745                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2746                         wr32(E1000_VT_CTL, vtctl);
2747                 }
2748                 if (adapter->rss_queues > 1)
2749                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2750                 else
2751                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2752         } else {
2753                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2754         }
2755         igb_vmm_control(adapter);
2756
2757         /*
2758          * Generate RSS hash based on TCP port numbers and/or
2759          * IPv4/v6 src and dst addresses since UDP cannot be
2760          * hashed reliably due to IP fragmentation
2761          */
2762         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2763                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2764                 E1000_MRQC_RSS_FIELD_IPV6 |
2765                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2766                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2767
2768         wr32(E1000_MRQC, mrqc);
2769 }
2770
2771 /**
2772  * igb_setup_rctl - configure the receive control registers
2773  * @adapter: Board private structure
2774  **/
2775 void igb_setup_rctl(struct igb_adapter *adapter)
2776 {
2777         struct e1000_hw *hw = &adapter->hw;
2778         u32 rctl;
2779
2780         rctl = rd32(E1000_RCTL);
2781
2782         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2783         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2784
2785         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2786                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2787
2788         /*
2789          * enable stripping of CRC. It's unlikely this will break BMC
2790          * redirection as it did with e1000. Newer features require
2791          * that the HW strips the CRC.
2792          */
2793         rctl |= E1000_RCTL_SECRC;
2794
2795         /* disable store bad packets and clear size bits. */
2796         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2797
2798         /* enable LPE to prevent packets larger than max_frame_size */
2799         rctl |= E1000_RCTL_LPE;
2800
2801         /* disable queue 0 to prevent tail write w/o re-config */
2802         wr32(E1000_RXDCTL(0), 0);
2803
2804         /* Attention!!!  For SR-IOV PF driver operations you must enable
2805          * queue drop for all VF and PF queues to prevent head of line blocking
2806          * if an un-trusted VF does not provide descriptors to hardware.
2807          */
2808         if (adapter->vfs_allocated_count) {
2809                 /* set all queue drop enable bits */
2810                 wr32(E1000_QDE, ALL_QUEUES);
2811         }
2812
2813         wr32(E1000_RCTL, rctl);
2814 }
2815
2816 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2817                                    int vfn)
2818 {
2819         struct e1000_hw *hw = &adapter->hw;
2820         u32 vmolr;
2821
2822         /* if it isn't the PF check to see if VFs are enabled and
2823          * increase the size to support vlan tags */
2824         if (vfn < adapter->vfs_allocated_count &&
2825             adapter->vf_data[vfn].vlans_enabled)
2826                 size += VLAN_TAG_SIZE;
2827
2828         vmolr = rd32(E1000_VMOLR(vfn));
2829         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2830         vmolr |= size | E1000_VMOLR_LPE;
2831         wr32(E1000_VMOLR(vfn), vmolr);
2832
2833         return 0;
2834 }
2835
2836 /**
2837  * igb_rlpml_set - set maximum receive packet size
2838  * @adapter: board private structure
2839  *
2840  * Configure maximum receivable packet size.
2841  **/
2842 static void igb_rlpml_set(struct igb_adapter *adapter)
2843 {
2844         u32 max_frame_size = adapter->max_frame_size;
2845         struct e1000_hw *hw = &adapter->hw;
2846         u16 pf_id = adapter->vfs_allocated_count;
2847
2848         if (adapter->vlgrp)
2849                 max_frame_size += VLAN_TAG_SIZE;
2850
2851         /* if vfs are enabled we set RLPML to the largest possible request
2852          * size and set the VMOLR RLPML to the size we need */
2853         if (pf_id) {
2854                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2855                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2856         }
2857
2858         wr32(E1000_RLPML, max_frame_size);
2859 }
2860
2861 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2862                                  int vfn, bool aupe)
2863 {
2864         struct e1000_hw *hw = &adapter->hw;
2865         u32 vmolr;
2866
2867         /*
2868          * This register exists only on 82576 and newer so if we are older then
2869          * we should exit and do nothing
2870          */
2871         if (hw->mac.type < e1000_82576)
2872                 return;
2873
2874         vmolr = rd32(E1000_VMOLR(vfn));
2875         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2876         if (aupe)
2877                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2878         else
2879                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2880
2881         /* clear all bits that might not be set */
2882         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2883
2884         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2885                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2886         /*
2887          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2888          * multicast packets
2889          */
2890         if (vfn <= adapter->vfs_allocated_count)
2891                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2892
2893         wr32(E1000_VMOLR(vfn), vmolr);
2894 }
2895
2896 /**
2897  * igb_configure_rx_ring - Configure a receive ring after Reset
2898  * @adapter: board private structure
2899  * @ring: receive ring to be configured
2900  *
2901  * Configure the Rx unit of the MAC after a reset.
2902  **/
2903 void igb_configure_rx_ring(struct igb_adapter *adapter,
2904                            struct igb_ring *ring)
2905 {
2906         struct e1000_hw *hw = &adapter->hw;
2907         u64 rdba = ring->dma;
2908         int reg_idx = ring->reg_idx;
2909         u32 srrctl, rxdctl;
2910
2911         /* disable the queue */
2912         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2913         wr32(E1000_RXDCTL(reg_idx),
2914                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2915
2916         /* Set DMA base address registers */
2917         wr32(E1000_RDBAL(reg_idx),
2918              rdba & 0x00000000ffffffffULL);
2919         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2920         wr32(E1000_RDLEN(reg_idx),
2921                        ring->count * sizeof(union e1000_adv_rx_desc));
2922
2923         /* initialize head and tail */
2924         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2925         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2926         writel(0, ring->head);
2927         writel(0, ring->tail);
2928
2929         /* set descriptor configuration */
2930         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2931                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2932                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2933 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2934                 srrctl |= IGB_RXBUFFER_16384 >>
2935                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2936 #else
2937                 srrctl |= (PAGE_SIZE / 2) >>
2938                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2939 #endif
2940                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2941         } else {
2942                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2943                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2944                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2945         }
2946         if (hw->mac.type == e1000_82580)
2947                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2948         /* Only set Drop Enable if we are supporting multiple queues */
2949         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2950                 srrctl |= E1000_SRRCTL_DROP_EN;
2951
2952         wr32(E1000_SRRCTL(reg_idx), srrctl);
2953
2954         /* set filtering for VMDQ pools */
2955         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2956
2957         /* enable receive descriptor fetching */
2958         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2959         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2960         rxdctl &= 0xFFF00000;
2961         rxdctl |= IGB_RX_PTHRESH;
2962         rxdctl |= IGB_RX_HTHRESH << 8;
2963         rxdctl |= IGB_RX_WTHRESH << 16;
2964         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2965 }
2966
2967 /**
2968  * igb_configure_rx - Configure receive Unit after Reset
2969  * @adapter: board private structure
2970  *
2971  * Configure the Rx unit of the MAC after a reset.
2972  **/
2973 static void igb_configure_rx(struct igb_adapter *adapter)
2974 {
2975         int i;
2976
2977         /* set UTA to appropriate mode */
2978         igb_set_uta(adapter);
2979
2980         /* set the correct pool for the PF default MAC address in entry 0 */
2981         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2982                          adapter->vfs_allocated_count);
2983
2984         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2985          * the Base and Length of the Rx Descriptor Ring */
2986         for (i = 0; i < adapter->num_rx_queues; i++)
2987                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2988 }
2989
2990 /**
2991  * igb_free_tx_resources - Free Tx Resources per Queue
2992  * @tx_ring: Tx descriptor ring for a specific queue
2993  *
2994  * Free all transmit software resources
2995  **/
2996 void igb_free_tx_resources(struct igb_ring *tx_ring)
2997 {
2998         igb_clean_tx_ring(tx_ring);
2999
3000         vfree(tx_ring->buffer_info);
3001         tx_ring->buffer_info = NULL;
3002
3003         /* if not set, then don't free */
3004         if (!tx_ring->desc)
3005                 return;
3006
3007         dma_free_coherent(tx_ring->dev, tx_ring->size,
3008                           tx_ring->desc, tx_ring->dma);
3009
3010         tx_ring->desc = NULL;
3011 }
3012
3013 /**
3014  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3015  * @adapter: board private structure
3016  *
3017  * Free all transmit software resources
3018  **/
3019 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3020 {
3021         int i;
3022
3023         for (i = 0; i < adapter->num_tx_queues; i++)
3024                 igb_free_tx_resources(adapter->tx_ring[i]);
3025 }
3026
3027 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3028                                     struct igb_buffer *buffer_info)
3029 {
3030         if (buffer_info->dma) {
3031                 if (buffer_info->mapped_as_page)
3032                         dma_unmap_page(tx_ring->dev,
3033                                         buffer_info->dma,
3034                                         buffer_info->length,
3035                                         DMA_TO_DEVICE);
3036                 else
3037                         dma_unmap_single(tx_ring->dev,
3038                                         buffer_info->dma,
3039                                         buffer_info->length,
3040                                         DMA_TO_DEVICE);
3041                 buffer_info->dma = 0;
3042         }
3043         if (buffer_info->skb) {
3044                 dev_kfree_skb_any(buffer_info->skb);
3045                 buffer_info->skb = NULL;
3046         }
3047         buffer_info->time_stamp = 0;
3048         buffer_info->length = 0;
3049         buffer_info->next_to_watch = 0;
3050         buffer_info->mapped_as_page = false;
3051 }
3052
3053 /**
3054  * igb_clean_tx_ring - Free Tx Buffers
3055  * @tx_ring: ring to be cleaned
3056  **/
3057 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3058 {
3059         struct igb_buffer *buffer_info;
3060         unsigned long size;
3061         unsigned int i;
3062
3063         if (!tx_ring->buffer_info)
3064                 return;
3065         /* Free all the Tx ring sk_buffs */
3066
3067         for (i = 0; i < tx_ring->count; i++) {
3068                 buffer_info = &tx_ring->buffer_info[i];
3069                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3070         }
3071
3072         size = sizeof(struct igb_buffer) * tx_ring->count;
3073         memset(tx_ring->buffer_info, 0, size);
3074
3075         /* Zero out the descriptor ring */
3076         memset(tx_ring->desc, 0, tx_ring->size);
3077
3078         tx_ring->next_to_use = 0;
3079         tx_ring->next_to_clean = 0;
3080 }
3081
3082 /**
3083  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3084  * @adapter: board private structure
3085  **/
3086 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3087 {
3088         int i;
3089
3090         for (i = 0; i < adapter->num_tx_queues; i++)
3091                 igb_clean_tx_ring(adapter->tx_ring[i]);
3092 }
3093
3094 /**
3095  * igb_free_rx_resources - Free Rx Resources
3096  * @rx_ring: ring to clean the resources from
3097  *
3098  * Free all receive software resources
3099  **/
3100 void igb_free_rx_resources(struct igb_ring *rx_ring)
3101 {
3102         igb_clean_rx_ring(rx_ring);
3103
3104         vfree(rx_ring->buffer_info);
3105         rx_ring->buffer_info = NULL;
3106
3107         /* if not set, then don't free */
3108         if (!rx_ring->desc)
3109                 return;
3110
3111         dma_free_coherent(rx_ring->dev, rx_ring->size,
3112                           rx_ring->desc, rx_ring->dma);
3113
3114         rx_ring->desc = NULL;
3115 }
3116
3117 /**
3118  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3119  * @adapter: board private structure
3120  *
3121  * Free all receive software resources
3122  **/
3123 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3124 {
3125         int i;
3126
3127         for (i = 0; i < adapter->num_rx_queues; i++)
3128                 igb_free_rx_resources(adapter->rx_ring[i]);
3129 }
3130
3131 /**
3132  * igb_clean_rx_ring - Free Rx Buffers per Queue
3133  * @rx_ring: ring to free buffers from
3134  **/
3135 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3136 {
3137         struct igb_buffer *buffer_info;
3138         unsigned long size;
3139         unsigned int i;
3140
3141         if (!rx_ring->buffer_info)
3142                 return;
3143
3144         /* Free all the Rx ring sk_buffs */
3145         for (i = 0; i < rx_ring->count; i++) {
3146                 buffer_info = &rx_ring->buffer_info[i];
3147                 if (buffer_info->dma) {
3148                         dma_unmap_single(rx_ring->dev,
3149                                          buffer_info->dma,
3150                                          rx_ring->rx_buffer_len,
3151                                          DMA_FROM_DEVICE);
3152                         buffer_info->dma = 0;
3153                 }
3154
3155                 if (buffer_info->skb) {
3156                         dev_kfree_skb(buffer_info->skb);
3157                         buffer_info->skb = NULL;
3158                 }
3159                 if (buffer_info->page_dma) {
3160                         dma_unmap_page(rx_ring->dev,
3161                                        buffer_info->page_dma,
3162                                        PAGE_SIZE / 2,
3163                                        DMA_FROM_DEVICE);
3164                         buffer_info->page_dma = 0;
3165                 }
3166                 if (buffer_info->page) {
3167                         put_page(buffer_info->page);
3168                         buffer_info->page = NULL;
3169                         buffer_info->page_offset = 0;
3170                 }
3171         }
3172
3173         size = sizeof(struct igb_buffer) * rx_ring->count;
3174         memset(rx_ring->buffer_info, 0, size);
3175
3176         /* Zero out the descriptor ring */
3177         memset(rx_ring->desc, 0, rx_ring->size);
3178
3179         rx_ring->next_to_clean = 0;
3180         rx_ring->next_to_use = 0;
3181 }
3182
3183 /**
3184  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3185  * @adapter: board private structure
3186  **/
3187 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3188 {
3189         int i;
3190
3191         for (i = 0; i < adapter->num_rx_queues; i++)
3192                 igb_clean_rx_ring(adapter->rx_ring[i]);
3193 }
3194
3195 /**
3196  * igb_set_mac - Change the Ethernet Address of the NIC
3197  * @netdev: network interface device structure
3198  * @p: pointer to an address structure
3199  *
3200  * Returns 0 on success, negative on failure
3201  **/
3202 static int igb_set_mac(struct net_device *netdev, void *p)
3203 {
3204         struct igb_adapter *adapter = netdev_priv(netdev);
3205         struct e1000_hw *hw = &adapter->hw;
3206         struct sockaddr *addr = p;
3207
3208         if (!is_valid_ether_addr(addr->sa_data))
3209                 return -EADDRNOTAVAIL;
3210
3211         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3212         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3213
3214         /* set the correct pool for the new PF MAC address in entry 0 */
3215         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3216                          adapter->vfs_allocated_count);
3217
3218         return 0;
3219 }
3220
3221 /**
3222  * igb_write_mc_addr_list - write multicast addresses to MTA
3223  * @netdev: network interface device structure
3224  *
3225  * Writes multicast address list to the MTA hash table.
3226  * Returns: -ENOMEM on failure
3227  *                0 on no addresses written
3228  *                X on writing X addresses to MTA
3229  **/
3230 static int igb_write_mc_addr_list(struct net_device *netdev)
3231 {
3232         struct igb_adapter *adapter = netdev_priv(netdev);
3233         struct e1000_hw *hw = &adapter->hw;
3234         struct netdev_hw_addr *ha;
3235         u8  *mta_list;
3236         int i;
3237
3238         if (netdev_mc_empty(netdev)) {
3239                 /* nothing to program, so clear mc list */
3240                 igb_update_mc_addr_list(hw, NULL, 0);
3241                 igb_restore_vf_multicasts(adapter);
3242                 return 0;
3243         }
3244
3245         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3246         if (!mta_list)
3247                 return -ENOMEM;
3248
3249         /* The shared function expects a packed array of only addresses. */
3250         i = 0;
3251         netdev_for_each_mc_addr(ha, netdev)
3252                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3253
3254         igb_update_mc_addr_list(hw, mta_list, i);
3255         kfree(mta_list);
3256
3257         return netdev_mc_count(netdev);
3258 }
3259
3260 /**
3261  * igb_write_uc_addr_list - write unicast addresses to RAR table
3262  * @netdev: network interface device structure
3263  *
3264  * Writes unicast address list to the RAR table.
3265  * Returns: -ENOMEM on failure/insufficient address space
3266  *                0 on no addresses written
3267  *                X on writing X addresses to the RAR table
3268  **/
3269 static int igb_write_uc_addr_list(struct net_device *netdev)
3270 {
3271         struct igb_adapter *adapter = netdev_priv(netdev);
3272         struct e1000_hw *hw = &adapter->hw;
3273         unsigned int vfn = adapter->vfs_allocated_count;
3274         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3275         int count = 0;
3276
3277         /* return ENOMEM indicating insufficient memory for addresses */
3278         if (netdev_uc_count(netdev) > rar_entries)
3279                 return -ENOMEM;
3280
3281         if (!netdev_uc_empty(netdev) && rar_entries) {
3282                 struct netdev_hw_addr *ha;
3283
3284                 netdev_for_each_uc_addr(ha, netdev) {
3285                         if (!rar_entries)
3286                                 break;
3287                         igb_rar_set_qsel(adapter, ha->addr,
3288                                          rar_entries--,
3289                                          vfn);
3290                         count++;
3291                 }
3292         }
3293         /* write the addresses in reverse order to avoid write combining */
3294         for (; rar_entries > 0 ; rar_entries--) {
3295                 wr32(E1000_RAH(rar_entries), 0);
3296                 wr32(E1000_RAL(rar_entries), 0);
3297         }
3298         wrfl();
3299
3300         return count;
3301 }
3302
3303 /**
3304  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3305  * @netdev: network interface device structure
3306  *
3307  * The set_rx_mode entry point is called whenever the unicast or multicast
3308  * address lists or the network interface flags are updated.  This routine is
3309  * responsible for configuring the hardware for proper unicast, multicast,
3310  * promiscuous mode, and all-multi behavior.
3311  **/
3312 static void igb_set_rx_mode(struct net_device *netdev)
3313 {
3314         struct igb_adapter *adapter = netdev_priv(netdev);
3315         struct e1000_hw *hw = &adapter->hw;
3316         unsigned int vfn = adapter->vfs_allocated_count;
3317         u32 rctl, vmolr = 0;
3318         int count;
3319
3320         /* Check for Promiscuous and All Multicast modes */
3321         rctl = rd32(E1000_RCTL);
3322
3323         /* clear the effected bits */
3324         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3325
3326         if (netdev->flags & IFF_PROMISC) {
3327                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3328                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3329         } else {
3330                 if (netdev->flags & IFF_ALLMULTI) {
3331                         rctl |= E1000_RCTL_MPE;
3332                         vmolr |= E1000_VMOLR_MPME;
3333                 } else {
3334                         /*
3335                          * Write addresses to the MTA, if the attempt fails
3336                          * then we should just turn on promiscous mode so
3337                          * that we can at least receive multicast traffic
3338                          */
3339                         count = igb_write_mc_addr_list(netdev);
3340                         if (count < 0) {
3341                                 rctl |= E1000_RCTL_MPE;
3342                                 vmolr |= E1000_VMOLR_MPME;
3343                         } else if (count) {
3344                                 vmolr |= E1000_VMOLR_ROMPE;
3345                         }
3346                 }
3347                 /*
3348                  * Write addresses to available RAR registers, if there is not
3349                  * sufficient space to store all the addresses then enable
3350                  * unicast promiscous mode
3351                  */
3352                 count = igb_write_uc_addr_list(netdev);
3353                 if (count < 0) {
3354                         rctl |= E1000_RCTL_UPE;
3355                         vmolr |= E1000_VMOLR_ROPE;
3356                 }
3357                 rctl |= E1000_RCTL_VFE;
3358         }
3359         wr32(E1000_RCTL, rctl);
3360
3361         /*
3362          * In order to support SR-IOV and eventually VMDq it is necessary to set
3363          * the VMOLR to enable the appropriate modes.  Without this workaround
3364          * we will have issues with VLAN tag stripping not being done for frames
3365          * that are only arriving because we are the default pool
3366          */
3367         if (hw->mac.type < e1000_82576)
3368                 return;
3369
3370         vmolr |= rd32(E1000_VMOLR(vfn)) &
3371                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3372         wr32(E1000_VMOLR(vfn), vmolr);
3373         igb_restore_vf_multicasts(adapter);
3374 }
3375
3376 static void igb_check_wvbr(struct igb_adapter *adapter)
3377 {
3378         struct e1000_hw *hw = &adapter->hw;
3379         u32 wvbr = 0;
3380
3381         switch (hw->mac.type) {
3382         case e1000_82576:
3383         case e1000_i350:
3384                 if (!(wvbr = rd32(E1000_WVBR)))
3385                         return;
3386                 break;
3387         default:
3388                 break;
3389         }
3390
3391         adapter->wvbr |= wvbr;
3392 }
3393
3394 #define IGB_STAGGERED_QUEUE_OFFSET 8
3395
3396 static void igb_spoof_check(struct igb_adapter *adapter)
3397 {
3398         int j;
3399
3400         if (!adapter->wvbr)
3401                 return;
3402
3403         for(j = 0; j < adapter->vfs_allocated_count; j++) {
3404                 if (adapter->wvbr & (1 << j) ||
3405                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3406                         dev_warn(&adapter->pdev->dev,
3407                                 "Spoof event(s) detected on VF %d\n", j);
3408                         adapter->wvbr &=
3409                                 ~((1 << j) |
3410                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3411                 }
3412         }
3413 }
3414
3415 /* Need to wait a few seconds after link up to get diagnostic information from
3416  * the phy */
3417 static void igb_update_phy_info(unsigned long data)
3418 {
3419         struct igb_adapter *adapter = (struct igb_adapter *) data;
3420         igb_get_phy_info(&adapter->hw);
3421 }
3422
3423 /**
3424  * igb_has_link - check shared code for link and determine up/down
3425  * @adapter: pointer to driver private info
3426  **/
3427 bool igb_has_link(struct igb_adapter *adapter)
3428 {
3429         struct e1000_hw *hw = &adapter->hw;
3430         bool link_active = false;
3431         s32 ret_val = 0;
3432
3433         /* get_link_status is set on LSC (link status) interrupt or
3434          * rx sequence error interrupt.  get_link_status will stay
3435          * false until the e1000_check_for_link establishes link
3436          * for copper adapters ONLY
3437          */
3438         switch (hw->phy.media_type) {
3439         case e1000_media_type_copper:
3440                 if (hw->mac.get_link_status) {
3441                         ret_val = hw->mac.ops.check_for_link(hw);
3442                         link_active = !hw->mac.get_link_status;
3443                 } else {
3444                         link_active = true;
3445                 }
3446                 break;
3447         case e1000_media_type_internal_serdes:
3448                 ret_val = hw->mac.ops.check_for_link(hw);
3449                 link_active = hw->mac.serdes_has_link;
3450                 break;
3451         default:
3452         case e1000_media_type_unknown:
3453                 break;
3454         }
3455
3456         return link_active;
3457 }
3458
3459 /**
3460  * igb_watchdog - Timer Call-back
3461  * @data: pointer to adapter cast into an unsigned long
3462  **/
3463 static void igb_watchdog(unsigned long data)
3464 {
3465         struct igb_adapter *adapter = (struct igb_adapter *)data;
3466         /* Do the rest outside of interrupt context */
3467         schedule_work(&adapter->watchdog_task);
3468 }
3469
3470 static void igb_watchdog_task(struct work_struct *work)
3471 {
3472         struct igb_adapter *adapter = container_of(work,
3473                                                    struct igb_adapter,
3474                                                    watchdog_task);
3475         struct e1000_hw *hw = &adapter->hw;
3476         struct net_device *netdev = adapter->netdev;
3477         u32 link;
3478         int i;
3479
3480         link = igb_has_link(adapter);
3481         if (link) {
3482                 if (!netif_carrier_ok(netdev)) {
3483                         u32 ctrl;
3484                         hw->mac.ops.get_speed_and_duplex(hw,
3485                                                          &adapter->link_speed,
3486                                                          &adapter->link_duplex);
3487
3488                         ctrl = rd32(E1000_CTRL);
3489                         /* Links status message must follow this format */
3490                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3491                                  "Flow Control: %s\n",
3492                                netdev->name,
3493                                adapter->link_speed,
3494                                adapter->link_duplex == FULL_DUPLEX ?
3495                                  "Full Duplex" : "Half Duplex",
3496                                ((ctrl & E1000_CTRL_TFCE) &&
3497                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3498                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3499                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3500
3501                         /* adjust timeout factor according to speed/duplex */
3502                         adapter->tx_timeout_factor = 1;
3503                         switch (adapter->link_speed) {
3504                         case SPEED_10:
3505                                 adapter->tx_timeout_factor = 14;
3506                                 break;
3507                         case SPEED_100:
3508                                 /* maybe add some timeout factor ? */
3509                                 break;
3510                         }
3511
3512                         netif_carrier_on(netdev);
3513
3514                         igb_ping_all_vfs(adapter);
3515                         igb_check_vf_rate_limit(adapter);
3516
3517                         /* link state has changed, schedule phy info update */
3518                         if (!test_bit(__IGB_DOWN, &adapter->state))
3519                                 mod_timer(&adapter->phy_info_timer,
3520                                           round_jiffies(jiffies + 2 * HZ));
3521                 }
3522         } else {
3523                 if (netif_carrier_ok(netdev)) {
3524                         adapter->link_speed = 0;
3525                         adapter->link_duplex = 0;
3526                         /* Links status message must follow this format */
3527                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3528                                netdev->name);
3529                         netif_carrier_off(netdev);
3530
3531                         igb_ping_all_vfs(adapter);
3532
3533                         /* link state has changed, schedule phy info update */
3534                         if (!test_bit(__IGB_DOWN, &adapter->state))
3535                                 mod_timer(&adapter->phy_info_timer,
3536                                           round_jiffies(jiffies + 2 * HZ));
3537                 }
3538         }
3539
3540         spin_lock(&adapter->stats64_lock);
3541         igb_update_stats(adapter, &adapter->stats64);
3542         spin_unlock(&adapter->stats64_lock);
3543
3544         for (i = 0; i < adapter->num_tx_queues; i++) {
3545                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3546                 if (!netif_carrier_ok(netdev)) {
3547                         /* We've lost link, so the controller stops DMA,
3548                          * but we've got queued Tx work that's never going
3549                          * to get done, so reset controller to flush Tx.
3550                          * (Do the reset outside of interrupt context). */
3551                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3552                                 adapter->tx_timeout_count++;
3553                                 schedule_work(&adapter->reset_task);
3554                                 /* return immediately since reset is imminent */
3555                                 return;
3556                         }
3557                 }
3558
3559                 /* Force detection of hung controller every watchdog period */
3560                 tx_ring->detect_tx_hung = true;
3561         }
3562
3563         /* Cause software interrupt to ensure rx ring is cleaned */
3564         if (adapter->msix_entries) {
3565                 u32 eics = 0;
3566                 for (i = 0; i < adapter->num_q_vectors; i++) {
3567                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3568                         eics |= q_vector->eims_value;
3569                 }
3570                 wr32(E1000_EICS, eics);
3571         } else {
3572                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3573         }
3574
3575         igb_spoof_check(adapter);
3576
3577         /* Reset the timer */
3578         if (!test_bit(__IGB_DOWN, &adapter->state))
3579                 mod_timer(&adapter->watchdog_timer,
3580                           round_jiffies(jiffies + 2 * HZ));
3581 }
3582
/* ITR classes used by igb_update_itr() and igb_set_itr() */
enum latency_range {
	lowest_latency = 0,
	low_latency,			/* 1 */
	bulk_latency,			/* 2 */
	latency_invalid = 255
};
3589
3590 /**
3591  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3592  *
3593  *      Stores a new ITR value based on strictly on packet size.  This
3594  *      algorithm is less sophisticated than that used in igb_update_itr,
3595  *      due to the difficulty of synchronizing statistics across multiple
3596  *      receive rings.  The divisors and thresholds used by this function
3597  *      were determined based on theoretical maximum wire speed and testing
3598  *      data, in order to minimize response time while increasing bulk
3599  *      throughput.
3600  *      This functionality is controlled by the InterruptThrottleRate module
3601  *      parameter (see igb_param.c)
3602  *      NOTE:  This function is called only when operating in a multiqueue
3603  *             receive environment.
3604  * @q_vector: pointer to q_vector
3605  **/
3606 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3607 {
3608         int new_val = q_vector->itr_val;
3609         int avg_wire_size = 0;
3610         struct igb_adapter *adapter = q_vector->adapter;
3611         struct igb_ring *ring;
3612         unsigned int packets;
3613
3614         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3615          * ints/sec - ITR timer value of 120 ticks.
3616          */
3617         if (adapter->link_speed != SPEED_1000) {
3618                 new_val = 976;
3619                 goto set_itr_val;
3620         }
3621
3622         ring = q_vector->rx_ring;
3623         if (ring) {
3624                 packets = ACCESS_ONCE(ring->total_packets);
3625
3626                 if (packets)
3627                         avg_wire_size = ring->total_bytes / packets;
3628         }
3629
3630         ring = q_vector->tx_ring;
3631         if (ring) {
3632                 packets = ACCESS_ONCE(ring->total_packets);
3633
3634                 if (packets)
3635                         avg_wire_size = max_t(u32, avg_wire_size,
3636                                               ring->total_bytes / packets);
3637         }
3638
3639         /* if avg_wire_size isn't set no work was done */
3640         if (!avg_wire_size)
3641                 goto clear_counts;
3642
3643         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3644         avg_wire_size += 24;
3645
3646         /* Don't starve jumbo frames */
3647         avg_wire_size = min(avg_wire_size, 3000);
3648
3649         /* Give a little boost to mid-size frames */
3650         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3651                 new_val = avg_wire_size / 3;
3652         else
3653                 new_val = avg_wire_size / 2;
3654
3655         /* when in itr mode 3 do not exceed 20K ints/sec */
3656         if (adapter->rx_itr_setting == 3 && new_val < 196)
3657                 new_val = 196;
3658
3659 set_itr_val:
3660         if (new_val != q_vector->itr_val) {
3661                 q_vector->itr_val = new_val;
3662                 q_vector->set_itr = 1;
3663         }
3664 clear_counts:
3665         if (q_vector->rx_ring) {
3666                 q_vector->rx_ring->total_bytes = 0;
3667                 q_vector->rx_ring->total_packets = 0;
3668         }
3669         if (q_vector->tx_ring) {
3670                 q_vector->tx_ring->total_bytes = 0;
3671                 q_vector->tx_ring->total_packets = 0;
3672         }
3673 }
3674
3675 /**
3676  * igb_update_itr - update the dynamic ITR value based on statistics
3677  *      Stores a new ITR value based on packets and byte
3678  *      counts during the last interrupt.  The advantage of per interrupt
3679  *      computation is faster updates and more accurate ITR for the current
3680  *      traffic pattern.  Constants in this function were computed
3681  *      based on theoretical maximum wire speed and thresholds were set based
3682  *      on testing data as well as attempting to minimize response time
3683  *      while increasing bulk throughput.
3684  *      this functionality is controlled by the InterruptThrottleRate module
3685  *      parameter (see igb_param.c)
3686  *      NOTE:  These calculations are only valid when operating in a single-
3687  *             queue environment.
3688  * @adapter: pointer to adapter
3689  * @itr_setting: current q_vector->itr_val
3690  * @packets: the number of packets during this measurement interval
3691  * @bytes: the number of bytes during this measurement interval
3692  **/
3693 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3694                                    int packets, int bytes)
3695 {
3696         unsigned int retval = itr_setting;
3697
3698         if (packets == 0)
3699                 goto update_itr_done;
3700
3701         switch (itr_setting) {
3702         case lowest_latency:
3703                 /* handle TSO and jumbo frames */
3704                 if (bytes/packets > 8000)
3705                         retval = bulk_latency;
3706                 else if ((packets < 5) && (bytes > 512))
3707                         retval = low_latency;
3708                 break;
3709         case low_latency:  /* 50 usec aka 20000 ints/s */
3710                 if (bytes > 10000) {
3711                         /* this if handles the TSO accounting */
3712                         if (bytes/packets > 8000) {
3713                                 retval = bulk_latency;
3714                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3715                                 retval = bulk_latency;
3716                         } else if ((packets > 35)) {
3717                                 retval = lowest_latency;
3718                         }
3719                 } else if (bytes/packets > 2000) {
3720                         retval = bulk_latency;
3721                 } else if (packets <= 2 && bytes < 512) {
3722                         retval = lowest_latency;
3723                 }
3724                 break;
3725         case bulk_latency: /* 250 usec aka 4000 ints/s */
3726                 if (bytes > 25000) {
3727                         if (packets > 35)
3728                                 retval = low_latency;
3729                 } else if (bytes < 1500) {
3730                         retval = low_latency;
3731                 }
3732                 break;
3733         }
3734
3735 update_itr_done:
3736         return retval;
3737 }
3738
3739 static void igb_set_itr(struct igb_adapter *adapter)
3740 {
3741         struct igb_q_vector *q_vector = adapter->q_vector[0];
3742         u16 current_itr;
3743         u32 new_itr = q_vector->itr_val;
3744
3745         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3746         if (adapter->link_speed != SPEED_1000) {
3747                 current_itr = 0;
3748                 new_itr = 4000;
3749                 goto set_itr_now;
3750         }
3751
3752         adapter->rx_itr = igb_update_itr(adapter,
3753                                     adapter->rx_itr,
3754                                     q_vector->rx_ring->total_packets,
3755                                     q_vector->rx_ring->total_bytes);
3756
3757         adapter->tx_itr = igb_update_itr(adapter,
3758                                     adapter->tx_itr,
3759                                     q_vector->tx_ring->total_packets,
3760                                     q_vector->tx_ring->total_bytes);
3761         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3762
3763         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3764         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3765                 current_itr = low_latency;
3766
3767         switch (current_itr) {
3768         /* counts and packets in update_itr are dependent on these numbers */
3769         case lowest_latency:
3770                 new_itr = 56;  /* aka 70,000 ints/sec */
3771                 break;
3772         case low_latency:
3773                 new_itr = 196; /* aka 20,000 ints/sec */
3774                 break;
3775         case bulk_latency:
3776                 new_itr = 980; /* aka 4,000 ints/sec */
3777                 break;
3778         default:
3779                 break;
3780         }
3781
3782 set_itr_now:
3783         q_vector->rx_ring->total_bytes = 0;
3784         q_vector->rx_ring->total_packets = 0;
3785         q_vector->tx_ring->total_bytes = 0;
3786         q_vector->tx_ring->total_packets = 0;
3787
3788         if (new_itr != q_vector->itr_val) {
3789                 /* this attempts to bias the interrupt rate towards Bulk
3790                  * by adding intermediate steps when interrupt rate is
3791                  * increasing */
3792                 new_itr = new_itr > q_vector->itr_val ?
3793                              max((new_itr * q_vector->itr_val) /
3794                                  (new_itr + (q_vector->itr_val >> 2)),
3795                                  new_itr) :
3796                              new_itr;
3797                 /* Don't write the value here; it resets the adapter's
3798                  * internal timer, and causes us to delay far longer than
3799                  * we should between interrupts.  Instead, we write the ITR
3800                  * value at the beginning of the next interrupt so the timing
3801                  * ends up being correct.
3802                  */
3803                 q_vector->itr_val = new_itr;
3804                 q_vector->set_itr = 1;
3805         }
3806 }
3807
3808 #define IGB_TX_FLAGS_CSUM               0x00000001
3809 #define IGB_TX_FLAGS_VLAN               0x00000002
3810 #define IGB_TX_FLAGS_TSO                0x00000004
3811 #define IGB_TX_FLAGS_IPV4               0x00000008
3812 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3813 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3814 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3815
3816 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3817                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3818 {
3819         struct e1000_adv_tx_context_desc *context_desc;
3820         unsigned int i;
3821         int err;
3822         struct igb_buffer *buffer_info;
3823         u32 info = 0, tu_cmd = 0;
3824         u32 mss_l4len_idx;
3825         u8 l4len;
3826
3827         if (skb_header_cloned(skb)) {
3828                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3829                 if (err)
3830                         return err;
3831         }
3832
3833         l4len = tcp_hdrlen(skb);
3834         *hdr_len += l4len;
3835
3836         if (skb->protocol == htons(ETH_P_IP)) {
3837                 struct iphdr *iph = ip_hdr(skb);
3838                 iph->tot_len = 0;
3839                 iph->check = 0;
3840                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3841                                                          iph->daddr, 0,
3842                                                          IPPROTO_TCP,
3843                                                          0);
3844         } else if (skb_is_gso_v6(skb)) {
3845                 ipv6_hdr(skb)->payload_len = 0;
3846                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3847                                                        &ipv6_hdr(skb)->daddr,
3848                                                        0, IPPROTO_TCP, 0);
3849         }
3850
3851         i = tx_ring->next_to_use;
3852
3853         buffer_info = &tx_ring->buffer_info[i];
3854         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3855         /* VLAN MACLEN IPLEN */
3856         if (tx_flags & IGB_TX_FLAGS_VLAN)
3857                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3858         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3859         *hdr_len += skb_network_offset(skb);
3860         info |= skb_network_header_len(skb);
3861         *hdr_len += skb_network_header_len(skb);
3862         context_desc->vlan_macip_lens = cpu_to_le32(info);
3863
3864         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3865         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3866
3867         if (skb->protocol == htons(ETH_P_IP))
3868                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3869         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3870
3871         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3872
3873         /* MSS L4LEN IDX */
3874         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3875         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3876
3877         /* For 82575, context index must be unique per ring. */
3878         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3879                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3880
3881         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3882         context_desc->seqnum_seed = 0;
3883
3884         buffer_info->time_stamp = jiffies;
3885         buffer_info->next_to_watch = i;
3886         buffer_info->dma = 0;
3887         i++;
3888         if (i == tx_ring->count)
3889                 i = 0;
3890
3891         tx_ring->next_to_use = i;
3892
3893         return true;
3894 }
3895
3896 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3897                                    struct sk_buff *skb, u32 tx_flags)
3898 {
3899         struct e1000_adv_tx_context_desc *context_desc;
3900         struct device *dev = tx_ring->dev;
3901         struct igb_buffer *buffer_info;
3902         u32 info = 0, tu_cmd = 0;
3903         unsigned int i;
3904
3905         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3906             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3907                 i = tx_ring->next_to_use;
3908                 buffer_info = &tx_ring->buffer_info[i];
3909                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3910
3911                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3912                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3913
3914                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3915                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3916                         info |= skb_network_header_len(skb);
3917
3918                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3919
3920                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3921
3922                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3923                         __be16 protocol;
3924
3925                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3926                                 const struct vlan_ethhdr *vhdr =
3927                                           (const struct vlan_ethhdr*)skb->data;
3928
3929                                 protocol = vhdr->h_vlan_encapsulated_proto;
3930                         } else {
3931                                 protocol = skb->protocol;
3932                         }
3933
3934                         switch (protocol) {
3935                         case cpu_to_be16(ETH_P_IP):
3936                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3937                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3938                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3939                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3940                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3941                                 break;
3942                         case cpu_to_be16(ETH_P_IPV6):
3943                                 /* XXX what about other V6 headers?? */
3944                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3945                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3946                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3947                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3948                                 break;
3949                         default:
3950                                 if (unlikely(net_ratelimit()))
3951                                         dev_warn(dev,
3952                                             "partial checksum but proto=%x!\n",
3953                                             skb->protocol);
3954                                 break;
3955                         }
3956                 }
3957
3958                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3959                 context_desc->seqnum_seed = 0;
3960                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3961                         context_desc->mss_l4len_idx =
3962                                 cpu_to_le32(tx_ring->reg_idx << 4);
3963
3964                 buffer_info->time_stamp = jiffies;
3965                 buffer_info->next_to_watch = i;
3966                 buffer_info->dma = 0;
3967
3968                 i++;
3969                 if (i == tx_ring->count)
3970                         i = 0;
3971                 tx_ring->next_to_use = i;
3972
3973                 return true;
3974         }
3975         return false;
3976 }
3977
3978 #define IGB_MAX_TXD_PWR 16
3979 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3980
3981 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3982                                  unsigned int first)
3983 {
3984         struct igb_buffer *buffer_info;
3985         struct device *dev = tx_ring->dev;
3986         unsigned int hlen = skb_headlen(skb);
3987         unsigned int count = 0, i;
3988         unsigned int f;
3989         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3990
3991         i = tx_ring->next_to_use;
3992
3993         buffer_info = &tx_ring->buffer_info[i];
3994         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3995         buffer_info->length = hlen;
3996         /* set time_stamp *before* dma to help avoid a possible race */
3997         buffer_info->time_stamp = jiffies;
3998         buffer_info->next_to_watch = i;
3999         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4000                                           DMA_TO_DEVICE);
4001         if (dma_mapping_error(dev, buffer_info->dma))
4002                 goto dma_error;
4003
4004         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4005                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4006                 unsigned int len = frag->size;
4007
4008                 count++;
4009                 i++;
4010                 if (i == tx_ring->count)
4011                         i = 0;
4012
4013                 buffer_info = &tx_ring->buffer_info[i];
4014                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4015                 buffer_info->length = len;
4016                 buffer_info->time_stamp = jiffies;
4017                 buffer_info->next_to_watch = i;
4018                 buffer_info->mapped_as_page = true;
4019                 buffer_info->dma = dma_map_page(dev,
4020                                                 frag->page,
4021                                                 frag->page_offset,
4022                                                 len,
4023                                                 DMA_TO_DEVICE);
4024                 if (dma_mapping_error(dev, buffer_info->dma))
4025                         goto dma_error;
4026
4027         }
4028
4029         tx_ring->buffer_info[i].skb = skb;
4030         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4031         /* multiply data chunks by size of headers */
4032         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4033         tx_ring->buffer_info[i].gso_segs = gso_segs;
4034         tx_ring->buffer_info[first].next_to_watch = i;
4035
4036         return ++count;
4037
4038 dma_error:
4039         dev_err(dev, "TX DMA map failed\n");
4040
4041         /* clear timestamp and dma mappings for failed buffer_info mapping */
4042         buffer_info->dma = 0;
4043         buffer_info->time_stamp = 0;
4044         buffer_info->length = 0;
4045         buffer_info->next_to_watch = 0;
4046         buffer_info->mapped_as_page = false;
4047
4048         /* clear timestamp and dma mappings for remaining portion of packet */
4049         while (count--) {
4050                 if (i == 0)
4051                         i = tx_ring->count;
4052                 i--;
4053                 buffer_info = &tx_ring->buffer_info[i];
4054                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4055         }
4056
4057         return 0;
4058 }
4059
4060 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4061                                     u32 tx_flags, int count, u32 paylen,
4062                                     u8 hdr_len)
4063 {
4064         union e1000_adv_tx_desc *tx_desc;
4065         struct igb_buffer *buffer_info;
4066         u32 olinfo_status = 0, cmd_type_len;
4067         unsigned int i = tx_ring->next_to_use;
4068
4069         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4070                         E1000_ADVTXD_DCMD_DEXT);
4071
4072         if (tx_flags & IGB_TX_FLAGS_VLAN)
4073                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4074
4075         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4076                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4077
4078         if (tx_flags & IGB_TX_FLAGS_TSO) {
4079                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4080
4081                 /* insert tcp checksum */
4082                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4083
4084                 /* insert ip checksum */
4085                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4086                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4087
4088         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4089                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4090         }
4091
4092         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4093             (tx_flags & (IGB_TX_FLAGS_CSUM |
4094                          IGB_TX_FLAGS_TSO |
4095                          IGB_TX_FLAGS_VLAN)))
4096                 olinfo_status |= tx_ring->reg_idx << 4;
4097
4098         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4099
4100         do {
4101                 buffer_info = &tx_ring->buffer_info[i];
4102                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4103                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4104                 tx_desc->read.cmd_type_len =
4105                         cpu_to_le32(cmd_type_len | buffer_info->length);
4106                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4107                 count--;
4108                 i++;
4109                 if (i == tx_ring->count)
4110                         i = 0;
4111         } while (count > 0);
4112
4113         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4114         /* Force memory writes to complete before letting h/w
4115          * know there are new descriptors to fetch.  (Only
4116          * applicable for weak-ordered memory model archs,
4117          * such as IA-64). */
4118         wmb();
4119
4120         tx_ring->next_to_use = i;
4121         writel(i, tx_ring->tail);
4122         /* we need this if more than one processor can write to our tail
4123          * at a time, it syncronizes IO on IA64/Altix systems */
4124         mmiowb();
4125 }
4126
4127 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4128 {
4129         struct net_device *netdev = tx_ring->netdev;
4130
4131         netif_stop_subqueue(netdev, tx_ring->queue_index);
4132
4133         /* Herbert's original patch had:
4134          *  smp_mb__after_netif_stop_queue();
4135          * but since that doesn't exist yet, just open code it. */
4136         smp_mb();
4137
4138         /* We need to check again in a case another CPU has just
4139          * made room available. */
4140         if (igb_desc_unused(tx_ring) < size)
4141                 return -EBUSY;
4142
4143         /* A reprieve! */
4144         netif_wake_subqueue(netdev, tx_ring->queue_index);
4145
4146         u64_stats_update_begin(&tx_ring->tx_syncp2);
4147         tx_ring->tx_stats.restart_queue2++;
4148         u64_stats_update_end(&tx_ring->tx_syncp2);
4149
4150         return 0;
4151 }
4152
/**
 * igb_maybe_stop_tx - make sure @size descriptors are available
 * @tx_ring: Tx ring to check
 * @size: number of free descriptors required
 *
 * Returns 0 when enough descriptors are unused; otherwise defers to
 * __igb_maybe_stop_tx() and returns its result.
 **/
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	return (igb_desc_unused(tx_ring) >= size) ?
		0 : __igb_maybe_stop_tx(tx_ring, size);
}
4159
4160 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4161                                     struct igb_ring *tx_ring)
4162 {
4163         int tso = 0, count;
4164         u32 tx_flags = 0;
4165         u16 first;
4166         u8 hdr_len = 0;
4167
4168         /* need: 1 descriptor per page,
4169          *       + 2 desc gap to keep tail from touching head,
4170          *       + 1 desc for skb->data,
4171          *       + 1 desc for context descriptor,
4172          * otherwise try next time */
4173         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4174                 /* this is a hard error */
4175                 return NETDEV_TX_BUSY;
4176         }
4177
4178         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4179                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4180                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4181         }
4182
4183         if (vlan_tx_tag_present(skb)) {
4184                 tx_flags |= IGB_TX_FLAGS_VLAN;
4185                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4186         }
4187
4188         if (skb->protocol == htons(ETH_P_IP))
4189                 tx_flags |= IGB_TX_FLAGS_IPV4;
4190
4191         first = tx_ring->next_to_use;
4192         if (skb_is_gso(skb)) {
4193                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4194
4195                 if (tso < 0) {
4196                         dev_kfree_skb_any(skb);
4197                         return NETDEV_TX_OK;
4198                 }
4199         }
4200
4201         if (tso)
4202                 tx_flags |= IGB_TX_FLAGS_TSO;
4203         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4204                  (skb->ip_summed == CHECKSUM_PARTIAL))
4205                 tx_flags |= IGB_TX_FLAGS_CSUM;
4206
4207         /*
4208          * count reflects descriptors mapped, if 0 or less then mapping error
4209          * has occured and we need to rewind the descriptor queue
4210          */
4211         count = igb_tx_map_adv(tx_ring, skb, first);
4212         if (!count) {
4213                 dev_kfree_skb_any(skb);
4214                 tx_ring->buffer_info[first].time_stamp = 0;
4215                 tx_ring->next_to_use = first;
4216                 return NETDEV_TX_OK;
4217         }
4218
4219         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4220
4221         /* Make sure there is space in the ring for the next send. */
4222         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4223
4224         return NETDEV_TX_OK;
4225 }
4226
4227 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4228                                       struct net_device *netdev)
4229 {
4230         struct igb_adapter *adapter = netdev_priv(netdev);
4231         struct igb_ring *tx_ring;
4232         int r_idx = 0;
4233
4234         if (test_bit(__IGB_DOWN, &adapter->state)) {
4235                 dev_kfree_skb_any(skb);
4236                 return NETDEV_TX_OK;
4237         }
4238
4239         if (skb->len <= 0) {
4240                 dev_kfree_skb_any(skb);
4241                 return NETDEV_TX_OK;
4242         }
4243
4244         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4245         tx_ring = adapter->multi_tx_table[r_idx];
4246
4247         /* This goes back to the question of how to logically map a tx queue
4248          * to a flow.  Right now, performance is impacted slightly negatively
4249          * if using multiple tx queues.  If the stack breaks away from a
4250          * single qdisc implementation, we can look at this again. */
4251         return igb_xmit_frame_ring_adv(skb, tx_ring);
4252 }
4253
4254 /**
4255  * igb_tx_timeout - Respond to a Tx Hang
4256  * @netdev: network interface device structure
4257  **/
4258 static void igb_tx_timeout(struct net_device *netdev)
4259 {
4260         struct igb_adapter *adapter = netdev_priv(netdev);
4261         struct e1000_hw *hw = &adapter->hw;
4262
4263         /* Do the reset outside of interrupt context */
4264         adapter->tx_timeout_count++;
4265
4266         if (hw->mac.type == e1000_82580)
4267                 hw->dev_spec._82575.global_device_reset = true;
4268
4269         schedule_work(&adapter->reset_task);
4270         wr32(E1000_EICS,
4271              (adapter->eims_enable_mask & ~adapter->eims_other));
4272 }
4273
4274 static void igb_reset_task(struct work_struct *work)
4275 {
4276         struct igb_adapter *adapter;
4277         adapter = container_of(work, struct igb_adapter, reset_task);
4278
4279         igb_dump(adapter);
4280         netdev_err(adapter->netdev, "Reset adapter\n");
4281         igb_reinit_locked(adapter);
4282 }
4283
4284 /**
4285  * igb_get_stats64 - Get System Network Statistics
4286  * @netdev: network interface device structure
4287  * @stats: rtnl_link_stats64 pointer
4288  *
4289  **/
4290 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4291                                                  struct rtnl_link_stats64 *stats)
4292 {
4293         struct igb_adapter *adapter = netdev_priv(netdev);
4294
4295         spin_lock(&adapter->stats64_lock);
4296         igb_update_stats(adapter, &adapter->stats64);
4297         memcpy(stats, &adapter->stats64, sizeof(*stats));
4298         spin_unlock(&adapter->stats64_lock);
4299
4300         return stats;
4301 }
4302
4303 /**
4304  * igb_change_mtu - Change the Maximum Transfer Unit
4305  * @netdev: network interface device structure
4306  * @new_mtu: new value for maximum frame size
4307  *
4308  * Returns 0 on success, negative on failure
4309  **/
4310 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4311 {
4312         struct igb_adapter *adapter = netdev_priv(netdev);
4313         struct pci_dev *pdev = adapter->pdev;
4314         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4315         u32 rx_buffer_len, i;
4316
4317         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4318                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4319                 return -EINVAL;
4320         }
4321
4322         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4323                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4324                 return -EINVAL;
4325         }
4326
4327         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4328                 msleep(1);
4329
4330         /* igb_down has a dependency on max_frame_size */
4331         adapter->max_frame_size = max_frame;
4332
4333         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4334          * means we reserve 2 more, this pushes us to allocate from the next
4335          * larger slab size.
4336          * i.e. RXBUFFER_2048 --> size-4096 slab
4337          */
4338
4339         if (adapter->hw.mac.type == e1000_82580)
4340                 max_frame += IGB_TS_HDR_LEN;
4341
4342         if (max_frame <= IGB_RXBUFFER_1024)
4343                 rx_buffer_len = IGB_RXBUFFER_1024;
4344         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4345                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4346         else
4347                 rx_buffer_len = IGB_RXBUFFER_128;
4348
4349         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4350              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4351                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4352
4353         if ((adapter->hw.mac.type == e1000_82580) &&
4354             (rx_buffer_len == IGB_RXBUFFER_128))
4355                 rx_buffer_len += IGB_RXBUFFER_64;
4356
4357         if (netif_running(netdev))
4358                 igb_down(adapter);
4359
4360         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4361                  netdev->mtu, new_mtu);
4362         netdev->mtu = new_mtu;
4363
4364         for (i = 0; i < adapter->num_rx_queues; i++)
4365                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4366
4367         if (netif_running(netdev))
4368                 igb_up(adapter);
4369         else
4370                 igb_reset(adapter);
4371
4372         clear_bit(__IGB_RESETTING, &adapter->state);
4373
4374         return 0;
4375 }
4376
4377 /**
4378  * igb_update_stats - Update the board statistics counters
4379  * @adapter: board private structure
4380  **/
4381
4382 void igb_update_stats(struct igb_adapter *adapter,
4383                       struct rtnl_link_stats64 *net_stats)
4384 {
4385         struct e1000_hw *hw = &adapter->hw;
4386         struct pci_dev *pdev = adapter->pdev;
4387         u32 reg, mpc;
4388         u16 phy_tmp;
4389         int i;
4390         u64 bytes, packets;
4391         unsigned int start;
4392         u64 _bytes, _packets;
4393
4394 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4395
4396         /*
4397          * Prevent stats update while adapter is being reset, or if the pci
4398          * connection is down.
4399          */
4400         if (adapter->link_speed == 0)
4401                 return;
4402         if (pci_channel_offline(pdev))
4403                 return;
4404
4405         bytes = 0;
4406         packets = 0;
4407         for (i = 0; i < adapter->num_rx_queues; i++) {
4408                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4409                 struct igb_ring *ring = adapter->rx_ring[i];
4410
4411                 ring->rx_stats.drops += rqdpc_tmp;
4412                 net_stats->rx_fifo_errors += rqdpc_tmp;
4413
4414                 do {
4415                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4416                         _bytes = ring->rx_stats.bytes;
4417                         _packets = ring->rx_stats.packets;
4418                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4419                 bytes += _bytes;
4420                 packets += _packets;
4421         }
4422
4423         net_stats->rx_bytes = bytes;
4424         net_stats->rx_packets = packets;
4425
4426         bytes = 0;
4427         packets = 0;
4428         for (i = 0; i < adapter->num_tx_queues; i++) {
4429                 struct igb_ring *ring = adapter->tx_ring[i];
4430                 do {
4431                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4432                         _bytes = ring->tx_stats.bytes;
4433                         _packets = ring->tx_stats.packets;
4434                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4435                 bytes += _bytes;
4436                 packets += _packets;
4437         }
4438         net_stats->tx_bytes = bytes;
4439         net_stats->tx_packets = packets;
4440
4441         /* read stats registers */
4442         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4443         adapter->stats.gprc += rd32(E1000_GPRC);
4444         adapter->stats.gorc += rd32(E1000_GORCL);
4445         rd32(E1000_GORCH); /* clear GORCL */
4446         adapter->stats.bprc += rd32(E1000_BPRC);
4447         adapter->stats.mprc += rd32(E1000_MPRC);
4448         adapter->stats.roc += rd32(E1000_ROC);
4449
4450         adapter->stats.prc64 += rd32(E1000_PRC64);
4451         adapter->stats.prc127 += rd32(E1000_PRC127);
4452         adapter->stats.prc255 += rd32(E1000_PRC255);
4453         adapter->stats.prc511 += rd32(E1000_PRC511);
4454         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4455         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4456         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4457         adapter->stats.sec += rd32(E1000_SEC);
4458
4459         mpc = rd32(E1000_MPC);
4460         adapter->stats.mpc += mpc;
4461         net_stats->rx_fifo_errors += mpc;
4462         adapter->stats.scc += rd32(E1000_SCC);
4463         adapter->stats.ecol += rd32(E1000_ECOL);
4464         adapter->stats.mcc += rd32(E1000_MCC);
4465         adapter->stats.latecol += rd32(E1000_LATECOL);
4466         adapter->stats.dc += rd32(E1000_DC);
4467         adapter->stats.rlec += rd32(E1000_RLEC);
4468         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4469         adapter->stats.xontxc += rd32(E1000_XONTXC);
4470         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4471         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4472         adapter->stats.fcruc += rd32(E1000_FCRUC);
4473         adapter->stats.gptc += rd32(E1000_GPTC);
4474         adapter->stats.gotc += rd32(E1000_GOTCL);
4475         rd32(E1000_GOTCH); /* clear GOTCL */
4476         adapter->stats.rnbc += rd32(E1000_RNBC);
4477         adapter->stats.ruc += rd32(E1000_RUC);
4478         adapter->stats.rfc += rd32(E1000_RFC);
4479         adapter->stats.rjc += rd32(E1000_RJC);
4480         adapter->stats.tor += rd32(E1000_TORH);
4481         adapter->stats.tot += rd32(E1000_TOTH);
4482         adapter->stats.tpr += rd32(E1000_TPR);
4483
4484         adapter->stats.ptc64 += rd32(E1000_PTC64);
4485         adapter->stats.ptc127 += rd32(E1000_PTC127);
4486         adapter->stats.ptc255 += rd32(E1000_PTC255);
4487         adapter->stats.ptc511 += rd32(E1000_PTC511);
4488         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4489         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4490
4491         adapter->stats.mptc += rd32(E1000_MPTC);
4492         adapter->stats.bptc += rd32(E1000_BPTC);
4493
4494         adapter->stats.tpt += rd32(E1000_TPT);
4495         adapter->stats.colc += rd32(E1000_COLC);
4496
4497         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4498         /* read internal phy specific stats */
4499         reg = rd32(E1000_CTRL_EXT);
4500         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4501                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4502                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4503         }
4504
4505         adapter->stats.tsctc += rd32(E1000_TSCTC);
4506         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4507
4508         adapter->stats.iac += rd32(E1000_IAC);
4509         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4510         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4511         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4512         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4513         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4514         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4515         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4516         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4517
4518         /* Fill out the OS statistics structure */
4519         net_stats->multicast = adapter->stats.mprc;
4520         net_stats->collisions = adapter->stats.colc;
4521
4522         /* Rx Errors */
4523
4524         /* RLEC on some newer hardware can be incorrect so build
4525          * our own version based on RUC and ROC */
4526         net_stats->rx_errors = adapter->stats.rxerrc +
4527                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4528                 adapter->stats.ruc + adapter->stats.roc +
4529                 adapter->stats.cexterr;
4530         net_stats->rx_length_errors = adapter->stats.ruc +
4531                                       adapter->stats.roc;
4532         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4533         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4534         net_stats->rx_missed_errors = adapter->stats.mpc;
4535
4536         /* Tx Errors */
4537         net_stats->tx_errors = adapter->stats.ecol +
4538                                adapter->stats.latecol;
4539         net_stats->tx_aborted_errors = adapter->stats.ecol;
4540         net_stats->tx_window_errors = adapter->stats.latecol;
4541         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4542
4543         /* Tx Dropped needs to be maintained elsewhere */
4544
4545         /* Phy Stats */
4546         if (hw->phy.media_type == e1000_media_type_copper) {
4547                 if ((adapter->link_speed == SPEED_1000) &&
4548                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4549                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4550                         adapter->phy_stats.idle_errors += phy_tmp;
4551                 }
4552         }
4553
4554         /* Management Stats */
4555         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4556         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4557         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4558 }
4559
4560 static irqreturn_t igb_msix_other(int irq, void *data)
4561 {
4562         struct igb_adapter *adapter = data;
4563         struct e1000_hw *hw = &adapter->hw;
4564         u32 icr = rd32(E1000_ICR);
4565         /* reading ICR causes bit 31 of EICR to be cleared */
4566
4567         if (icr & E1000_ICR_DRSTA)
4568                 schedule_work(&adapter->reset_task);
4569
4570         if (icr & E1000_ICR_DOUTSYNC) {
4571                 /* HW is reporting DMA is out of sync */
4572                 adapter->stats.doosync++;
4573                 /* The DMA Out of Sync is also indication of a spoof event
4574                  * in IOV mode. Check the Wrong VM Behavior register to
4575                  * see if it is really a spoof event. */
4576                 igb_check_wvbr(adapter);
4577         }
4578
4579         /* Check for a mailbox event */
4580         if (icr & E1000_ICR_VMMB)
4581                 igb_msg_task(adapter);
4582
4583         if (icr & E1000_ICR_LSC) {
4584                 hw->mac.get_link_status = 1;
4585                 /* guard against interrupt when we're going down */
4586                 if (!test_bit(__IGB_DOWN, &adapter->state))
4587                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4588         }
4589
4590         if (adapter->vfs_allocated_count)
4591                 wr32(E1000_IMS, E1000_IMS_LSC |
4592                                 E1000_IMS_VMMB |
4593                                 E1000_IMS_DOUTSYNC);
4594         else
4595                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4596         wr32(E1000_EIMS, adapter->eims_other);
4597
4598         return IRQ_HANDLED;
4599 }
4600
4601 static void igb_write_itr(struct igb_q_vector *q_vector)
4602 {
4603         struct igb_adapter *adapter = q_vector->adapter;
4604         u32 itr_val = q_vector->itr_val & 0x7FFC;
4605
4606         if (!q_vector->set_itr)
4607                 return;
4608
4609         if (!itr_val)
4610                 itr_val = 0x4;
4611
4612         if (adapter->hw.mac.type == e1000_82575)
4613                 itr_val |= itr_val << 16;
4614         else
4615                 itr_val |= 0x8000000;
4616
4617         writel(itr_val, q_vector->itr_register);
4618         q_vector->set_itr = 0;
4619 }
4620
4621 static irqreturn_t igb_msix_ring(int irq, void *data)
4622 {
4623         struct igb_q_vector *q_vector = data;
4624
4625         /* Write the ITR value calculated from the previous interrupt. */
4626         igb_write_itr(q_vector);
4627
4628         napi_schedule(&q_vector->napi);
4629
4630         return IRQ_HANDLED;
4631 }
4632
4633 #ifdef CONFIG_IGB_DCA
4634 static void igb_update_dca(struct igb_q_vector *q_vector)
4635 {
4636         struct igb_adapter *adapter = q_vector->adapter;
4637         struct e1000_hw *hw = &adapter->hw;
4638         int cpu = get_cpu();
4639
4640         if (q_vector->cpu == cpu)
4641                 goto out_no_update;
4642
4643         if (q_vector->tx_ring) {
4644                 int q = q_vector->tx_ring->reg_idx;
4645                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4646                 if (hw->mac.type == e1000_82575) {
4647                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4648                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4649                 } else {
4650                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4651                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4652                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4653                 }
4654                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4655                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4656         }
4657         if (q_vector->rx_ring) {
4658                 int q = q_vector->rx_ring->reg_idx;
4659                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4660                 if (hw->mac.type == e1000_82575) {
4661                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4662                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4663                 } else {
4664                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4665                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4666                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4667                 }
4668                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4669                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4670                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4671                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4672         }
4673         q_vector->cpu = cpu;
4674 out_no_update:
4675         put_cpu();
4676 }
4677
4678 static void igb_setup_dca(struct igb_adapter *adapter)
4679 {
4680         struct e1000_hw *hw = &adapter->hw;
4681         int i;
4682
4683         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4684                 return;
4685
4686         /* Always use CB2 mode, difference is masked in the CB driver. */
4687         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4688
4689         for (i = 0; i < adapter->num_q_vectors; i++) {
4690                 adapter->q_vector[i]->cpu = -1;
4691                 igb_update_dca(adapter->q_vector[i]);
4692         }
4693 }
4694
4695 static int __igb_notify_dca(struct device *dev, void *data)
4696 {
4697         struct net_device *netdev = dev_get_drvdata(dev);
4698         struct igb_adapter *adapter = netdev_priv(netdev);
4699         struct pci_dev *pdev = adapter->pdev;
4700         struct e1000_hw *hw = &adapter->hw;
4701         unsigned long event = *(unsigned long *)data;
4702
4703         switch (event) {
4704         case DCA_PROVIDER_ADD:
4705                 /* if already enabled, don't do it again */
4706                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4707                         break;
4708                 if (dca_add_requester(dev) == 0) {
4709                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4710                         dev_info(&pdev->dev, "DCA enabled\n");
4711                         igb_setup_dca(adapter);
4712                         break;
4713                 }
4714                 /* Fall Through since DCA is disabled. */
4715         case DCA_PROVIDER_REMOVE:
4716                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4717                         /* without this a class_device is left
4718                          * hanging around in the sysfs model */
4719                         dca_remove_requester(dev);
4720                         dev_info(&pdev->dev, "DCA disabled\n");
4721                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4722                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4723                 }
4724                 break;
4725         }
4726
4727         return 0;
4728 }
4729
4730 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4731                           void *p)
4732 {
4733         int ret_val;
4734
4735         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4736                                          __igb_notify_dca);
4737
4738         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4739 }
4740 #endif /* CONFIG_IGB_DCA */
4741
4742 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4743 {
4744         struct e1000_hw *hw = &adapter->hw;
4745         u32 ping;
4746         int i;
4747
4748         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4749                 ping = E1000_PF_CONTROL_MSG;
4750                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4751                         ping |= E1000_VT_MSGTYPE_CTS;
4752                 igb_write_mbx(hw, &ping, 1, i);
4753         }
4754 }
4755
4756 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4757 {
4758         struct e1000_hw *hw = &adapter->hw;
4759         u32 vmolr = rd32(E1000_VMOLR(vf));
4760         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4761
4762         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4763                             IGB_VF_FLAG_MULTI_PROMISC);
4764         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4765
4766         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4767                 vmolr |= E1000_VMOLR_MPME;
4768                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4769                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4770         } else {
4771                 /*
4772                  * if we have hashes and we are clearing a multicast promisc
4773                  * flag we need to write the hashes to the MTA as this step
4774                  * was previously skipped
4775                  */
4776                 if (vf_data->num_vf_mc_hashes > 30) {
4777                         vmolr |= E1000_VMOLR_MPME;
4778                 } else if (vf_data->num_vf_mc_hashes) {
4779                         int j;
4780                         vmolr |= E1000_VMOLR_ROMPE;
4781                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4782                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4783                 }
4784         }
4785
4786         wr32(E1000_VMOLR(vf), vmolr);
4787
4788         /* there are flags left unprocessed, likely not supported */
4789         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4790                 return -EINVAL;
4791
4792         return 0;
4793
4794 }
4795
4796 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4797                                   u32 *msgbuf, u32 vf)
4798 {
4799         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4800         u16 *hash_list = (u16 *)&msgbuf[1];
4801         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4802         int i;
4803
4804         /* salt away the number of multicast addresses assigned
4805          * to this VF for later use to restore when the PF multi cast
4806          * list changes
4807          */
4808         vf_data->num_vf_mc_hashes = n;
4809
4810         /* only up to 30 hash values supported */
4811         if (n > 30)
4812                 n = 30;
4813
4814         /* store the hashes for later use */
4815         for (i = 0; i < n; i++)
4816                 vf_data->vf_mc_hashes[i] = hash_list[i];
4817
4818         /* Flush and reset the mta with the new values */
4819         igb_set_rx_mode(adapter->netdev);
4820
4821         return 0;
4822 }
4823
4824 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4825 {
4826         struct e1000_hw *hw = &adapter->hw;
4827         struct vf_data_storage *vf_data;
4828         int i, j;
4829
4830         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4831                 u32 vmolr = rd32(E1000_VMOLR(i));
4832                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4833
4834                 vf_data = &adapter->vf_data[i];
4835
4836                 if ((vf_data->num_vf_mc_hashes > 30) ||
4837                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4838                         vmolr |= E1000_VMOLR_MPME;
4839                 } else if (vf_data->num_vf_mc_hashes) {
4840                         vmolr |= E1000_VMOLR_ROMPE;
4841                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4842                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4843                 }
4844                 wr32(E1000_VMOLR(i), vmolr);
4845         }
4846 }
4847
4848 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4849 {
4850         struct e1000_hw *hw = &adapter->hw;
4851         u32 pool_mask, reg, vid;
4852         int i;
4853
4854         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4855
4856         /* Find the vlan filter for this id */
4857         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4858                 reg = rd32(E1000_VLVF(i));
4859
4860                 /* remove the vf from the pool */
4861                 reg &= ~pool_mask;
4862
4863                 /* if pool is empty then remove entry from vfta */
4864                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4865                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4866                         reg = 0;
4867                         vid = reg & E1000_VLVF_VLANID_MASK;
4868                         igb_vfta_set(hw, vid, false);
4869                 }
4870
4871                 wr32(E1000_VLVF(i), reg);
4872         }
4873
4874         adapter->vf_data[vf].vlans_enabled = 0;
4875 }
4876
4877 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4878 {
4879         struct e1000_hw *hw = &adapter->hw;
4880         u32 reg, i;
4881
4882         /* The vlvf table only exists on 82576 hardware and newer */
4883         if (hw->mac.type < e1000_82576)
4884                 return -1;
4885
4886         /* we only need to do this if VMDq is enabled */
4887         if (!adapter->vfs_allocated_count)
4888                 return -1;
4889
4890         /* Find the vlan filter for this id */
4891         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4892                 reg = rd32(E1000_VLVF(i));
4893                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4894                     vid == (reg & E1000_VLVF_VLANID_MASK))
4895                         break;
4896         }
4897
4898         if (add) {
4899                 if (i == E1000_VLVF_ARRAY_SIZE) {
4900                         /* Did not find a matching VLAN ID entry that was
4901                          * enabled.  Search for a free filter entry, i.e.
4902                          * one without the enable bit set
4903                          */
4904                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4905                                 reg = rd32(E1000_VLVF(i));
4906                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4907                                         break;
4908                         }
4909                 }
4910                 if (i < E1000_VLVF_ARRAY_SIZE) {
4911                         /* Found an enabled/available entry */
4912                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4913
4914                         /* if !enabled we need to set this up in vfta */
4915                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4916                                 /* add VID to filter table */
4917                                 igb_vfta_set(hw, vid, true);
4918                                 reg |= E1000_VLVF_VLANID_ENABLE;
4919                         }
4920                         reg &= ~E1000_VLVF_VLANID_MASK;
4921                         reg |= vid;
4922                         wr32(E1000_VLVF(i), reg);
4923
4924                         /* do not modify RLPML for PF devices */
4925                         if (vf >= adapter->vfs_allocated_count)
4926                                 return 0;
4927
4928                         if (!adapter->vf_data[vf].vlans_enabled) {
4929                                 u32 size;
4930                                 reg = rd32(E1000_VMOLR(vf));
4931                                 size = reg & E1000_VMOLR_RLPML_MASK;
4932                                 size += 4;
4933                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4934                                 reg |= size;
4935                                 wr32(E1000_VMOLR(vf), reg);
4936                         }
4937
4938                         adapter->vf_data[vf].vlans_enabled++;
4939                         return 0;
4940                 }
4941         } else {
4942                 if (i < E1000_VLVF_ARRAY_SIZE) {
4943                         /* remove vf from the pool */
4944                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4945                         /* if pool is empty then remove entry from vfta */
4946                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4947                                 reg = 0;
4948                                 igb_vfta_set(hw, vid, false);
4949                         }
4950                         wr32(E1000_VLVF(i), reg);
4951
4952                         /* do not modify RLPML for PF devices */
4953                         if (vf >= adapter->vfs_allocated_count)
4954                                 return 0;
4955
4956                         adapter->vf_data[vf].vlans_enabled--;
4957                         if (!adapter->vf_data[vf].vlans_enabled) {
4958                                 u32 size;
4959                                 reg = rd32(E1000_VMOLR(vf));
4960                                 size = reg & E1000_VMOLR_RLPML_MASK;
4961                                 size -= 4;
4962                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4963                                 reg |= size;
4964                                 wr32(E1000_VMOLR(vf), reg);
4965                         }
4966                 }
4967         }
4968         return 0;
4969 }
4970
4971 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4972 {
4973         struct e1000_hw *hw = &adapter->hw;
4974
4975         if (vid)
4976                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4977         else
4978                 wr32(E1000_VMVIR(vf), 0);
4979 }
4980
4981 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4982                                int vf, u16 vlan, u8 qos)
4983 {
4984         int err = 0;
4985         struct igb_adapter *adapter = netdev_priv(netdev);
4986
4987         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4988                 return -EINVAL;
4989         if (vlan || qos) {
4990                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4991                 if (err)
4992                         goto out;
4993                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4994                 igb_set_vmolr(adapter, vf, !vlan);
4995                 adapter->vf_data[vf].pf_vlan = vlan;
4996                 adapter->vf_data[vf].pf_qos = qos;
4997                 dev_info(&adapter->pdev->dev,
4998                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4999                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5000                         dev_warn(&adapter->pdev->dev,
5001                                  "The VF VLAN has been set,"
5002                                  " but the PF device is not up.\n");
5003                         dev_warn(&adapter->pdev->dev,
5004                                  "Bring the PF device up before"
5005                                  " attempting to use the VF device.\n");
5006                 }
5007         } else {
5008                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5009                                    false, vf);
5010                 igb_set_vmvir(adapter, vlan, vf);
5011                 igb_set_vmolr(adapter, vf, true);
5012                 adapter->vf_data[vf].pf_vlan = 0;
5013                 adapter->vf_data[vf].pf_qos = 0;
5014        }
5015 out:
5016        return err;
5017 }
5018
5019 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5020 {
5021         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5022         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5023
5024         return igb_vlvf_set(adapter, vid, add, vf);
5025 }
5026
5027 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5028 {
5029         /* clear flags - except flag that indicates PF has set the MAC */
5030         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5031         adapter->vf_data[vf].last_nack = jiffies;
5032
5033         /* reset offloads to defaults */
5034         igb_set_vmolr(adapter, vf, true);
5035
5036         /* reset vlans for device */
5037         igb_clear_vf_vfta(adapter, vf);
5038         if (adapter->vf_data[vf].pf_vlan)
5039                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5040                                     adapter->vf_data[vf].pf_vlan,
5041                                     adapter->vf_data[vf].pf_qos);
5042         else
5043                 igb_clear_vf_vfta(adapter, vf);
5044
5045         /* reset multicast table array for vf */
5046         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5047
5048         /* Flush and reset the mta with the new values */
5049         igb_set_rx_mode(adapter->netdev);
5050 }
5051
5052 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5053 {
5054         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5055
5056         /* generate a new mac address as we were hotplug removed/added */
5057         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5058                 random_ether_addr(vf_mac);
5059
5060         /* process remaining reset events */
5061         igb_vf_reset(adapter, vf);
5062 }
5063
5064 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5065 {
5066         struct e1000_hw *hw = &adapter->hw;
5067         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5068         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5069         u32 reg, msgbuf[3];
5070         u8 *addr = (u8 *)(&msgbuf[1]);
5071
5072         /* process all the same items cleared in a function level reset */
5073         igb_vf_reset(adapter, vf);
5074
5075         /* set vf mac address */
5076         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5077
5078         /* enable transmit and receive for vf */
5079         reg = rd32(E1000_VFTE);
5080         wr32(E1000_VFTE, reg | (1 << vf));
5081         reg = rd32(E1000_VFRE);
5082         wr32(E1000_VFRE, reg | (1 << vf));
5083
5084         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5085
5086         /* reply to reset with ack and vf mac address */
5087         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5088         memcpy(addr, vf_mac, 6);
5089         igb_write_mbx(hw, msgbuf, 3, vf);
5090 }
5091
5092 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5093 {
5094         /*
5095          * The VF MAC Address is stored in a packed array of bytes
5096          * starting at the second 32 bit word of the msg array
5097          */
5098         unsigned char *addr = (char *)&msg[1];
5099         int err = -1;
5100
5101         if (is_valid_ether_addr(addr))
5102                 err = igb_set_vf_mac(adapter, vf, addr);
5103
5104         return err;
5105 }
5106
5107 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5108 {
5109         struct e1000_hw *hw = &adapter->hw;
5110         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5111         u32 msg = E1000_VT_MSGTYPE_NACK;
5112
5113         /* if device isn't clear to send it shouldn't be reading either */
5114         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5115             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5116                 igb_write_mbx(hw, &msg, 1, vf);
5117                 vf_data->last_nack = jiffies;
5118         }
5119 }
5120
5121 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5122 {
5123         struct pci_dev *pdev = adapter->pdev;
5124         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5125         struct e1000_hw *hw = &adapter->hw;
5126         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5127         s32 retval;
5128
5129         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5130
5131         if (retval) {
5132                 /* if receive failed revoke VF CTS stats and restart init */
5133                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5134                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5135                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5136                         return;
5137                 goto out;
5138         }
5139
5140         /* this is a message we already processed, do nothing */
5141         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5142                 return;
5143
5144         /*
5145          * until the vf completes a reset it should not be
5146          * allowed to start any configuration.
5147          */
5148
5149         if (msgbuf[0] == E1000_VF_RESET) {
5150                 igb_vf_reset_msg(adapter, vf);
5151                 return;
5152         }
5153
5154         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5155                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5156                         return;
5157                 retval = -1;
5158                 goto out;
5159         }
5160
5161         switch ((msgbuf[0] & 0xFFFF)) {
5162         case E1000_VF_SET_MAC_ADDR:
5163                 retval = -EINVAL;
5164                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5165                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5166                 else
5167                         dev_warn(&pdev->dev,
5168                                  "VF %d attempted to override administratively "
5169                                  "set MAC address\nReload the VF driver to "
5170                                  "resume operations\n", vf);
5171                 break;
5172         case E1000_VF_SET_PROMISC:
5173                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5174                 break;
5175         case E1000_VF_SET_MULTICAST:
5176                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5177                 break;
5178         case E1000_VF_SET_LPE:
5179                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5180                 break;
5181         case E1000_VF_SET_VLAN:
5182                 retval = -1;
5183                 if (vf_data->pf_vlan)
5184                         dev_warn(&pdev->dev,
5185                                  "VF %d attempted to override administratively "
5186                                  "set VLAN tag\nReload the VF driver to "
5187                                  "resume operations\n", vf);
5188                 else
5189                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5190                 break;
5191         default:
5192                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5193                 retval = -1;
5194                 break;
5195         }
5196
5197         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5198 out:
5199         /* notify the VF of the results of what it sent us */
5200         if (retval)
5201                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5202         else
5203                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5204
5205         igb_write_mbx(hw, msgbuf, 1, vf);
5206 }
5207
5208 static void igb_msg_task(struct igb_adapter *adapter)
5209 {
5210         struct e1000_hw *hw = &adapter->hw;
5211         u32 vf;
5212
5213         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5214                 /* process any reset requests */
5215                 if (!igb_check_for_rst(hw, vf))
5216                         igb_vf_reset_event(adapter, vf);
5217
5218                 /* process any messages pending */
5219                 if (!igb_check_for_msg(hw, vf))
5220                         igb_rcv_msg_from_vf(adapter, vf);
5221
5222                 /* process any acks */
5223                 if (!igb_check_for_ack(hw, vf))
5224                         igb_rcv_ack_from_vf(adapter, vf);
5225         }
5226 }
5227
5228 /**
5229  *  igb_set_uta - Set unicast filter table address
5230  *  @adapter: board private structure
5231  *
5232  *  The unicast table address is a register array of 32-bit registers.
5233  *  The table is meant to be used in a way similar to how the MTA is used
5234  *  however due to certain limitations in the hardware it is necessary to
5235  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
5236  *  enable bit to allow vlan tag stripping when promiscous mode is enabled
5237  **/
5238 static void igb_set_uta(struct igb_adapter *adapter)
5239 {
5240         struct e1000_hw *hw = &adapter->hw;
5241         int i;
5242
5243         /* The UTA table only exists on 82576 hardware and newer */
5244         if (hw->mac.type < e1000_82576)
5245                 return;
5246
5247         /* we only need to do this if VMDq is enabled */
5248         if (!adapter->vfs_allocated_count)
5249                 return;
5250
5251         for (i = 0; i < hw->mac.uta_reg_count; i++)
5252                 array_wr32(E1000_UTA, i, ~0);
5253 }
5254
5255 /**
5256  * igb_intr_msi - Interrupt Handler
5257  * @irq: interrupt number
5258  * @data: pointer to a network interface device structure
5259  **/
5260 static irqreturn_t igb_intr_msi(int irq, void *data)
5261 {
5262         struct igb_adapter *adapter = data;
5263         struct igb_q_vector *q_vector = adapter->q_vector[0];
5264         struct e1000_hw *hw = &adapter->hw;
5265         /* read ICR disables interrupts using IAM */
5266         u32 icr = rd32(E1000_ICR);
5267
5268         igb_write_itr(q_vector);
5269
5270         if (icr & E1000_ICR_DRSTA)
5271                 schedule_work(&adapter->reset_task);
5272
5273         if (icr & E1000_ICR_DOUTSYNC) {
5274                 /* HW is reporting DMA is out of sync */
5275                 adapter->stats.doosync++;
5276         }
5277
5278         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5279                 hw->mac.get_link_status = 1;
5280                 if (!test_bit(__IGB_DOWN, &adapter->state))
5281                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5282         }
5283
5284         napi_schedule(&q_vector->napi);
5285
5286         return IRQ_HANDLED;
5287 }
5288
5289 /**
5290  * igb_intr - Legacy Interrupt Handler
5291  * @irq: interrupt number
5292  * @data: pointer to a network interface device structure
5293  **/
5294 static irqreturn_t igb_intr(int irq, void *data)
5295 {
5296         struct igb_adapter *adapter = data;
5297         struct igb_q_vector *q_vector = adapter->q_vector[0];
5298         struct e1000_hw *hw = &adapter->hw;
5299         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5300          * need for the IMC write */
5301         u32 icr = rd32(E1000_ICR);
5302         if (!icr)
5303                 return IRQ_NONE;  /* Not our interrupt */
5304
5305         igb_write_itr(q_vector);
5306
5307         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5308          * not set, then the adapter didn't send an interrupt */
5309         if (!(icr & E1000_ICR_INT_ASSERTED))
5310                 return IRQ_NONE;
5311
5312         if (icr & E1000_ICR_DRSTA)
5313                 schedule_work(&adapter->reset_task);
5314
5315         if (icr & E1000_ICR_DOUTSYNC) {
5316                 /* HW is reporting DMA is out of sync */
5317                 adapter->stats.doosync++;
5318         }
5319
5320         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5321                 hw->mac.get_link_status = 1;
5322                 /* guard against interrupt when we're going down */
5323                 if (!test_bit(__IGB_DOWN, &adapter->state))
5324                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5325         }
5326
5327         napi_schedule(&q_vector->napi);
5328
5329         return IRQ_HANDLED;
5330 }
5331
5332 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5333 {
5334         struct igb_adapter *adapter = q_vector->adapter;
5335         struct e1000_hw *hw = &adapter->hw;
5336
5337         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5338             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5339                 if (!adapter->msix_entries)
5340                         igb_set_itr(adapter);
5341                 else
5342                         igb_update_ring_itr(q_vector);
5343         }
5344
5345         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5346                 if (adapter->msix_entries)
5347                         wr32(E1000_EIMS, q_vector->eims_value);
5348                 else
5349                         igb_irq_enable(adapter);
5350         }
5351 }
5352
5353 /**
5354  * igb_poll - NAPI Rx polling callback
5355  * @napi: napi polling structure
5356  * @budget: count of how many packets we should handle
5357  **/
5358 static int igb_poll(struct napi_struct *napi, int budget)
5359 {
5360         struct igb_q_vector *q_vector = container_of(napi,
5361                                                      struct igb_q_vector,
5362                                                      napi);
5363         int tx_clean_complete = 1, work_done = 0;
5364
5365 #ifdef CONFIG_IGB_DCA
5366         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5367                 igb_update_dca(q_vector);
5368 #endif
5369         if (q_vector->tx_ring)
5370                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5371
5372         if (q_vector->rx_ring)
5373                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5374
5375         if (!tx_clean_complete)
5376                 work_done = budget;
5377
5378         /* If not enough Rx work done, exit the polling mode */
5379         if (work_done < budget) {
5380                 napi_complete(napi);
5381                 igb_ring_irq_enable(q_vector);
5382         }
5383
5384         return work_done;
5385 }
5386
5387 /**
5388  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5389  * @adapter: board private structure
5390  * @shhwtstamps: timestamp structure to update
5391  * @regval: unsigned 64bit system time value.
5392  *
5393  * We need to convert the system time value stored in the RX/TXSTMP registers
5394  * into a hwtstamp which can be used by the upper level timestamping functions
5395  */
5396 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5397                                    struct skb_shared_hwtstamps *shhwtstamps,
5398                                    u64 regval)
5399 {
5400         u64 ns;
5401
5402         /*
5403          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5404          * 24 to match clock shift we setup earlier.
5405          */
5406         if (adapter->hw.mac.type == e1000_82580)
5407                 regval <<= IGB_82580_TSYNC_SHIFT;
5408
5409         ns = timecounter_cyc2time(&adapter->clock, regval);
5410         timecompare_update(&adapter->compare, ns);
5411         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5412         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5413         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5414 }
5415
5416 /**
5417  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5418  * @q_vector: pointer to q_vector containing needed info
5419  * @buffer: pointer to igb_buffer structure
5420  *
5421  * If we were asked to do hardware stamping and such a time stamp is
5422  * available, then it must have been for this skb here because we only
5423  * allow only one such packet into the queue.
5424  */
5425 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5426 {
5427         struct igb_adapter *adapter = q_vector->adapter;
5428         struct e1000_hw *hw = &adapter->hw;
5429         struct skb_shared_hwtstamps shhwtstamps;
5430         u64 regval;
5431
5432         /* if skb does not support hw timestamp or TX stamp not valid exit */
5433         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5434             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5435                 return;
5436
5437         regval = rd32(E1000_TXSTMPL);
5438         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5439
5440         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5441         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5442 }
5443
5444 /**
5445  * igb_clean_tx_irq - Reclaim resources after transmit completes
5446  * @q_vector: pointer to q_vector containing needed info
5447  * returns true if ring is completely cleaned
5448  **/
5449 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5450 {
5451         struct igb_adapter *adapter = q_vector->adapter;
5452         struct igb_ring *tx_ring = q_vector->tx_ring;
5453         struct net_device *netdev = tx_ring->netdev;
5454         struct e1000_hw *hw = &adapter->hw;
5455         struct igb_buffer *buffer_info;
5456         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5457         unsigned int total_bytes = 0, total_packets = 0;
5458         unsigned int i, eop, count = 0;
5459         bool cleaned = false;
5460
5461         i = tx_ring->next_to_clean;
5462         eop = tx_ring->buffer_info[i].next_to_watch;
5463         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5464
5465         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5466                (count < tx_ring->count)) {
5467                 rmb();  /* read buffer_info after eop_desc status */
5468                 for (cleaned = false; !cleaned; count++) {
5469                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5470                         buffer_info = &tx_ring->buffer_info[i];
5471                         cleaned = (i == eop);
5472
5473                         if (buffer_info->skb) {
5474                                 total_bytes += buffer_info->bytecount;
5475                                 /* gso_segs is currently only valid for tcp */
5476                                 total_packets += buffer_info->gso_segs;
5477                                 igb_tx_hwtstamp(q_vector, buffer_info);
5478                         }
5479
5480                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5481                         tx_desc->wb.status = 0;
5482
5483                         i++;
5484                         if (i == tx_ring->count)
5485                                 i = 0;
5486                 }
5487                 eop = tx_ring->buffer_info[i].next_to_watch;
5488                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5489         }
5490
5491         tx_ring->next_to_clean = i;
5492
5493         if (unlikely(count &&
5494                      netif_carrier_ok(netdev) &&
5495                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5496                 /* Make sure that anybody stopping the queue after this
5497                  * sees the new next_to_clean.
5498                  */
5499                 smp_mb();
5500                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5501                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5502                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5503
5504                         u64_stats_update_begin(&tx_ring->tx_syncp);
5505                         tx_ring->tx_stats.restart_queue++;
5506                         u64_stats_update_end(&tx_ring->tx_syncp);
5507                 }
5508         }
5509
5510         if (tx_ring->detect_tx_hung) {
5511                 /* Detect a transmit hang in hardware, this serializes the
5512                  * check with the clearing of time_stamp and movement of i */
5513                 tx_ring->detect_tx_hung = false;
5514                 if (tx_ring->buffer_info[i].time_stamp &&
5515                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5516                                (adapter->tx_timeout_factor * HZ)) &&
5517                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5518
5519                         /* detected Tx unit hang */
5520                         dev_err(tx_ring->dev,
5521                                 "Detected Tx Unit Hang\n"
5522                                 "  Tx Queue             <%d>\n"
5523                                 "  TDH                  <%x>\n"
5524                                 "  TDT                  <%x>\n"
5525                                 "  next_to_use          <%x>\n"
5526                                 "  next_to_clean        <%x>\n"
5527                                 "buffer_info[next_to_clean]\n"
5528                                 "  time_stamp           <%lx>\n"
5529                                 "  next_to_watch        <%x>\n"
5530                                 "  jiffies              <%lx>\n"
5531                                 "  desc.status          <%x>\n",
5532                                 tx_ring->queue_index,
5533                                 readl(tx_ring->head),
5534                                 readl(tx_ring->tail),
5535                                 tx_ring->next_to_use,
5536                                 tx_ring->next_to_clean,
5537                                 tx_ring->buffer_info[eop].time_stamp,
5538                                 eop,
5539                                 jiffies,
5540                                 eop_desc->wb.status);
5541                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5542                 }
5543         }
5544         tx_ring->total_bytes += total_bytes;
5545         tx_ring->total_packets += total_packets;
5546         u64_stats_update_begin(&tx_ring->tx_syncp);
5547         tx_ring->tx_stats.bytes += total_bytes;
5548         tx_ring->tx_stats.packets += total_packets;
5549         u64_stats_update_end(&tx_ring->tx_syncp);
5550         return count < tx_ring->count;
5551 }
5552
5553 /**
5554  * igb_receive_skb - helper function to handle rx indications
5555  * @q_vector: structure containing interrupt and ring information
5556  * @skb: packet to send up
5557  * @vlan_tag: vlan tag for packet
5558  **/
5559 static void igb_receive_skb(struct igb_q_vector *q_vector,
5560                             struct sk_buff *skb,
5561                             u16 vlan_tag)
5562 {
5563         struct igb_adapter *adapter = q_vector->adapter;
5564
5565         if (vlan_tag && adapter->vlgrp)
5566                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5567                                  vlan_tag, skb);
5568         else
5569                 napi_gro_receive(&q_vector->napi, skb);
5570 }
5571
5572 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5573                                        u32 status_err, struct sk_buff *skb)
5574 {
5575         skb_checksum_none_assert(skb);
5576
5577         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5578         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5579              (status_err & E1000_RXD_STAT_IXSM))
5580                 return;
5581
5582         /* TCP/UDP checksum error bit is set */
5583         if (status_err &
5584             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5585                 /*
5586                  * work around errata with sctp packets where the TCPE aka
5587                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5588                  * packets, (aka let the stack check the crc32c)
5589                  */
5590                 if ((skb->len == 60) &&
5591                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5592                         u64_stats_update_begin(&ring->rx_syncp);
5593                         ring->rx_stats.csum_err++;
5594                         u64_stats_update_end(&ring->rx_syncp);
5595                 }
5596                 /* let the stack verify checksum errors */
5597                 return;
5598         }
5599         /* It must be a TCP or UDP packet with a valid checksum */
5600         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5601                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5602
5603         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5604 }
5605
5606 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5607                                    struct sk_buff *skb)
5608 {
5609         struct igb_adapter *adapter = q_vector->adapter;
5610         struct e1000_hw *hw = &adapter->hw;
5611         u64 regval;
5612
5613         /*
5614          * If this bit is set, then the RX registers contain the time stamp. No
5615          * other packet will be time stamped until we read these registers, so
5616          * read the registers to make them available again. Because only one
5617          * packet can be time stamped at a time, we know that the register
5618          * values must belong to this one here and therefore we don't need to
5619          * compare any of the additional attributes stored for it.
5620          *
5621          * If nothing went wrong, then it should have a shared tx_flags that we
5622          * can turn into a skb_shared_hwtstamps.
5623          */
5624         if (staterr & E1000_RXDADV_STAT_TSIP) {
5625                 u32 *stamp = (u32 *)skb->data;
5626                 regval = le32_to_cpu(*(stamp + 2));
5627                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5628                 skb_pull(skb, IGB_TS_HDR_LEN);
5629         } else {
5630                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5631                         return;
5632
5633                 regval = rd32(E1000_RXSTMPL);
5634                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5635         }
5636
5637         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5638 }
5639 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5640                                union e1000_adv_rx_desc *rx_desc)
5641 {
5642         /* HW will not DMA in data larger than the given buffer, even if it
5643          * parses the (NFS, of course) header to be larger.  In that case, it
5644          * fills the header buffer and spills the rest into the page.
5645          */
5646         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5647                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5648         if (hlen > rx_ring->rx_buffer_len)
5649                 hlen = rx_ring->rx_buffer_len;
5650         return hlen;
5651 }
5652
5653 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5654                                  int *work_done, int budget)
5655 {
5656         struct igb_ring *rx_ring = q_vector->rx_ring;
5657         struct net_device *netdev = rx_ring->netdev;
5658         struct device *dev = rx_ring->dev;
5659         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5660         struct igb_buffer *buffer_info , *next_buffer;
5661         struct sk_buff *skb;
5662         bool cleaned = false;
5663         int cleaned_count = 0;
5664         int current_node = numa_node_id();
5665         unsigned int total_bytes = 0, total_packets = 0;
5666         unsigned int i;
5667         u32 staterr;
5668         u16 length;
5669         u16 vlan_tag;
5670
5671         i = rx_ring->next_to_clean;
5672         buffer_info = &rx_ring->buffer_info[i];
5673         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5674         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5675
5676         while (staterr & E1000_RXD_STAT_DD) {
5677                 if (*work_done >= budget)
5678                         break;
5679                 (*work_done)++;
5680                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5681
5682                 skb = buffer_info->skb;
5683                 prefetch(skb->data - NET_IP_ALIGN);
5684                 buffer_info->skb = NULL;
5685
5686                 i++;
5687                 if (i == rx_ring->count)
5688                         i = 0;
5689
5690                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5691                 prefetch(next_rxd);
5692                 next_buffer = &rx_ring->buffer_info[i];
5693
5694                 length = le16_to_cpu(rx_desc->wb.upper.length);
5695                 cleaned = true;
5696                 cleaned_count++;
5697
5698                 if (buffer_info->dma) {
5699                         dma_unmap_single(dev, buffer_info->dma,
5700                                          rx_ring->rx_buffer_len,
5701                                          DMA_FROM_DEVICE);
5702                         buffer_info->dma = 0;
5703                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5704                                 skb_put(skb, length);
5705                                 goto send_up;
5706                         }
5707                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5708                 }
5709
5710                 if (length) {
5711                         dma_unmap_page(dev, buffer_info->page_dma,
5712                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5713                         buffer_info->page_dma = 0;
5714
5715                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5716                                                 buffer_info->page,
5717                                                 buffer_info->page_offset,
5718                                                 length);
5719
5720                         if ((page_count(buffer_info->page) != 1) ||
5721                             (page_to_nid(buffer_info->page) != current_node))
5722                                 buffer_info->page = NULL;
5723                         else
5724                                 get_page(buffer_info->page);
5725
5726                         skb->len += length;
5727                         skb->data_len += length;
5728                         skb->truesize += length;
5729                 }
5730
5731                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5732                         buffer_info->skb = next_buffer->skb;
5733                         buffer_info->dma = next_buffer->dma;
5734                         next_buffer->skb = skb;
5735                         next_buffer->dma = 0;
5736                         goto next_desc;
5737                 }
5738 send_up:
5739                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5740                         dev_kfree_skb_irq(skb);
5741                         goto next_desc;
5742                 }
5743
5744                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5745                         igb_rx_hwtstamp(q_vector, staterr, skb);
5746                 total_bytes += skb->len;
5747                 total_packets++;
5748
5749                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5750
5751                 skb->protocol = eth_type_trans(skb, netdev);
5752                 skb_record_rx_queue(skb, rx_ring->queue_index);
5753
5754                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5755                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5756
5757                 igb_receive_skb(q_vector, skb, vlan_tag);
5758
5759 next_desc:
5760                 rx_desc->wb.upper.status_error = 0;
5761
5762                 /* return some buffers to hardware, one at a time is too slow */
5763                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5764                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5765                         cleaned_count = 0;
5766                 }
5767
5768                 /* use prefetched values */
5769                 rx_desc = next_rxd;
5770                 buffer_info = next_buffer;
5771                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5772         }
5773
5774         rx_ring->next_to_clean = i;
5775         cleaned_count = igb_desc_unused(rx_ring);
5776
5777         if (cleaned_count)
5778                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5779
5780         rx_ring->total_packets += total_packets;
5781         rx_ring->total_bytes += total_bytes;
5782         u64_stats_update_begin(&rx_ring->rx_syncp);
5783         rx_ring->rx_stats.packets += total_packets;
5784         rx_ring->rx_stats.bytes += total_bytes;
5785         u64_stats_update_end(&rx_ring->rx_syncp);
5786         return cleaned;
5787 }
5788
5789 /**
5790  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5791  * @adapter: address of board private structure
5792  **/
5793 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5794 {
5795         struct net_device *netdev = rx_ring->netdev;
5796         union e1000_adv_rx_desc *rx_desc;
5797         struct igb_buffer *buffer_info;
5798         struct sk_buff *skb;
5799         unsigned int i;
5800         int bufsz;
5801
5802         i = rx_ring->next_to_use;
5803         buffer_info = &rx_ring->buffer_info[i];
5804
5805         bufsz = rx_ring->rx_buffer_len;
5806
5807         while (cleaned_count--) {
5808                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5809
5810                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5811                         if (!buffer_info->page) {
5812                                 buffer_info->page = netdev_alloc_page(netdev);
5813                                 if (unlikely(!buffer_info->page)) {
5814                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5815                                         rx_ring->rx_stats.alloc_failed++;
5816                                         u64_stats_update_end(&rx_ring->rx_syncp);
5817                                         goto no_buffers;
5818                                 }
5819                                 buffer_info->page_offset = 0;
5820                         } else {
5821                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5822                         }
5823                         buffer_info->page_dma =
5824                                 dma_map_page(rx_ring->dev, buffer_info->page,
5825                                              buffer_info->page_offset,
5826                                              PAGE_SIZE / 2,
5827                                              DMA_FROM_DEVICE);
5828                         if (dma_mapping_error(rx_ring->dev,
5829                                               buffer_info->page_dma)) {
5830                                 buffer_info->page_dma = 0;
5831                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5832                                 rx_ring->rx_stats.alloc_failed++;
5833                                 u64_stats_update_end(&rx_ring->rx_syncp);
5834                                 goto no_buffers;
5835                         }
5836                 }
5837
5838                 skb = buffer_info->skb;
5839                 if (!skb) {
5840                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5841                         if (unlikely(!skb)) {
5842                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5843                                 rx_ring->rx_stats.alloc_failed++;
5844                                 u64_stats_update_end(&rx_ring->rx_syncp);
5845                                 goto no_buffers;
5846                         }
5847
5848                         buffer_info->skb = skb;
5849                 }
5850                 if (!buffer_info->dma) {
5851                         buffer_info->dma = dma_map_single(rx_ring->dev,
5852                                                           skb->data,
5853                                                           bufsz,
5854                                                           DMA_FROM_DEVICE);
5855                         if (dma_mapping_error(rx_ring->dev,
5856                                               buffer_info->dma)) {
5857                                 buffer_info->dma = 0;
5858                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5859                                 rx_ring->rx_stats.alloc_failed++;
5860                                 u64_stats_update_end(&rx_ring->rx_syncp);
5861                                 goto no_buffers;
5862                         }
5863                 }
5864                 /* Refresh the desc even if buffer_addrs didn't change because
5865                  * each write-back erases this info. */
5866                 if (bufsz < IGB_RXBUFFER_1024) {
5867                         rx_desc->read.pkt_addr =
5868                              cpu_to_le64(buffer_info->page_dma);
5869                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5870                 } else {
5871                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5872                         rx_desc->read.hdr_addr = 0;
5873                 }
5874
5875                 i++;
5876                 if (i == rx_ring->count)
5877                         i = 0;
5878                 buffer_info = &rx_ring->buffer_info[i];
5879         }
5880
5881 no_buffers:
5882         if (rx_ring->next_to_use != i) {
5883                 rx_ring->next_to_use = i;
5884                 if (i == 0)
5885                         i = (rx_ring->count - 1);
5886                 else
5887                         i--;
5888
5889                 /* Force memory writes to complete before letting h/w
5890                  * know there are new descriptors to fetch.  (Only
5891                  * applicable for weak-ordered memory model archs,
5892                  * such as IA-64). */
5893                 wmb();
5894                 writel(i, rx_ring->tail);
5895         }
5896 }
5897
5898 /**
5899  * igb_mii_ioctl -
5900  * @netdev:
5901  * @ifreq:
5902  * @cmd:
5903  **/
5904 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5905 {
5906         struct igb_adapter *adapter = netdev_priv(netdev);
5907         struct mii_ioctl_data *data = if_mii(ifr);
5908
5909         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5910                 return -EOPNOTSUPP;
5911
5912         switch (cmd) {
5913         case SIOCGMIIPHY:
5914                 data->phy_id = adapter->hw.phy.addr;
5915                 break;
5916         case SIOCGMIIREG:
5917                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5918                                      &data->val_out))
5919                         return -EIO;
5920                 break;
5921         case SIOCSMIIREG:
5922         default:
5923                 return -EOPNOTSUPP;
5924         }
5925         return 0;
5926 }
5927
5928 /**
5929  * igb_hwtstamp_ioctl - control hardware time stamping
5930  * @netdev:
5931  * @ifreq:
5932  * @cmd:
5933  *
5934  * Outgoing time stamping can be enabled and disabled. Play nice and
5935  * disable it when requested, although it shouldn't case any overhead
5936  * when no packet needs it. At most one packet in the queue may be
5937  * marked for time stamping, otherwise it would be impossible to tell
5938  * for sure to which packet the hardware time stamp belongs.
5939  *
5940  * Incoming time stamping has to be configured via the hardware
5941  * filters. Not all combinations are supported, in particular event
5942  * type has to be specified. Matching the kind of event packet is
5943  * not supported, with the exception of "all V2 events regardless of
5944  * level 2 or 4".
5945  *
5946  **/
5947 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5948                               struct ifreq *ifr, int cmd)
5949 {
5950         struct igb_adapter *adapter = netdev_priv(netdev);
5951         struct e1000_hw *hw = &adapter->hw;
5952         struct hwtstamp_config config;
5953         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5954         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5955         u32 tsync_rx_cfg = 0;
5956         bool is_l4 = false;
5957         bool is_l2 = false;
5958         u32 regval;
5959
5960         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5961                 return -EFAULT;
5962
5963         /* reserved for future extensions */
5964         if (config.flags)
5965                 return -EINVAL;
5966
5967         switch (config.tx_type) {
5968         case HWTSTAMP_TX_OFF:
5969                 tsync_tx_ctl = 0;
5970         case HWTSTAMP_TX_ON:
5971                 break;
5972         default:
5973                 return -ERANGE;
5974         }
5975
5976         switch (config.rx_filter) {
5977         case HWTSTAMP_FILTER_NONE:
5978                 tsync_rx_ctl = 0;
5979                 break;
5980         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5981         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5982         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5983         case HWTSTAMP_FILTER_ALL:
5984                 /*
5985                  * register TSYNCRXCFG must be set, therefore it is not
5986                  * possible to time stamp both Sync and Delay_Req messages
5987                  * => fall back to time stamping all packets
5988                  */
5989                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5990                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5991                 break;
5992         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5993                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5994                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5995                 is_l4 = true;
5996                 break;
5997         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5998                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5999                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6000                 is_l4 = true;
6001                 break;
6002         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6003         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6004                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6005                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6006                 is_l2 = true;
6007                 is_l4 = true;
6008                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6009                 break;
6010         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6011         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6012                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6013                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6014                 is_l2 = true;
6015                 is_l4 = true;
6016                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6017                 break;
6018         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6019         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6020         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6021                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6022                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6023                 is_l2 = true;
6024                 break;
6025         default:
6026                 return -ERANGE;
6027         }
6028
6029         if (hw->mac.type == e1000_82575) {
6030                 if (tsync_rx_ctl | tsync_tx_ctl)
6031                         return -EINVAL;
6032                 return 0;
6033         }
6034
6035         /*
6036          * Per-packet timestamping only works if all packets are
6037          * timestamped, so enable timestamping in all packets as
6038          * long as one rx filter was configured.
6039          */
6040         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6041                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6042                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6043         }
6044
6045         /* enable/disable TX */
6046         regval = rd32(E1000_TSYNCTXCTL);
6047         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6048         regval |= tsync_tx_ctl;
6049         wr32(E1000_TSYNCTXCTL, regval);
6050
6051         /* enable/disable RX */
6052         regval = rd32(E1000_TSYNCRXCTL);
6053         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6054         regval |= tsync_rx_ctl;
6055         wr32(E1000_TSYNCRXCTL, regval);
6056
6057         /* define which PTP packets are time stamped */
6058         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6059
6060         /* define ethertype filter for timestamped packets */
6061         if (is_l2)
6062                 wr32(E1000_ETQF(3),
6063                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6064                                  E1000_ETQF_1588 | /* enable timestamping */
6065                                  ETH_P_1588));     /* 1588 eth protocol type */
6066         else
6067                 wr32(E1000_ETQF(3), 0);
6068
6069 #define PTP_PORT 319
6070         /* L4 Queue Filter[3]: filter by destination port and protocol */
6071         if (is_l4) {
6072                 u32 ftqf = (IPPROTO_UDP /* UDP */
6073                         | E1000_FTQF_VF_BP /* VF not compared */
6074                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6075                         | E1000_FTQF_MASK); /* mask all inputs */
6076                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6077
6078                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6079                 wr32(E1000_IMIREXT(3),
6080                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6081                 if (hw->mac.type == e1000_82576) {
6082                         /* enable source port check */
6083                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6084                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6085                 }
6086                 wr32(E1000_FTQF(3), ftqf);
6087         } else {
6088                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6089         }
6090         wrfl();
6091
6092         adapter->hwtstamp_config = config;
6093
6094         /* clear TX/RX time stamp registers, just to be sure */
6095         regval = rd32(E1000_TXSTMPH);
6096         regval = rd32(E1000_RXSTMPH);
6097
6098         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6099                 -EFAULT : 0;
6100 }
6101
6102 /**
6103  * igb_ioctl -
6104  * @netdev:
6105  * @ifreq:
6106  * @cmd:
6107  **/
6108 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6109 {
6110         switch (cmd) {
6111         case SIOCGMIIPHY:
6112         case SIOCGMIIREG:
6113         case SIOCSMIIREG:
6114                 return igb_mii_ioctl(netdev, ifr, cmd);
6115         case SIOCSHWTSTAMP:
6116                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6117         default:
6118                 return -EOPNOTSUPP;
6119         }
6120 }
6121
6122 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6123 {
6124         struct igb_adapter *adapter = hw->back;
6125         u16 cap_offset;
6126
6127         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6128         if (!cap_offset)
6129                 return -E1000_ERR_CONFIG;
6130
6131         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6132
6133         return 0;
6134 }
6135
6136 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6137 {
6138         struct igb_adapter *adapter = hw->back;
6139         u16 cap_offset;
6140
6141         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6142         if (!cap_offset)
6143                 return -E1000_ERR_CONFIG;
6144
6145         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6146
6147         return 0;
6148 }
6149
6150 static void igb_vlan_rx_register(struct net_device *netdev,
6151                                  struct vlan_group *grp)
6152 {
6153         struct igb_adapter *adapter = netdev_priv(netdev);
6154         struct e1000_hw *hw = &adapter->hw;
6155         u32 ctrl, rctl;
6156
6157         igb_irq_disable(adapter);
6158         adapter->vlgrp = grp;
6159
6160         if (grp) {
6161                 /* enable VLAN tag insert/strip */
6162                 ctrl = rd32(E1000_CTRL);
6163                 ctrl |= E1000_CTRL_VME;
6164                 wr32(E1000_CTRL, ctrl);
6165
6166                 /* Disable CFI check */
6167                 rctl = rd32(E1000_RCTL);
6168                 rctl &= ~E1000_RCTL_CFIEN;
6169                 wr32(E1000_RCTL, rctl);
6170         } else {
6171                 /* disable VLAN tag insert/strip */
6172                 ctrl = rd32(E1000_CTRL);
6173                 ctrl &= ~E1000_CTRL_VME;
6174                 wr32(E1000_CTRL, ctrl);
6175         }
6176
6177         igb_rlpml_set(adapter);
6178
6179         if (!test_bit(__IGB_DOWN, &adapter->state))
6180                 igb_irq_enable(adapter);
6181 }
6182
6183 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6184 {
6185         struct igb_adapter *adapter = netdev_priv(netdev);
6186         struct e1000_hw *hw = &adapter->hw;
6187         int pf_id = adapter->vfs_allocated_count;
6188
6189         /* attempt to add filter to vlvf array */
6190         igb_vlvf_set(adapter, vid, true, pf_id);
6191
6192         /* add the filter since PF can receive vlans w/o entry in vlvf */
6193         igb_vfta_set(hw, vid, true);
6194 }
6195
6196 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6197 {
6198         struct igb_adapter *adapter = netdev_priv(netdev);
6199         struct e1000_hw *hw = &adapter->hw;
6200         int pf_id = adapter->vfs_allocated_count;
6201         s32 err;
6202
6203         igb_irq_disable(adapter);
6204         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6205
6206         if (!test_bit(__IGB_DOWN, &adapter->state))
6207                 igb_irq_enable(adapter);
6208
6209         /* remove vlan from VLVF table array */
6210         err = igb_vlvf_set(adapter, vid, false, pf_id);
6211
6212         /* if vid was not present in VLVF just remove it from table */
6213         if (err)
6214                 igb_vfta_set(hw, vid, false);
6215 }
6216
6217 static void igb_restore_vlan(struct igb_adapter *adapter)
6218 {
6219         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6220
6221         if (adapter->vlgrp) {
6222                 u16 vid;
6223                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6224                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6225                                 continue;
6226                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6227                 }
6228         }
6229 }
6230
6231 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6232 {
6233         struct pci_dev *pdev = adapter->pdev;
6234         struct e1000_mac_info *mac = &adapter->hw.mac;
6235
6236         mac->autoneg = 0;
6237
6238         /* Fiber NIC's only allow 1000 Gbps Full duplex */
6239         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6240                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6241                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6242                 return -EINVAL;
6243         }
6244
6245         switch (spddplx) {
6246         case SPEED_10 + DUPLEX_HALF:
6247                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6248                 break;
6249         case SPEED_10 + DUPLEX_FULL:
6250                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6251                 break;
6252         case SPEED_100 + DUPLEX_HALF:
6253                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6254                 break;
6255         case SPEED_100 + DUPLEX_FULL:
6256                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6257                 break;
6258         case SPEED_1000 + DUPLEX_FULL:
6259                 mac->autoneg = 1;
6260                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6261                 break;
6262         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6263         default:
6264                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6265                 return -EINVAL;
6266         }
6267         return 0;
6268 }
6269
6270 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6271 {
6272         struct net_device *netdev = pci_get_drvdata(pdev);
6273         struct igb_adapter *adapter = netdev_priv(netdev);
6274         struct e1000_hw *hw = &adapter->hw;
6275         u32 ctrl, rctl, status;
6276         u32 wufc = adapter->wol;
6277 #ifdef CONFIG_PM
6278         int retval = 0;
6279 #endif
6280
6281         netif_device_detach(netdev);
6282
6283         if (netif_running(netdev))
6284                 igb_close(netdev);
6285
6286         igb_clear_interrupt_scheme(adapter);
6287
6288 #ifdef CONFIG_PM
6289         retval = pci_save_state(pdev);
6290         if (retval)
6291                 return retval;
6292 #endif
6293
6294         status = rd32(E1000_STATUS);
6295         if (status & E1000_STATUS_LU)
6296                 wufc &= ~E1000_WUFC_LNKC;
6297
6298         if (wufc) {
6299                 igb_setup_rctl(adapter);
6300                 igb_set_rx_mode(netdev);
6301
6302                 /* turn on all-multi mode if wake on multicast is enabled */
6303                 if (wufc & E1000_WUFC_MC) {
6304                         rctl = rd32(E1000_RCTL);
6305                         rctl |= E1000_RCTL_MPE;
6306                         wr32(E1000_RCTL, rctl);
6307                 }
6308
6309                 ctrl = rd32(E1000_CTRL);
6310                 /* advertise wake from D3Cold */
6311                 #define E1000_CTRL_ADVD3WUC 0x00100000
6312                 /* phy power management enable */
6313                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6314                 ctrl |= E1000_CTRL_ADVD3WUC;
6315                 wr32(E1000_CTRL, ctrl);
6316
6317                 /* Allow time for pending master requests to run */
6318                 igb_disable_pcie_master(hw);
6319
6320                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6321                 wr32(E1000_WUFC, wufc);
6322         } else {
6323                 wr32(E1000_WUC, 0);
6324                 wr32(E1000_WUFC, 0);
6325         }
6326
6327         *enable_wake = wufc || adapter->en_mng_pt;
6328         if (!*enable_wake)
6329                 igb_power_down_link(adapter);
6330         else
6331                 igb_power_up_link(adapter);
6332
6333         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6334          * would have already happened in close and is redundant. */
6335         igb_release_hw_control(adapter);
6336
6337         pci_disable_device(pdev);
6338
6339         return 0;
6340 }
6341
6342 #ifdef CONFIG_PM
6343 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6344 {
6345         int retval;
6346         bool wake;
6347
6348         retval = __igb_shutdown(pdev, &wake);
6349         if (retval)
6350                 return retval;
6351
6352         if (wake) {
6353                 pci_prepare_to_sleep(pdev);
6354         } else {
6355                 pci_wake_from_d3(pdev, false);
6356                 pci_set_power_state(pdev, PCI_D3hot);
6357         }
6358
6359         return 0;
6360 }
6361
6362 static int igb_resume(struct pci_dev *pdev)
6363 {
6364         struct net_device *netdev = pci_get_drvdata(pdev);
6365         struct igb_adapter *adapter = netdev_priv(netdev);
6366         struct e1000_hw *hw = &adapter->hw;
6367         u32 err;
6368
6369         pci_set_power_state(pdev, PCI_D0);
6370         pci_restore_state(pdev);
6371         pci_save_state(pdev);
6372
6373         err = pci_enable_device_mem(pdev);
6374         if (err) {
6375                 dev_err(&pdev->dev,
6376                         "igb: Cannot enable PCI device from suspend\n");
6377                 return err;
6378         }
6379         pci_set_master(pdev);
6380
6381         pci_enable_wake(pdev, PCI_D3hot, 0);
6382         pci_enable_wake(pdev, PCI_D3cold, 0);
6383
6384         if (igb_init_interrupt_scheme(adapter)) {
6385                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6386                 return -ENOMEM;
6387         }
6388
6389         igb_reset(adapter);
6390
6391         /* let the f/w know that the h/w is now under the control of the
6392          * driver. */
6393         igb_get_hw_control(adapter);
6394
6395         wr32(E1000_WUS, ~0);
6396
6397         if (netif_running(netdev)) {
6398                 err = igb_open(netdev);
6399                 if (err)
6400                         return err;
6401         }
6402
6403         netif_device_attach(netdev);
6404
6405         return 0;
6406 }
6407 #endif
6408
6409 static void igb_shutdown(struct pci_dev *pdev)
6410 {
6411         bool wake;
6412
6413         __igb_shutdown(pdev, &wake);
6414
6415         if (system_state == SYSTEM_POWER_OFF) {
6416                 pci_wake_from_d3(pdev, wake);
6417                 pci_set_power_state(pdev, PCI_D3hot);
6418         }
6419 }
6420
6421 #ifdef CONFIG_NET_POLL_CONTROLLER
6422 /*
6423  * Polling 'interrupt' - used by things like netconsole to send skbs
6424  * without having to re-enable interrupts. It's not called while
6425  * the interrupt routine is executing.
6426  */
6427 static void igb_netpoll(struct net_device *netdev)
6428 {
6429         struct igb_adapter *adapter = netdev_priv(netdev);
6430         struct e1000_hw *hw = &adapter->hw;
6431         int i;
6432
6433         if (!adapter->msix_entries) {
6434                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6435                 igb_irq_disable(adapter);
6436                 napi_schedule(&q_vector->napi);
6437                 return;
6438         }
6439
6440         for (i = 0; i < adapter->num_q_vectors; i++) {
6441                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6442                 wr32(E1000_EIMC, q_vector->eims_value);
6443                 napi_schedule(&q_vector->napi);
6444         }
6445 }
6446 #endif /* CONFIG_NET_POLL_CONTROLLER */
6447
6448 /**
6449  * igb_io_error_detected - called when PCI error is detected
6450  * @pdev: Pointer to PCI device
6451  * @state: The current pci connection state
6452  *
6453  * This function is called after a PCI bus error affecting
6454  * this device has been detected.
6455  */
6456 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6457                                               pci_channel_state_t state)
6458 {
6459         struct net_device *netdev = pci_get_drvdata(pdev);
6460         struct igb_adapter *adapter = netdev_priv(netdev);
6461
6462         netif_device_detach(netdev);
6463
6464         if (state == pci_channel_io_perm_failure)
6465                 return PCI_ERS_RESULT_DISCONNECT;
6466
6467         if (netif_running(netdev))
6468                 igb_down(adapter);
6469         pci_disable_device(pdev);
6470
6471         /* Request a slot slot reset. */
6472         return PCI_ERS_RESULT_NEED_RESET;
6473 }
6474
6475 /**
6476  * igb_io_slot_reset - called after the pci bus has been reset.
6477  * @pdev: Pointer to PCI device
6478  *
6479  * Restart the card from scratch, as if from a cold-boot. Implementation
6480  * resembles the first-half of the igb_resume routine.
6481  */
6482 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6483 {
6484         struct net_device *netdev = pci_get_drvdata(pdev);
6485         struct igb_adapter *adapter = netdev_priv(netdev);
6486         struct e1000_hw *hw = &adapter->hw;
6487         pci_ers_result_t result;
6488         int err;
6489
6490         if (pci_enable_device_mem(pdev)) {
6491                 dev_err(&pdev->dev,
6492                         "Cannot re-enable PCI device after reset.\n");
6493                 result = PCI_ERS_RESULT_DISCONNECT;
6494         } else {
6495                 pci_set_master(pdev);
6496                 pci_restore_state(pdev);
6497                 pci_save_state(pdev);
6498
6499                 pci_enable_wake(pdev, PCI_D3hot, 0);
6500                 pci_enable_wake(pdev, PCI_D3cold, 0);
6501
6502                 igb_reset(adapter);
6503                 wr32(E1000_WUS, ~0);
6504                 result = PCI_ERS_RESULT_RECOVERED;
6505         }
6506
6507         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6508         if (err) {
6509                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6510                         "failed 0x%0x\n", err);
6511                 /* non-fatal, continue */
6512         }
6513
6514         return result;
6515 }
6516
6517 /**
6518  * igb_io_resume - called when traffic can start flowing again.
6519  * @pdev: Pointer to PCI device
6520  *
6521  * This callback is called when the error recovery driver tells us that
6522  * its OK to resume normal operation. Implementation resembles the
6523  * second-half of the igb_resume routine.
6524  */
6525 static void igb_io_resume(struct pci_dev *pdev)
6526 {
6527         struct net_device *netdev = pci_get_drvdata(pdev);
6528         struct igb_adapter *adapter = netdev_priv(netdev);
6529
6530         if (netif_running(netdev)) {
6531                 if (igb_up(adapter)) {
6532                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6533                         return;
6534                 }
6535         }
6536
6537         netif_device_attach(netdev);
6538
6539         /* let the f/w know that the h/w is now under the control of the
6540          * driver. */
6541         igb_get_hw_control(adapter);
6542 }
6543
6544 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6545                              u8 qsel)
6546 {
6547         u32 rar_low, rar_high;
6548         struct e1000_hw *hw = &adapter->hw;
6549
6550         /* HW expects these in little endian so we reverse the byte order
6551          * from network order (big endian) to little endian
6552          */
6553         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6554                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6555         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6556
6557         /* Indicate to hardware the Address is Valid. */
6558         rar_high |= E1000_RAH_AV;
6559
6560         if (hw->mac.type == e1000_82575)
6561                 rar_high |= E1000_RAH_POOL_1 * qsel;
6562         else
6563                 rar_high |= E1000_RAH_POOL_1 << qsel;
6564
6565         wr32(E1000_RAL(index), rar_low);
6566         wrfl();
6567         wr32(E1000_RAH(index), rar_high);
6568         wrfl();
6569 }
6570
6571 static int igb_set_vf_mac(struct igb_adapter *adapter,
6572                           int vf, unsigned char *mac_addr)
6573 {
6574         struct e1000_hw *hw = &adapter->hw;
6575         /* VF MAC addresses start at end of receive addresses and moves
6576          * torwards the first, as a result a collision should not be possible */
6577         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6578
6579         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6580
6581         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6582
6583         return 0;
6584 }
6585
6586 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6587 {
6588         struct igb_adapter *adapter = netdev_priv(netdev);
6589         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6590                 return -EINVAL;
6591         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6592         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6593         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6594                                       " change effective.");
6595         if (test_bit(__IGB_DOWN, &adapter->state)) {
6596                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6597                          " but the PF device is not up.\n");
6598                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6599                          " attempting to use the VF device.\n");
6600         }
6601         return igb_set_vf_mac(adapter, vf, mac);
6602 }
6603
6604 static int igb_link_mbps(int internal_link_speed)
6605 {
6606         switch (internal_link_speed) {
6607         case SPEED_100:
6608                 return 100;
6609         case SPEED_1000:
6610                 return 1000;
6611         default:
6612                 return 0;
6613         }
6614 }
6615
6616 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6617                                   int link_speed)
6618 {
6619         int rf_dec, rf_int;
6620         u32 bcnrc_val;
6621
6622         if (tx_rate != 0) {
6623                 /* Calculate the rate factor values to set */
6624                 rf_int = link_speed / tx_rate;
6625                 rf_dec = (link_speed - (rf_int * tx_rate));
6626                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6627
6628                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6629                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6630                                E1000_RTTBCNRC_RF_INT_MASK);
6631                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6632         } else {
6633                 bcnrc_val = 0;
6634         }
6635
6636         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6637         wr32(E1000_RTTBCNRC, bcnrc_val);
6638 }
6639
6640 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6641 {
6642         int actual_link_speed, i;
6643         bool reset_rate = false;
6644
6645         /* VF TX rate limit was not set or not supported */
6646         if ((adapter->vf_rate_link_speed == 0) ||
6647             (adapter->hw.mac.type != e1000_82576))
6648                 return;
6649
6650         actual_link_speed = igb_link_mbps(adapter->link_speed);
6651         if (actual_link_speed != adapter->vf_rate_link_speed) {
6652                 reset_rate = true;
6653                 adapter->vf_rate_link_speed = 0;
6654                 dev_info(&adapter->pdev->dev,
6655                          "Link speed has been changed. VF Transmit "
6656                          "rate is disabled\n");
6657         }
6658
6659         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6660                 if (reset_rate)
6661                         adapter->vf_data[i].tx_rate = 0;
6662
6663                 igb_set_vf_rate_limit(&adapter->hw, i,
6664                                       adapter->vf_data[i].tx_rate,
6665                                       actual_link_speed);
6666         }
6667 }
6668
6669 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6670 {
6671         struct igb_adapter *adapter = netdev_priv(netdev);
6672         struct e1000_hw *hw = &adapter->hw;
6673         int actual_link_speed;
6674
6675         if (hw->mac.type != e1000_82576)
6676                 return -EOPNOTSUPP;
6677
6678         actual_link_speed = igb_link_mbps(adapter->link_speed);
6679         if ((vf >= adapter->vfs_allocated_count) ||
6680             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6681             (tx_rate < 0) || (tx_rate > actual_link_speed))
6682                 return -EINVAL;
6683
6684         adapter->vf_rate_link_speed = actual_link_speed;
6685         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6686         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6687
6688         return 0;
6689 }
6690
6691 static int igb_ndo_get_vf_config(struct net_device *netdev,
6692                                  int vf, struct ifla_vf_info *ivi)
6693 {
6694         struct igb_adapter *adapter = netdev_priv(netdev);
6695         if (vf >= adapter->vfs_allocated_count)
6696                 return -EINVAL;
6697         ivi->vf = vf;
6698         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6699         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6700         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6701         ivi->qos = adapter->vf_data[vf].pf_qos;
6702         return 0;
6703 }
6704
6705 static void igb_vmm_control(struct igb_adapter *adapter)
6706 {
6707         struct e1000_hw *hw = &adapter->hw;
6708         u32 reg;
6709
6710         switch (hw->mac.type) {
6711         case e1000_82575:
6712         default:
6713                 /* replication is not supported for 82575 */
6714                 return;
6715         case e1000_82576:
6716                 /* notify HW that the MAC is adding vlan tags */
6717                 reg = rd32(E1000_DTXCTL);
6718                 reg |= E1000_DTXCTL_VLAN_ADDED;
6719                 wr32(E1000_DTXCTL, reg);
6720         case e1000_82580:
6721                 /* enable replication vlan tag stripping */
6722                 reg = rd32(E1000_RPLOLR);
6723                 reg |= E1000_RPLOLR_STRVLAN;
6724                 wr32(E1000_RPLOLR, reg);
6725         case e1000_i350:
6726                 /* none of the above registers are supported by i350 */
6727                 break;
6728         }
6729
6730         if (adapter->vfs_allocated_count) {
6731                 igb_vmdq_set_loopback_pf(hw, true);
6732                 igb_vmdq_set_replication_pf(hw, true);
6733                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6734                                                 adapter->vfs_allocated_count);
6735         } else {
6736                 igb_vmdq_set_loopback_pf(hw, false);
6737                 igb_vmdq_set_replication_pf(hw, false);
6738         }
6739 }
6740
6741 /* igb_main.c */