igb: Update RXDCTL/TXDCTL configurations
[linux-2.6-block.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/if_ether.h>
49 #include <linux/aer.h>
50 #include <linux/prefetch.h>
51 #ifdef CONFIG_IGB_DCA
52 #include <linux/dca.h>
53 #endif
54 #include "igb.h"
55
56 #define MAJ 3
57 #define MIN 0
58 #define BUILD 6
59 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60 __stringify(BUILD) "-k"
61 char igb_driver_name[] = "igb";
62 char igb_driver_version[] = DRV_VERSION;
63 static const char igb_driver_string[] =
64                                 "Intel(R) Gigabit Ethernet Network Driver";
65 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67 static const struct e1000_info *igb_info_tbl[] = {
68         [board_82575] = &e1000_82575_info,
69 };
70
71 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97         /* required last entry */
98         {0, }
99 };
100
101 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103 void igb_reset(struct igb_adapter *);
104 static int igb_setup_all_tx_resources(struct igb_adapter *);
105 static int igb_setup_all_rx_resources(struct igb_adapter *);
106 static void igb_free_all_tx_resources(struct igb_adapter *);
107 static void igb_free_all_rx_resources(struct igb_adapter *);
108 static void igb_setup_mrqc(struct igb_adapter *);
109 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110 static void __devexit igb_remove(struct pci_dev *pdev);
111 static void igb_init_hw_timer(struct igb_adapter *adapter);
112 static int igb_sw_init(struct igb_adapter *);
113 static int igb_open(struct net_device *);
114 static int igb_close(struct net_device *);
115 static void igb_configure_tx(struct igb_adapter *);
116 static void igb_configure_rx(struct igb_adapter *);
117 static void igb_clean_all_tx_rings(struct igb_adapter *);
118 static void igb_clean_all_rx_rings(struct igb_adapter *);
119 static void igb_clean_tx_ring(struct igb_ring *);
120 static void igb_clean_rx_ring(struct igb_ring *);
121 static void igb_set_rx_mode(struct net_device *);
122 static void igb_update_phy_info(unsigned long);
123 static void igb_watchdog(unsigned long);
124 static void igb_watchdog_task(struct work_struct *);
125 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127                                                  struct rtnl_link_stats64 *stats);
128 static int igb_change_mtu(struct net_device *, int);
129 static int igb_set_mac(struct net_device *, void *);
130 static void igb_set_uta(struct igb_adapter *adapter);
131 static irqreturn_t igb_intr(int irq, void *);
132 static irqreturn_t igb_intr_msi(int irq, void *);
133 static irqreturn_t igb_msix_other(int irq, void *);
134 static irqreturn_t igb_msix_ring(int irq, void *);
135 #ifdef CONFIG_IGB_DCA
136 static void igb_update_dca(struct igb_q_vector *);
137 static void igb_setup_dca(struct igb_adapter *);
138 #endif /* CONFIG_IGB_DCA */
139 static bool igb_clean_tx_irq(struct igb_q_vector *);
140 static int igb_poll(struct napi_struct *, int);
141 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143 static void igb_tx_timeout(struct net_device *);
144 static void igb_reset_task(struct work_struct *);
145 static void igb_vlan_mode(struct net_device *netdev, u32 features);
146 static void igb_vlan_rx_add_vid(struct net_device *, u16);
147 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148 static void igb_restore_vlan(struct igb_adapter *);
149 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150 static void igb_ping_all_vfs(struct igb_adapter *);
151 static void igb_msg_task(struct igb_adapter *);
152 static void igb_vmm_control(struct igb_adapter *);
153 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157                                int vf, u16 vlan, u8 qos);
158 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160                                  struct ifla_vf_info *ivi);
161 static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163 #ifdef CONFIG_PM
164 static int igb_suspend(struct pci_dev *, pm_message_t);
165 static int igb_resume(struct pci_dev *);
166 #endif
167 static void igb_shutdown(struct pci_dev *);
168 #ifdef CONFIG_IGB_DCA
169 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170 static struct notifier_block dca_notifier = {
171         .notifier_call  = igb_notify_dca,
172         .next           = NULL,
173         .priority       = 0
174 };
175 #endif
176 #ifdef CONFIG_NET_POLL_CONTROLLER
177 /* for netdump / net console */
178 static void igb_netpoll(struct net_device *);
179 #endif
180 #ifdef CONFIG_PCI_IOV
181 static unsigned int max_vfs = 0;
182 module_param(max_vfs, uint, 0);
183 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184                  "per physical function");
185 #endif /* CONFIG_PCI_IOV */
186
187 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188                      pci_channel_state_t);
189 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190 static void igb_io_resume(struct pci_dev *);
191
192 static struct pci_error_handlers igb_err_handler = {
193         .error_detected = igb_io_error_detected,
194         .slot_reset = igb_io_slot_reset,
195         .resume = igb_io_resume,
196 };
197
198
199 static struct pci_driver igb_driver = {
200         .name     = igb_driver_name,
201         .id_table = igb_pci_tbl,
202         .probe    = igb_probe,
203         .remove   = __devexit_p(igb_remove),
204 #ifdef CONFIG_PM
205         /* Power Management Hooks */
206         .suspend  = igb_suspend,
207         .resume   = igb_resume,
208 #endif
209         .shutdown = igb_shutdown,
210         .err_handler = &igb_err_handler
211 };
212
213 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215 MODULE_LICENSE("GPL");
216 MODULE_VERSION(DRV_VERSION);
217
218 struct igb_reg_info {
219         u32 ofs;
220         char *name;
221 };
222
223 static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225         /* General Registers */
226         {E1000_CTRL, "CTRL"},
227         {E1000_STATUS, "STATUS"},
228         {E1000_CTRL_EXT, "CTRL_EXT"},
229
230         /* Interrupt Registers */
231         {E1000_ICR, "ICR"},
232
233         /* RX Registers */
234         {E1000_RCTL, "RCTL"},
235         {E1000_RDLEN(0), "RDLEN"},
236         {E1000_RDH(0), "RDH"},
237         {E1000_RDT(0), "RDT"},
238         {E1000_RXDCTL(0), "RXDCTL"},
239         {E1000_RDBAL(0), "RDBAL"},
240         {E1000_RDBAH(0), "RDBAH"},
241
242         /* TX Registers */
243         {E1000_TCTL, "TCTL"},
244         {E1000_TDBAL(0), "TDBAL"},
245         {E1000_TDBAH(0), "TDBAH"},
246         {E1000_TDLEN(0), "TDLEN"},
247         {E1000_TDH(0), "TDH"},
248         {E1000_TDT(0), "TDT"},
249         {E1000_TXDCTL(0), "TXDCTL"},
250         {E1000_TDFH, "TDFH"},
251         {E1000_TDFT, "TDFT"},
252         {E1000_TDFHS, "TDFHS"},
253         {E1000_TDFPC, "TDFPC"},
254
255         /* List Terminator */
256         {}
257 };
258
259 /*
260  * igb_regdump - register printout routine
261  */
262 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 {
264         int n = 0;
265         char rname[16];
266         u32 regs[8];
267
268         switch (reginfo->ofs) {
269         case E1000_RDLEN(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RDLEN(n));
272                 break;
273         case E1000_RDH(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDH(n));
276                 break;
277         case E1000_RDT(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDT(n));
280                 break;
281         case E1000_RXDCTL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_RXDCTL(n));
284                 break;
285         case E1000_RDBAL(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_RDBAL(n));
288                 break;
289         case E1000_RDBAH(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_RDBAH(n));
292                 break;
293         case E1000_TDBAL(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_RDBAL(n));
296                 break;
297         case E1000_TDBAH(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDBAH(n));
300                 break;
301         case E1000_TDLEN(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TDLEN(n));
304                 break;
305         case E1000_TDH(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_TDH(n));
308                 break;
309         case E1000_TDT(0):
310                 for (n = 0; n < 4; n++)
311                         regs[n] = rd32(E1000_TDT(n));
312                 break;
313         case E1000_TXDCTL(0):
314                 for (n = 0; n < 4; n++)
315                         regs[n] = rd32(E1000_TXDCTL(n));
316                 break;
317         default:
318                 printk(KERN_INFO "%-15s %08x\n",
319                         reginfo->name, rd32(reginfo->ofs));
320                 return;
321         }
322
323         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324         printk(KERN_INFO "%-15s ", rname);
325         for (n = 0; n < 4; n++)
326                 printk(KERN_CONT "%08x ", regs[n]);
327         printk(KERN_CONT "\n");
328 }
329
330 /*
331  * igb_dump - Print registers, tx-rings and rx-rings
332  */
333 static void igb_dump(struct igb_adapter *adapter)
334 {
335         struct net_device *netdev = adapter->netdev;
336         struct e1000_hw *hw = &adapter->hw;
337         struct igb_reg_info *reginfo;
338         int n = 0;
339         struct igb_ring *tx_ring;
340         union e1000_adv_tx_desc *tx_desc;
341         struct my_u0 { u64 a; u64 b; } *u0;
342         struct igb_buffer *buffer_info;
343         struct igb_ring *rx_ring;
344         union e1000_adv_rx_desc *rx_desc;
345         u32 staterr;
346         int i = 0;
347
348         if (!netif_msg_hw(adapter))
349                 return;
350
351         /* Print netdevice Info */
352         if (netdev) {
353                 dev_info(&adapter->pdev->dev, "Net device Info\n");
354                 printk(KERN_INFO "Device Name     state            "
355                         "trans_start      last_rx\n");
356                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357                 netdev->name,
358                 netdev->state,
359                 netdev->trans_start,
360                 netdev->last_rx);
361         }
362
363         /* Print Registers */
364         dev_info(&adapter->pdev->dev, "Register Dump\n");
365         printk(KERN_INFO " Register Name   Value\n");
366         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367              reginfo->name; reginfo++) {
368                 igb_regdump(hw, reginfo);
369         }
370
371         /* Print TX Ring Summary */
372         if (!netdev || !netif_running(netdev))
373                 goto exit;
374
375         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
377                 " leng ntw timestamp\n");
378         for (n = 0; n < adapter->num_tx_queues; n++) {
379                 tx_ring = adapter->tx_ring[n];
380                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
383                            (u64)buffer_info->dma,
384                            buffer_info->length,
385                            buffer_info->next_to_watch,
386                            (u64)buffer_info->time_stamp);
387         }
388
389         /* Print TX Rings */
390         if (!netif_msg_tx_done(adapter))
391                 goto rx_ring_summary;
392
393         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395         /* Transmit Descriptor Formats
396          *
397          * Advanced Transmit Descriptor
398          *   +--------------------------------------------------------------+
399          * 0 |         Buffer Address [63:0]                                |
400          *   +--------------------------------------------------------------+
401          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
402          *   +--------------------------------------------------------------+
403          *   63      46 45    40 39 38 36 35 32 31   24             15       0
404          */
405
406         for (n = 0; n < adapter->num_tx_queues; n++) {
407                 tx_ring = adapter->tx_ring[n];
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410                 printk(KERN_INFO "------------------------------------\n");
411                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
412                         "[PlPOCIStDDM Ln] [bi->dma       ] "
413                         "leng  ntw timestamp        bi->skb\n");
414
415                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417                         buffer_info = &tx_ring->buffer_info[i];
418                         u0 = (struct my_u0 *)tx_desc;
419                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
420                                 " %04X  %3X %016llX %p", i,
421                                 le64_to_cpu(u0->a),
422                                 le64_to_cpu(u0->b),
423                                 (u64)buffer_info->dma,
424                                 buffer_info->length,
425                                 buffer_info->next_to_watch,
426                                 (u64)buffer_info->time_stamp,
427                                 buffer_info->skb);
428                         if (i == tx_ring->next_to_use &&
429                                 i == tx_ring->next_to_clean)
430                                 printk(KERN_CONT " NTC/U\n");
431                         else if (i == tx_ring->next_to_use)
432                                 printk(KERN_CONT " NTU\n");
433                         else if (i == tx_ring->next_to_clean)
434                                 printk(KERN_CONT " NTC\n");
435                         else
436                                 printk(KERN_CONT "\n");
437
438                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439                                 print_hex_dump(KERN_INFO, "",
440                                         DUMP_PREFIX_ADDRESS,
441                                         16, 1, phys_to_virt(buffer_info->dma),
442                                         buffer_info->length, true);
443                 }
444         }
445
446         /* Print RX Rings Summary */
447 rx_ring_summary:
448         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449         printk(KERN_INFO "Queue [NTU] [NTC]\n");
450         for (n = 0; n < adapter->num_rx_queues; n++) {
451                 rx_ring = adapter->rx_ring[n];
452                 printk(KERN_INFO " %5d %5X %5X\n", n,
453                            rx_ring->next_to_use, rx_ring->next_to_clean);
454         }
455
456         /* Print RX Rings */
457         if (!netif_msg_rx_status(adapter))
458                 goto exit;
459
460         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462         /* Advanced Receive Descriptor (Read) Format
463          *    63                                           1        0
464          *    +-----------------------------------------------------+
465          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
466          *    +----------------------------------------------+------+
467          *  8 |       Header Buffer Address [63:1]           |  DD  |
468          *    +-----------------------------------------------------+
469          *
470          *
471          * Advanced Receive Descriptor (Write-Back) Format
472          *
473          *   63       48 47    32 31  30      21 20 17 16   4 3     0
474          *   +------------------------------------------------------+
475          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
476          *   | Checksum   Ident  |   |           |    | Type | Type |
477          *   +------------------------------------------------------+
478          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479          *   +------------------------------------------------------+
480          *   63       48 47    32 31            20 19               0
481          */
482
483         for (n = 0; n < adapter->num_rx_queues; n++) {
484                 rx_ring = adapter->rx_ring[n];
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487                 printk(KERN_INFO "------------------------------------\n");
488                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
489                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
490                         "<-- Adv Rx Read format\n");
491                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
492                         "[vl er S cks ln] ---------------- [bi->skb] "
493                         "<-- Adv Rx Write-Back format\n");
494
495                 for (i = 0; i < rx_ring->count; i++) {
496                         buffer_info = &rx_ring->buffer_info[i];
497                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498                         u0 = (struct my_u0 *)rx_desc;
499                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500                         if (staterr & E1000_RXD_STAT_DD) {
501                                 /* Descriptor Done */
502                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
503                                         "%016llX ---------------- %p", i,
504                                         le64_to_cpu(u0->a),
505                                         le64_to_cpu(u0->b),
506                                         buffer_info->skb);
507                         } else {
508                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
509                                         "%016llX %016llX %p", i,
510                                         le64_to_cpu(u0->a),
511                                         le64_to_cpu(u0->b),
512                                         (u64)buffer_info->dma,
513                                         buffer_info->skb);
514
515                                 if (netif_msg_pktdata(adapter)) {
516                                         print_hex_dump(KERN_INFO, "",
517                                                 DUMP_PREFIX_ADDRESS,
518                                                 16, 1,
519                                                 phys_to_virt(buffer_info->dma),
520                                                 rx_ring->rx_buffer_len, true);
521                                         if (rx_ring->rx_buffer_len
522                                                 < IGB_RXBUFFER_1024)
523                                                 print_hex_dump(KERN_INFO, "",
524                                                   DUMP_PREFIX_ADDRESS,
525                                                   16, 1,
526                                                   phys_to_virt(
527                                                     buffer_info->page_dma +
528                                                     buffer_info->page_offset),
529                                                   PAGE_SIZE/2, true);
530                                 }
531                         }
532
533                         if (i == rx_ring->next_to_use)
534                                 printk(KERN_CONT " NTU\n");
535                         else if (i == rx_ring->next_to_clean)
536                                 printk(KERN_CONT " NTC\n");
537                         else
538                                 printk(KERN_CONT "\n");
539
540                 }
541         }
542
543 exit:
544         return;
545 }
546
547
548 /**
549  * igb_read_clock - read raw cycle counter (to be used by time counter)
550  */
551 static cycle_t igb_read_clock(const struct cyclecounter *tc)
552 {
553         struct igb_adapter *adapter =
554                 container_of(tc, struct igb_adapter, cycles);
555         struct e1000_hw *hw = &adapter->hw;
556         u64 stamp = 0;
557         int shift = 0;
558
559         /*
560          * The timestamp latches on lowest register read. For the 82580
561          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
562          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563          */
564         if (hw->mac.type == e1000_82580) {
565                 stamp = rd32(E1000_SYSTIMR) >> 8;
566                 shift = IGB_82580_TSYNC_SHIFT;
567         }
568
569         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
570         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
571         return stamp;
572 }
573
574 /**
575  * igb_get_hw_dev - return device
576  * used by hardware layer to print debugging information
577  **/
578 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579 {
580         struct igb_adapter *adapter = hw->back;
581         return adapter->netdev;
582 }
583
584 /**
585  * igb_init_module - Driver Registration Routine
586  *
587  * igb_init_module is the first routine called when the driver is
588  * loaded. All it does is register with the PCI subsystem.
589  **/
590 static int __init igb_init_module(void)
591 {
592         int ret;
593         printk(KERN_INFO "%s - version %s\n",
594                igb_driver_string, igb_driver_version);
595
596         printk(KERN_INFO "%s\n", igb_copyright);
597
598 #ifdef CONFIG_IGB_DCA
599         dca_register_notify(&dca_notifier);
600 #endif
601         ret = pci_register_driver(&igb_driver);
602         return ret;
603 }
604
605 module_init(igb_init_module);
606
607 /**
608  * igb_exit_module - Driver Exit Cleanup Routine
609  *
610  * igb_exit_module is called just before the driver is removed
611  * from memory.
612  **/
613 static void __exit igb_exit_module(void)
614 {
615 #ifdef CONFIG_IGB_DCA
616         dca_unregister_notify(&dca_notifier);
617 #endif
618         pci_unregister_driver(&igb_driver);
619 }
620
621 module_exit(igb_exit_module);
622
623 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
624 /**
625  * igb_cache_ring_register - Descriptor ring to register mapping
626  * @adapter: board private structure to initialize
627  *
628  * Once we know the feature-set enabled for the device, we'll cache
629  * the register offset the descriptor ring is assigned to.
630  **/
631 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 {
633         int i = 0, j = 0;
634         u32 rbase_offset = adapter->vfs_allocated_count;
635
636         switch (adapter->hw.mac.type) {
637         case e1000_82576:
638                 /* The queues are allocated for virtualization such that VF 0
639                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
640                  * In order to avoid collision we start at the first free queue
641                  * and continue consuming queues in the same sequence
642                  */
643                 if (adapter->vfs_allocated_count) {
644                         for (; i < adapter->rss_queues; i++)
645                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
646                                                                Q_IDX_82576(i);
647                 }
648         case e1000_82575:
649         case e1000_82580:
650         case e1000_i350:
651         default:
652                 for (; i < adapter->num_rx_queues; i++)
653                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654                 for (; j < adapter->num_tx_queues; j++)
655                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656                 break;
657         }
658 }
659
660 static void igb_free_queues(struct igb_adapter *adapter)
661 {
662         int i;
663
664         for (i = 0; i < adapter->num_tx_queues; i++) {
665                 kfree(adapter->tx_ring[i]);
666                 adapter->tx_ring[i] = NULL;
667         }
668         for (i = 0; i < adapter->num_rx_queues; i++) {
669                 kfree(adapter->rx_ring[i]);
670                 adapter->rx_ring[i] = NULL;
671         }
672         adapter->num_rx_queues = 0;
673         adapter->num_tx_queues = 0;
674 }
675
676 /**
677  * igb_alloc_queues - Allocate memory for all rings
678  * @adapter: board private structure to initialize
679  *
680  * We allocate one ring per queue at run-time since we don't know the
681  * number of queues at compile-time.
682  **/
683 static int igb_alloc_queues(struct igb_adapter *adapter)
684 {
685         struct igb_ring *ring;
686         int i;
687
688         for (i = 0; i < adapter->num_tx_queues; i++) {
689                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
690                 if (!ring)
691                         goto err;
692                 ring->count = adapter->tx_ring_count;
693                 ring->queue_index = i;
694                 ring->dev = &adapter->pdev->dev;
695                 ring->netdev = adapter->netdev;
696                 /* For 82575, context index must be unique per ring. */
697                 if (adapter->hw.mac.type == e1000_82575)
698                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
699                 adapter->tx_ring[i] = ring;
700         }
701
702         for (i = 0; i < adapter->num_rx_queues; i++) {
703                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
704                 if (!ring)
705                         goto err;
706                 ring->count = adapter->rx_ring_count;
707                 ring->queue_index = i;
708                 ring->dev = &adapter->pdev->dev;
709                 ring->netdev = adapter->netdev;
710                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
711                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
712                 /* set flag indicating ring supports SCTP checksum offload */
713                 if (adapter->hw.mac.type >= e1000_82576)
714                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
715                 adapter->rx_ring[i] = ring;
716         }
717
718         igb_cache_ring_register(adapter);
719
720         return 0;
721
722 err:
723         igb_free_queues(adapter);
724
725         return -ENOMEM;
726 }
727
728 #define IGB_N0_QUEUE -1
729 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730 {
731         u32 msixbm = 0;
732         struct igb_adapter *adapter = q_vector->adapter;
733         struct e1000_hw *hw = &adapter->hw;
734         u32 ivar, index;
735         int rx_queue = IGB_N0_QUEUE;
736         int tx_queue = IGB_N0_QUEUE;
737
738         if (q_vector->rx_ring)
739                 rx_queue = q_vector->rx_ring->reg_idx;
740         if (q_vector->tx_ring)
741                 tx_queue = q_vector->tx_ring->reg_idx;
742
743         switch (hw->mac.type) {
744         case e1000_82575:
745                 /* The 82575 assigns vectors using a bitmask, which matches the
746                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
747                    or more queues to a vector, we write the appropriate bits
748                    into the MSIXBM register for that vector. */
749                 if (rx_queue > IGB_N0_QUEUE)
750                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
751                 if (tx_queue > IGB_N0_QUEUE)
752                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
753                 if (!adapter->msix_entries && msix_vector == 0)
754                         msixbm |= E1000_EIMS_OTHER;
755                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
756                 q_vector->eims_value = msixbm;
757                 break;
758         case e1000_82576:
759                 /* 82576 uses a table-based method for assigning vectors.
760                    Each queue has a single entry in the table to which we write
761                    a vector number along with a "valid" bit.  Sadly, the layout
762                    of the table is somewhat counterintuitive. */
763                 if (rx_queue > IGB_N0_QUEUE) {
764                         index = (rx_queue & 0x7);
765                         ivar = array_rd32(E1000_IVAR0, index);
766                         if (rx_queue < 8) {
767                                 /* vector goes into low byte of register */
768                                 ivar = ivar & 0xFFFFFF00;
769                                 ivar |= msix_vector | E1000_IVAR_VALID;
770                         } else {
771                                 /* vector goes into third byte of register */
772                                 ivar = ivar & 0xFF00FFFF;
773                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774                         }
775                         array_wr32(E1000_IVAR0, index, ivar);
776                 }
777                 if (tx_queue > IGB_N0_QUEUE) {
778                         index = (tx_queue & 0x7);
779                         ivar = array_rd32(E1000_IVAR0, index);
780                         if (tx_queue < 8) {
781                                 /* vector goes into second byte of register */
782                                 ivar = ivar & 0xFFFF00FF;
783                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
784                         } else {
785                                 /* vector goes into high byte of register */
786                                 ivar = ivar & 0x00FFFFFF;
787                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788                         }
789                         array_wr32(E1000_IVAR0, index, ivar);
790                 }
791                 q_vector->eims_value = 1 << msix_vector;
792                 break;
793         case e1000_82580:
794         case e1000_i350:
795                 /* 82580 uses the same table-based approach as 82576 but has fewer
796                    entries as a result we carry over for queues greater than 4. */
797                 if (rx_queue > IGB_N0_QUEUE) {
798                         index = (rx_queue >> 1);
799                         ivar = array_rd32(E1000_IVAR0, index);
800                         if (rx_queue & 0x1) {
801                                 /* vector goes into third byte of register */
802                                 ivar = ivar & 0xFF00FFFF;
803                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
804                         } else {
805                                 /* vector goes into low byte of register */
806                                 ivar = ivar & 0xFFFFFF00;
807                                 ivar |= msix_vector | E1000_IVAR_VALID;
808                         }
809                         array_wr32(E1000_IVAR0, index, ivar);
810                 }
811                 if (tx_queue > IGB_N0_QUEUE) {
812                         index = (tx_queue >> 1);
813                         ivar = array_rd32(E1000_IVAR0, index);
814                         if (tx_queue & 0x1) {
815                                 /* vector goes into high byte of register */
816                                 ivar = ivar & 0x00FFFFFF;
817                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
818                         } else {
819                                 /* vector goes into second byte of register */
820                                 ivar = ivar & 0xFFFF00FF;
821                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822                         }
823                         array_wr32(E1000_IVAR0, index, ivar);
824                 }
825                 q_vector->eims_value = 1 << msix_vector;
826                 break;
827         default:
828                 BUG();
829                 break;
830         }
831
832         /* add q_vector eims value to global eims_enable_mask */
833         adapter->eims_enable_mask |= q_vector->eims_value;
834
835         /* configure q_vector to set itr on first interrupt */
836         q_vector->set_itr = 1;
837 }
838
839 /**
840  * igb_configure_msix - Configure MSI-X hardware
841  *
842  * igb_configure_msix sets up the hardware to properly
843  * generate MSI-X interrupts.
844  **/
845 static void igb_configure_msix(struct igb_adapter *adapter)
846 {
847         u32 tmp;
848         int i, vector = 0;
849         struct e1000_hw *hw = &adapter->hw;
850
851         adapter->eims_enable_mask = 0;
852
853         /* set vector for other causes, i.e. link changes */
854         switch (hw->mac.type) {
855         case e1000_82575:
856                 tmp = rd32(E1000_CTRL_EXT);
857                 /* enable MSI-X PBA support*/
858                 tmp |= E1000_CTRL_EXT_PBA_CLR;
859
860                 /* Auto-Mask interrupts upon ICR read. */
861                 tmp |= E1000_CTRL_EXT_EIAME;
862                 tmp |= E1000_CTRL_EXT_IRCA;
863
864                 wr32(E1000_CTRL_EXT, tmp);
865
866                 /* enable msix_other interrupt */
867                 array_wr32(E1000_MSIXBM(0), vector++,
868                                       E1000_EIMS_OTHER);
869                 adapter->eims_other = E1000_EIMS_OTHER;
870
871                 break;
872
873         case e1000_82576:
874         case e1000_82580:
875         case e1000_i350:
876                 /* Turn on MSI-X capability first, or our settings
877                  * won't stick.  And it will take days to debug. */
878                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
879                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
880                                 E1000_GPIE_NSICR);
881
882                 /* enable msix_other interrupt */
883                 adapter->eims_other = 1 << vector;
884                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
885
886                 wr32(E1000_IVAR_MISC, tmp);
887                 break;
888         default:
889                 /* do nothing, since nothing else supports MSI-X */
890                 break;
891         } /* switch (hw->mac.type) */
892
893         adapter->eims_enable_mask |= adapter->eims_other;
894
895         for (i = 0; i < adapter->num_q_vectors; i++)
896                 igb_assign_vector(adapter->q_vector[i], vector++);
897
898         wrfl();
899 }
900
901 /**
902  * igb_request_msix - Initialize MSI-X interrupts
903  *
904  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
905  * kernel.
906  **/
907 static int igb_request_msix(struct igb_adapter *adapter)
908 {
909         struct net_device *netdev = adapter->netdev;
910         struct e1000_hw *hw = &adapter->hw;
911         int i, err = 0, vector = 0;
912
913         err = request_irq(adapter->msix_entries[vector].vector,
914                           igb_msix_other, 0, netdev->name, adapter);
915         if (err)
916                 goto out;
917         vector++;
918
919         for (i = 0; i < adapter->num_q_vectors; i++) {
920                 struct igb_q_vector *q_vector = adapter->q_vector[i];
921
922                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923
924                 if (q_vector->rx_ring && q_vector->tx_ring)
925                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
926                                 q_vector->rx_ring->queue_index);
927                 else if (q_vector->tx_ring)
928                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
929                                 q_vector->tx_ring->queue_index);
930                 else if (q_vector->rx_ring)
931                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
932                                 q_vector->rx_ring->queue_index);
933                 else
934                         sprintf(q_vector->name, "%s-unused", netdev->name);
935
936                 err = request_irq(adapter->msix_entries[vector].vector,
937                                   igb_msix_ring, 0, q_vector->name,
938                                   q_vector);
939                 if (err)
940                         goto out;
941                 vector++;
942         }
943
944         igb_configure_msix(adapter);
945         return 0;
946 out:
947         return err;
948 }
949
950 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951 {
952         if (adapter->msix_entries) {
953                 pci_disable_msix(adapter->pdev);
954                 kfree(adapter->msix_entries);
955                 adapter->msix_entries = NULL;
956         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
957                 pci_disable_msi(adapter->pdev);
958         }
959 }
960
961 /**
962  * igb_free_q_vectors - Free memory allocated for interrupt vectors
963  * @adapter: board private structure to initialize
964  *
965  * This function frees the memory allocated to the q_vectors.  In addition if
966  * NAPI is enabled it will delete any references to the NAPI struct prior
967  * to freeing the q_vector.
968  **/
969 static void igb_free_q_vectors(struct igb_adapter *adapter)
970 {
971         int v_idx;
972
973         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
974                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
975                 adapter->q_vector[v_idx] = NULL;
976                 if (!q_vector)
977                         continue;
978                 netif_napi_del(&q_vector->napi);
979                 kfree(q_vector);
980         }
981         adapter->num_q_vectors = 0;
982 }
983
984 /**
985  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
986  *
987  * This function resets the device so that it has 0 rx queues, tx queues, and
988  * MSI-X interrupts allocated.
989  */
990 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
991 {
992         igb_free_queues(adapter);
993         igb_free_q_vectors(adapter);
994         igb_reset_interrupt_capability(adapter);
995 }
996
997 /**
998  * igb_set_interrupt_capability - set MSI or MSI-X if supported
999  *
1000  * Attempt to configure interrupts using the best available
1001  * capabilities of the hardware and kernel.
1002  **/
1003 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004 {
1005         int err;
1006         int numvecs, i;
1007
1008         /* Number of supported queues. */
1009         adapter->num_rx_queues = adapter->rss_queues;
1010         if (adapter->vfs_allocated_count)
1011                 adapter->num_tx_queues = 1;
1012         else
1013                 adapter->num_tx_queues = adapter->rss_queues;
1014
1015         /* start with one vector for every rx queue */
1016         numvecs = adapter->num_rx_queues;
1017
1018         /* if tx handler is separate add 1 for every tx queue */
1019         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1020                 numvecs += adapter->num_tx_queues;
1021
1022         /* store the number of vectors reserved for queues */
1023         adapter->num_q_vectors = numvecs;
1024
1025         /* add 1 vector for link status interrupts */
1026         numvecs++;
1027         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1028                                         GFP_KERNEL);
1029         if (!adapter->msix_entries)
1030                 goto msi_only;
1031
1032         for (i = 0; i < numvecs; i++)
1033                 adapter->msix_entries[i].entry = i;
1034
1035         err = pci_enable_msix(adapter->pdev,
1036                               adapter->msix_entries,
1037                               numvecs);
1038         if (err == 0)
1039                 goto out;
1040
1041         igb_reset_interrupt_capability(adapter);
1042
1043         /* If we can't do MSI-X, try MSI */
1044 msi_only:
1045 #ifdef CONFIG_PCI_IOV
1046         /* disable SR-IOV for non MSI-X configurations */
1047         if (adapter->vf_data) {
1048                 struct e1000_hw *hw = &adapter->hw;
1049                 /* disable iov and allow time for transactions to clear */
1050                 pci_disable_sriov(adapter->pdev);
1051                 msleep(500);
1052
1053                 kfree(adapter->vf_data);
1054                 adapter->vf_data = NULL;
1055                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1056                 wrfl();
1057                 msleep(100);
1058                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1059         }
1060 #endif
1061         adapter->vfs_allocated_count = 0;
1062         adapter->rss_queues = 1;
1063         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1064         adapter->num_rx_queues = 1;
1065         adapter->num_tx_queues = 1;
1066         adapter->num_q_vectors = 1;
1067         if (!pci_enable_msi(adapter->pdev))
1068                 adapter->flags |= IGB_FLAG_HAS_MSI;
1069 out:
1070         /* Notify the stack of the (possibly) reduced queue counts. */
1071         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1072         return netif_set_real_num_rx_queues(adapter->netdev,
1073                                             adapter->num_rx_queues);
1074 }
1075
1076 /**
1077  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1078  * @adapter: board private structure to initialize
1079  *
1080  * We allocate one q_vector per queue interrupt.  If allocation fails we
1081  * return -ENOMEM.
1082  **/
1083 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1084 {
1085         struct igb_q_vector *q_vector;
1086         struct e1000_hw *hw = &adapter->hw;
1087         int v_idx;
1088
1089         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1090                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1091                 if (!q_vector)
1092                         goto err_out;
1093                 q_vector->adapter = adapter;
1094                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1095                 q_vector->itr_val = IGB_START_ITR;
1096                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1097                 adapter->q_vector[v_idx] = q_vector;
1098         }
1099         return 0;
1100
1101 err_out:
1102         igb_free_q_vectors(adapter);
1103         return -ENOMEM;
1104 }
1105
1106 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1107                                       int ring_idx, int v_idx)
1108 {
1109         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1110
1111         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1112         q_vector->rx_ring->q_vector = q_vector;
1113         q_vector->itr_val = adapter->rx_itr_setting;
1114         if (q_vector->itr_val && q_vector->itr_val <= 3)
1115                 q_vector->itr_val = IGB_START_ITR;
1116 }
1117
1118 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1119                                       int ring_idx, int v_idx)
1120 {
1121         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1122
1123         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1124         q_vector->tx_ring->q_vector = q_vector;
1125         q_vector->itr_val = adapter->tx_itr_setting;
1126         if (q_vector->itr_val && q_vector->itr_val <= 3)
1127                 q_vector->itr_val = IGB_START_ITR;
1128 }
1129
1130 /**
1131  * igb_map_ring_to_vector - maps allocated queues to vectors
1132  *
1133  * This function maps the recently allocated queues to vectors.
1134  **/
1135 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1136 {
1137         int i;
1138         int v_idx = 0;
1139
1140         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1141             (adapter->num_q_vectors < adapter->num_tx_queues))
1142                 return -ENOMEM;
1143
1144         if (adapter->num_q_vectors >=
1145             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1146                 for (i = 0; i < adapter->num_rx_queues; i++)
1147                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1148                 for (i = 0; i < adapter->num_tx_queues; i++)
1149                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1150         } else {
1151                 for (i = 0; i < adapter->num_rx_queues; i++) {
1152                         if (i < adapter->num_tx_queues)
1153                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1154                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1155                 }
1156                 for (; i < adapter->num_tx_queues; i++)
1157                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1158         }
1159         return 0;
1160 }
1161
1162 /**
1163  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1164  *
1165  * This function initializes the interrupts and allocates all of the queues.
1166  **/
1167 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1168 {
1169         struct pci_dev *pdev = adapter->pdev;
1170         int err;
1171
1172         err = igb_set_interrupt_capability(adapter);
1173         if (err)
1174                 return err;
1175
1176         err = igb_alloc_q_vectors(adapter);
1177         if (err) {
1178                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1179                 goto err_alloc_q_vectors;
1180         }
1181
1182         err = igb_alloc_queues(adapter);
1183         if (err) {
1184                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1185                 goto err_alloc_queues;
1186         }
1187
1188         err = igb_map_ring_to_vector(adapter);
1189         if (err) {
1190                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1191                 goto err_map_queues;
1192         }
1193
1194
1195         return 0;
1196 err_map_queues:
1197         igb_free_queues(adapter);
1198 err_alloc_queues:
1199         igb_free_q_vectors(adapter);
1200 err_alloc_q_vectors:
1201         igb_reset_interrupt_capability(adapter);
1202         return err;
1203 }
1204
1205 /**
1206  * igb_request_irq - initialize interrupts
1207  *
1208  * Attempts to configure interrupts using the best available
1209  * capabilities of the hardware and kernel.
1210  **/
1211 static int igb_request_irq(struct igb_adapter *adapter)
1212 {
1213         struct net_device *netdev = adapter->netdev;
1214         struct pci_dev *pdev = adapter->pdev;
1215         int err = 0;
1216
1217         if (adapter->msix_entries) {
1218                 err = igb_request_msix(adapter);
1219                 if (!err)
1220                         goto request_done;
1221                 /* fall back to MSI */
1222                 igb_clear_interrupt_scheme(adapter);
1223                 if (!pci_enable_msi(adapter->pdev))
1224                         adapter->flags |= IGB_FLAG_HAS_MSI;
1225                 igb_free_all_tx_resources(adapter);
1226                 igb_free_all_rx_resources(adapter);
1227                 adapter->num_tx_queues = 1;
1228                 adapter->num_rx_queues = 1;
1229                 adapter->num_q_vectors = 1;
1230                 err = igb_alloc_q_vectors(adapter);
1231                 if (err) {
1232                         dev_err(&pdev->dev,
1233                                 "Unable to allocate memory for vectors\n");
1234                         goto request_done;
1235                 }
1236                 err = igb_alloc_queues(adapter);
1237                 if (err) {
1238                         dev_err(&pdev->dev,
1239                                 "Unable to allocate memory for queues\n");
1240                         igb_free_q_vectors(adapter);
1241                         goto request_done;
1242                 }
1243                 igb_setup_all_tx_resources(adapter);
1244                 igb_setup_all_rx_resources(adapter);
1245         } else {
1246                 igb_assign_vector(adapter->q_vector[0], 0);
1247         }
1248
1249         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1250                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1251                                   netdev->name, adapter);
1252                 if (!err)
1253                         goto request_done;
1254
1255                 /* fall back to legacy interrupts */
1256                 igb_reset_interrupt_capability(adapter);
1257                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1258         }
1259
1260         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1261                           netdev->name, adapter);
1262
1263         if (err)
1264                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1265                         err);
1266
1267 request_done:
1268         return err;
1269 }
1270
1271 static void igb_free_irq(struct igb_adapter *adapter)
1272 {
1273         if (adapter->msix_entries) {
1274                 int vector = 0, i;
1275
1276                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1277
1278                 for (i = 0; i < adapter->num_q_vectors; i++) {
1279                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1280                         free_irq(adapter->msix_entries[vector++].vector,
1281                                  q_vector);
1282                 }
1283         } else {
1284                 free_irq(adapter->pdev->irq, adapter);
1285         }
1286 }
1287
1288 /**
1289  * igb_irq_disable - Mask off interrupt generation on the NIC
1290  * @adapter: board private structure
1291  **/
1292 static void igb_irq_disable(struct igb_adapter *adapter)
1293 {
1294         struct e1000_hw *hw = &adapter->hw;
1295
1296         /*
1297          * we need to be careful when disabling interrupts.  The VFs are also
1298          * mapped into these registers and so clearing the bits can cause
1299          * issues on the VF drivers so we only need to clear what we set
1300          */
1301         if (adapter->msix_entries) {
1302                 u32 regval = rd32(E1000_EIAM);
1303                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1304                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1305                 regval = rd32(E1000_EIAC);
1306                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1307         }
1308
1309         wr32(E1000_IAM, 0);
1310         wr32(E1000_IMC, ~0);
1311         wrfl();
1312         if (adapter->msix_entries) {
1313                 int i;
1314                 for (i = 0; i < adapter->num_q_vectors; i++)
1315                         synchronize_irq(adapter->msix_entries[i].vector);
1316         } else {
1317                 synchronize_irq(adapter->pdev->irq);
1318         }
1319 }
1320
1321 /**
1322  * igb_irq_enable - Enable default interrupt generation settings
1323  * @adapter: board private structure
1324  **/
1325 static void igb_irq_enable(struct igb_adapter *adapter)
1326 {
1327         struct e1000_hw *hw = &adapter->hw;
1328
1329         if (adapter->msix_entries) {
1330                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1331                 u32 regval = rd32(E1000_EIAC);
1332                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1333                 regval = rd32(E1000_EIAM);
1334                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1335                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1336                 if (adapter->vfs_allocated_count) {
1337                         wr32(E1000_MBVFIMR, 0xFF);
1338                         ims |= E1000_IMS_VMMB;
1339                 }
1340                 if (adapter->hw.mac.type == e1000_82580)
1341                         ims |= E1000_IMS_DRSTA;
1342
1343                 wr32(E1000_IMS, ims);
1344         } else {
1345                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1346                                 E1000_IMS_DRSTA);
1347                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1348                                 E1000_IMS_DRSTA);
1349         }
1350 }
1351
1352 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1353 {
1354         struct e1000_hw *hw = &adapter->hw;
1355         u16 vid = adapter->hw.mng_cookie.vlan_id;
1356         u16 old_vid = adapter->mng_vlan_id;
1357
1358         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1359                 /* add VID to filter table */
1360                 igb_vfta_set(hw, vid, true);
1361                 adapter->mng_vlan_id = vid;
1362         } else {
1363                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1364         }
1365
1366         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1367             (vid != old_vid) &&
1368             !test_bit(old_vid, adapter->active_vlans)) {
1369                 /* remove VID from filter table */
1370                 igb_vfta_set(hw, old_vid, false);
1371         }
1372 }
1373
1374 /**
1375  * igb_release_hw_control - release control of the h/w to f/w
1376  * @adapter: address of board private structure
1377  *
1378  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1379  * For ASF and Pass Through versions of f/w this means that the
1380  * driver is no longer loaded.
1381  *
1382  **/
1383 static void igb_release_hw_control(struct igb_adapter *adapter)
1384 {
1385         struct e1000_hw *hw = &adapter->hw;
1386         u32 ctrl_ext;
1387
1388         /* Let firmware take over control of h/w */
1389         ctrl_ext = rd32(E1000_CTRL_EXT);
1390         wr32(E1000_CTRL_EXT,
1391                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1392 }
1393
1394 /**
1395  * igb_get_hw_control - get control of the h/w from f/w
1396  * @adapter: address of board private structure
1397  *
1398  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1399  * For ASF and Pass Through versions of f/w this means that
1400  * the driver is loaded.
1401  *
1402  **/
1403 static void igb_get_hw_control(struct igb_adapter *adapter)
1404 {
1405         struct e1000_hw *hw = &adapter->hw;
1406         u32 ctrl_ext;
1407
1408         /* Let firmware know the driver has taken over */
1409         ctrl_ext = rd32(E1000_CTRL_EXT);
1410         wr32(E1000_CTRL_EXT,
1411                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1412 }
1413
1414 /**
1415  * igb_configure - configure the hardware for RX and TX
1416  * @adapter: private board structure
1417  **/
1418 static void igb_configure(struct igb_adapter *adapter)
1419 {
1420         struct net_device *netdev = adapter->netdev;
1421         int i;
1422
1423         igb_get_hw_control(adapter);
1424         igb_set_rx_mode(netdev);
1425
1426         igb_restore_vlan(adapter);
1427
1428         igb_setup_tctl(adapter);
1429         igb_setup_mrqc(adapter);
1430         igb_setup_rctl(adapter);
1431
1432         igb_configure_tx(adapter);
1433         igb_configure_rx(adapter);
1434
1435         igb_rx_fifo_flush_82575(&adapter->hw);
1436
1437         /* call igb_desc_unused which always leaves
1438          * at least 1 descriptor unused to make sure
1439          * next_to_use != next_to_clean */
1440         for (i = 0; i < adapter->num_rx_queues; i++) {
1441                 struct igb_ring *ring = adapter->rx_ring[i];
1442                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1443         }
1444 }
1445
1446 /**
1447  * igb_power_up_link - Power up the phy/serdes link
1448  * @adapter: address of board private structure
1449  **/
1450 void igb_power_up_link(struct igb_adapter *adapter)
1451 {
1452         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1453                 igb_power_up_phy_copper(&adapter->hw);
1454         else
1455                 igb_power_up_serdes_link_82575(&adapter->hw);
1456 }
1457
1458 /**
1459  * igb_power_down_link - Power down the phy/serdes link
1460  * @adapter: address of board private structure
1461  */
1462 static void igb_power_down_link(struct igb_adapter *adapter)
1463 {
1464         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1465                 igb_power_down_phy_copper_82575(&adapter->hw);
1466         else
1467                 igb_shutdown_serdes_link_82575(&adapter->hw);
1468 }
1469
1470 /**
1471  * igb_up - Open the interface and prepare it to handle traffic
1472  * @adapter: board private structure
1473  **/
1474 int igb_up(struct igb_adapter *adapter)
1475 {
1476         struct e1000_hw *hw = &adapter->hw;
1477         int i;
1478
1479         /* hardware has been reset, we need to reload some things */
1480         igb_configure(adapter);
1481
1482         clear_bit(__IGB_DOWN, &adapter->state);
1483
1484         for (i = 0; i < adapter->num_q_vectors; i++) {
1485                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1486                 napi_enable(&q_vector->napi);
1487         }
1488         if (adapter->msix_entries)
1489                 igb_configure_msix(adapter);
1490         else
1491                 igb_assign_vector(adapter->q_vector[0], 0);
1492
1493         /* Clear any pending interrupts. */
1494         rd32(E1000_ICR);
1495         igb_irq_enable(adapter);
1496
1497         /* notify VFs that reset has been completed */
1498         if (adapter->vfs_allocated_count) {
1499                 u32 reg_data = rd32(E1000_CTRL_EXT);
1500                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1501                 wr32(E1000_CTRL_EXT, reg_data);
1502         }
1503
1504         netif_tx_start_all_queues(adapter->netdev);
1505
1506         /* start the watchdog. */
1507         hw->mac.get_link_status = 1;
1508         schedule_work(&adapter->watchdog_task);
1509
1510         return 0;
1511 }
1512
1513 void igb_down(struct igb_adapter *adapter)
1514 {
1515         struct net_device *netdev = adapter->netdev;
1516         struct e1000_hw *hw = &adapter->hw;
1517         u32 tctl, rctl;
1518         int i;
1519
1520         /* signal that we're down so the interrupt handler does not
1521          * reschedule our watchdog timer */
1522         set_bit(__IGB_DOWN, &adapter->state);
1523
1524         /* disable receives in the hardware */
1525         rctl = rd32(E1000_RCTL);
1526         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1527         /* flush and sleep below */
1528
1529         netif_tx_stop_all_queues(netdev);
1530
1531         /* disable transmits in the hardware */
1532         tctl = rd32(E1000_TCTL);
1533         tctl &= ~E1000_TCTL_EN;
1534         wr32(E1000_TCTL, tctl);
1535         /* flush both disables and wait for them to finish */
1536         wrfl();
1537         msleep(10);
1538
1539         for (i = 0; i < adapter->num_q_vectors; i++) {
1540                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1541                 napi_disable(&q_vector->napi);
1542         }
1543
1544         igb_irq_disable(adapter);
1545
1546         del_timer_sync(&adapter->watchdog_timer);
1547         del_timer_sync(&adapter->phy_info_timer);
1548
1549         netif_carrier_off(netdev);
1550
1551         /* record the stats before reset*/
1552         spin_lock(&adapter->stats64_lock);
1553         igb_update_stats(adapter, &adapter->stats64);
1554         spin_unlock(&adapter->stats64_lock);
1555
1556         adapter->link_speed = 0;
1557         adapter->link_duplex = 0;
1558
1559         if (!pci_channel_offline(adapter->pdev))
1560                 igb_reset(adapter);
1561         igb_clean_all_tx_rings(adapter);
1562         igb_clean_all_rx_rings(adapter);
1563 #ifdef CONFIG_IGB_DCA
1564
1565         /* since we reset the hardware DCA settings were cleared */
1566         igb_setup_dca(adapter);
1567 #endif
1568 }
1569
1570 void igb_reinit_locked(struct igb_adapter *adapter)
1571 {
1572         WARN_ON(in_interrupt());
1573         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1574                 msleep(1);
1575         igb_down(adapter);
1576         igb_up(adapter);
1577         clear_bit(__IGB_RESETTING, &adapter->state);
1578 }
1579
1580 void igb_reset(struct igb_adapter *adapter)
1581 {
1582         struct pci_dev *pdev = adapter->pdev;
1583         struct e1000_hw *hw = &adapter->hw;
1584         struct e1000_mac_info *mac = &hw->mac;
1585         struct e1000_fc_info *fc = &hw->fc;
1586         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1587         u16 hwm;
1588
1589         /* Repartition Pba for greater than 9k mtu
1590          * To take effect CTRL.RST is required.
1591          */
1592         switch (mac->type) {
1593         case e1000_i350:
1594         case e1000_82580:
1595                 pba = rd32(E1000_RXPBS);
1596                 pba = igb_rxpbs_adjust_82580(pba);
1597                 break;
1598         case e1000_82576:
1599                 pba = rd32(E1000_RXPBS);
1600                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1601                 break;
1602         case e1000_82575:
1603         default:
1604                 pba = E1000_PBA_34K;
1605                 break;
1606         }
1607
1608         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1609             (mac->type < e1000_82576)) {
1610                 /* adjust PBA for jumbo frames */
1611                 wr32(E1000_PBA, pba);
1612
1613                 /* To maintain wire speed transmits, the Tx FIFO should be
1614                  * large enough to accommodate two full transmit packets,
1615                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1616                  * the Rx FIFO should be large enough to accommodate at least
1617                  * one full receive packet and is similarly rounded up and
1618                  * expressed in KB. */
1619                 pba = rd32(E1000_PBA);
1620                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1621                 tx_space = pba >> 16;
1622                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1623                 pba &= 0xffff;
1624                 /* the tx fifo also stores 16 bytes of information about the tx
1625                  * but don't include ethernet FCS because hardware appends it */
1626                 min_tx_space = (adapter->max_frame_size +
1627                                 sizeof(union e1000_adv_tx_desc) -
1628                                 ETH_FCS_LEN) * 2;
1629                 min_tx_space = ALIGN(min_tx_space, 1024);
1630                 min_tx_space >>= 10;
1631                 /* software strips receive CRC, so leave room for it */
1632                 min_rx_space = adapter->max_frame_size;
1633                 min_rx_space = ALIGN(min_rx_space, 1024);
1634                 min_rx_space >>= 10;
1635
1636                 /* If current Tx allocation is less than the min Tx FIFO size,
1637                  * and the min Tx FIFO size is less than the current Rx FIFO
1638                  * allocation, take space away from current Rx allocation */
1639                 if (tx_space < min_tx_space &&
1640                     ((min_tx_space - tx_space) < pba)) {
1641                         pba = pba - (min_tx_space - tx_space);
1642
1643                         /* if short on rx space, rx wins and must trump tx
1644                          * adjustment */
1645                         if (pba < min_rx_space)
1646                                 pba = min_rx_space;
1647                 }
1648                 wr32(E1000_PBA, pba);
1649         }
1650
1651         /* flow control settings */
1652         /* The high water mark must be low enough to fit one full frame
1653          * (or the size used for early receive) above it in the Rx FIFO.
1654          * Set it to the lower of:
1655          * - 90% of the Rx FIFO size, or
1656          * - the full Rx FIFO size minus one full frame */
1657         hwm = min(((pba << 10) * 9 / 10),
1658                         ((pba << 10) - 2 * adapter->max_frame_size));
1659
1660         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1661         fc->low_water = fc->high_water - 16;
1662         fc->pause_time = 0xFFFF;
1663         fc->send_xon = 1;
1664         fc->current_mode = fc->requested_mode;
1665
1666         /* disable receive for all VFs and wait one second */
1667         if (adapter->vfs_allocated_count) {
1668                 int i;
1669                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1670                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1671
1672                 /* ping all the active vfs to let them know we are going down */
1673                 igb_ping_all_vfs(adapter);
1674
1675                 /* disable transmits and receives */
1676                 wr32(E1000_VFRE, 0);
1677                 wr32(E1000_VFTE, 0);
1678         }
1679
1680         /* Allow time for pending master requests to run */
1681         hw->mac.ops.reset_hw(hw);
1682         wr32(E1000_WUC, 0);
1683
1684         if (hw->mac.ops.init_hw(hw))
1685                 dev_err(&pdev->dev, "Hardware Error\n");
1686         if (hw->mac.type > e1000_82580) {
1687                 if (adapter->flags & IGB_FLAG_DMAC) {
1688                         u32 reg;
1689
1690                         /*
1691                          * DMA Coalescing high water mark needs to be higher
1692                          * than * the * Rx threshold.  The Rx threshold is
1693                          * currently * pba - 6, so we * should use a high water
1694                          * mark of pba * - 4. */
1695                         hwm = (pba - 4) << 10;
1696
1697                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1698                                & E1000_DMACR_DMACTHR_MASK);
1699
1700                         /* transition to L0x or L1 if available..*/
1701                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1702
1703                         /* watchdog timer= +-1000 usec in 32usec intervals */
1704                         reg |= (1000 >> 5);
1705                         wr32(E1000_DMACR, reg);
1706
1707                         /* no lower threshold to disable coalescing(smart fifb)
1708                          * -UTRESH=0*/
1709                         wr32(E1000_DMCRTRH, 0);
1710
1711                         /* set hwm to PBA -  2 * max frame size */
1712                         wr32(E1000_FCRTC, hwm);
1713
1714                         /*
1715                          * This sets the time to wait before requesting tran-
1716                          * sition to * low power state to number of usecs needed
1717                          * to receive 1 512 * byte frame at gigabit line rate
1718                          */
1719                         reg = rd32(E1000_DMCTLX);
1720                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1721
1722                         /* Delay 255 usec before entering Lx state. */
1723                         reg |= 0xFF;
1724                         wr32(E1000_DMCTLX, reg);
1725
1726                         /* free space in Tx packet buffer to wake from DMAC */
1727                         wr32(E1000_DMCTXTH,
1728                              (IGB_MIN_TXPBSIZE -
1729                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1730                              >> 6);
1731
1732                         /* make low power state decision controlled by DMAC */
1733                         reg = rd32(E1000_PCIEMISC);
1734                         reg |= E1000_PCIEMISC_LX_DECISION;
1735                         wr32(E1000_PCIEMISC, reg);
1736                 } /* end if IGB_FLAG_DMAC set */
1737         }
1738         if (hw->mac.type == e1000_82580) {
1739                 u32 reg = rd32(E1000_PCIEMISC);
1740                 wr32(E1000_PCIEMISC,
1741                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1742         }
1743         if (!netif_running(adapter->netdev))
1744                 igb_power_down_link(adapter);
1745
1746         igb_update_mng_vlan(adapter);
1747
1748         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1749         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750
1751         igb_get_phy_info(hw);
1752 }
1753
1754 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1755 {
1756         /*
1757          * Since there is no support for separate rx/tx vlan accel
1758          * enable/disable make sure tx flag is always in same state as rx.
1759          */
1760         if (features & NETIF_F_HW_VLAN_RX)
1761                 features |= NETIF_F_HW_VLAN_TX;
1762         else
1763                 features &= ~NETIF_F_HW_VLAN_TX;
1764
1765         return features;
1766 }
1767
1768 static int igb_set_features(struct net_device *netdev, u32 features)
1769 {
1770         struct igb_adapter *adapter = netdev_priv(netdev);
1771         int i;
1772         u32 changed = netdev->features ^ features;
1773
1774         for (i = 0; i < adapter->num_rx_queues; i++) {
1775                 if (features & NETIF_F_RXCSUM)
1776                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1777                 else
1778                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1779         }
1780
1781         if (changed & NETIF_F_HW_VLAN_RX)
1782                 igb_vlan_mode(netdev, features);
1783
1784         return 0;
1785 }
1786
1787 static const struct net_device_ops igb_netdev_ops = {
1788         .ndo_open               = igb_open,
1789         .ndo_stop               = igb_close,
1790         .ndo_start_xmit         = igb_xmit_frame_adv,
1791         .ndo_get_stats64        = igb_get_stats64,
1792         .ndo_set_rx_mode        = igb_set_rx_mode,
1793         .ndo_set_mac_address    = igb_set_mac,
1794         .ndo_change_mtu         = igb_change_mtu,
1795         .ndo_do_ioctl           = igb_ioctl,
1796         .ndo_tx_timeout         = igb_tx_timeout,
1797         .ndo_validate_addr      = eth_validate_addr,
1798         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1799         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1800         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1801         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1802         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1803         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1804 #ifdef CONFIG_NET_POLL_CONTROLLER
1805         .ndo_poll_controller    = igb_netpoll,
1806 #endif
1807         .ndo_fix_features       = igb_fix_features,
1808         .ndo_set_features       = igb_set_features,
1809 };
1810
1811 /**
1812  * igb_probe - Device Initialization Routine
1813  * @pdev: PCI device information struct
1814  * @ent: entry in igb_pci_tbl
1815  *
1816  * Returns 0 on success, negative on failure
1817  *
1818  * igb_probe initializes an adapter identified by a pci_dev structure.
1819  * The OS initialization, configuring of the adapter private structure,
1820  * and a hardware reset occur.
1821  **/
1822 static int __devinit igb_probe(struct pci_dev *pdev,
1823                                const struct pci_device_id *ent)
1824 {
1825         struct net_device *netdev;
1826         struct igb_adapter *adapter;
1827         struct e1000_hw *hw;
1828         u16 eeprom_data = 0;
1829         s32 ret_val;
1830         static int global_quad_port_a; /* global quad port a indication */
1831         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1832         unsigned long mmio_start, mmio_len;
1833         int err, pci_using_dac;
1834         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1835         u8 part_str[E1000_PBANUM_LENGTH];
1836
1837         /* Catch broken hardware that put the wrong VF device ID in
1838          * the PCIe SR-IOV capability.
1839          */
1840         if (pdev->is_virtfn) {
1841                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1842                      pci_name(pdev), pdev->vendor, pdev->device);
1843                 return -EINVAL;
1844         }
1845
1846         err = pci_enable_device_mem(pdev);
1847         if (err)
1848                 return err;
1849
1850         pci_using_dac = 0;
1851         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852         if (!err) {
1853                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1854                 if (!err)
1855                         pci_using_dac = 1;
1856         } else {
1857                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858                 if (err) {
1859                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860                         if (err) {
1861                                 dev_err(&pdev->dev, "No usable DMA "
1862                                         "configuration, aborting\n");
1863                                 goto err_dma;
1864                         }
1865                 }
1866         }
1867
1868         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1869                                            IORESOURCE_MEM),
1870                                            igb_driver_name);
1871         if (err)
1872                 goto err_pci_reg;
1873
1874         pci_enable_pcie_error_reporting(pdev);
1875
1876         pci_set_master(pdev);
1877         pci_save_state(pdev);
1878
1879         err = -ENOMEM;
1880         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1881                                    IGB_ABS_MAX_TX_QUEUES);
1882         if (!netdev)
1883                 goto err_alloc_etherdev;
1884
1885         SET_NETDEV_DEV(netdev, &pdev->dev);
1886
1887         pci_set_drvdata(pdev, netdev);
1888         adapter = netdev_priv(netdev);
1889         adapter->netdev = netdev;
1890         adapter->pdev = pdev;
1891         hw = &adapter->hw;
1892         hw->back = adapter;
1893         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1894
1895         mmio_start = pci_resource_start(pdev, 0);
1896         mmio_len = pci_resource_len(pdev, 0);
1897
1898         err = -EIO;
1899         hw->hw_addr = ioremap(mmio_start, mmio_len);
1900         if (!hw->hw_addr)
1901                 goto err_ioremap;
1902
1903         netdev->netdev_ops = &igb_netdev_ops;
1904         igb_set_ethtool_ops(netdev);
1905         netdev->watchdog_timeo = 5 * HZ;
1906
1907         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1908
1909         netdev->mem_start = mmio_start;
1910         netdev->mem_end = mmio_start + mmio_len;
1911
1912         /* PCI config space info */
1913         hw->vendor_id = pdev->vendor;
1914         hw->device_id = pdev->device;
1915         hw->revision_id = pdev->revision;
1916         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1917         hw->subsystem_device_id = pdev->subsystem_device;
1918
1919         /* Copy the default MAC, PHY and NVM function pointers */
1920         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1921         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1922         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1923         /* Initialize skew-specific constants */
1924         err = ei->get_invariants(hw);
1925         if (err)
1926                 goto err_sw_init;
1927
1928         /* setup the private structure */
1929         err = igb_sw_init(adapter);
1930         if (err)
1931                 goto err_sw_init;
1932
1933         igb_get_bus_info_pcie(hw);
1934
1935         hw->phy.autoneg_wait_to_complete = false;
1936
1937         /* Copper options */
1938         if (hw->phy.media_type == e1000_media_type_copper) {
1939                 hw->phy.mdix = AUTO_ALL_MODES;
1940                 hw->phy.disable_polarity_correction = false;
1941                 hw->phy.ms_type = e1000_ms_hw_default;
1942         }
1943
1944         if (igb_check_reset_block(hw))
1945                 dev_info(&pdev->dev,
1946                         "PHY reset is blocked due to SOL/IDER session.\n");
1947
1948         netdev->hw_features = NETIF_F_SG |
1949                            NETIF_F_IP_CSUM |
1950                            NETIF_F_IPV6_CSUM |
1951                            NETIF_F_TSO |
1952                            NETIF_F_TSO6 |
1953                            NETIF_F_RXCSUM |
1954                            NETIF_F_HW_VLAN_RX;
1955
1956         netdev->features = netdev->hw_features |
1957                            NETIF_F_HW_VLAN_TX |
1958                            NETIF_F_HW_VLAN_FILTER;
1959
1960         netdev->vlan_features |= NETIF_F_TSO;
1961         netdev->vlan_features |= NETIF_F_TSO6;
1962         netdev->vlan_features |= NETIF_F_IP_CSUM;
1963         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1964         netdev->vlan_features |= NETIF_F_SG;
1965
1966         if (pci_using_dac) {
1967                 netdev->features |= NETIF_F_HIGHDMA;
1968                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1969         }
1970
1971         if (hw->mac.type >= e1000_82576) {
1972                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1973                 netdev->features |= NETIF_F_SCTP_CSUM;
1974         }
1975
1976         netdev->priv_flags |= IFF_UNICAST_FLT;
1977
1978         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1979
1980         /* before reading the NVM, reset the controller to put the device in a
1981          * known good starting state */
1982         hw->mac.ops.reset_hw(hw);
1983
1984         /* make sure the NVM is good */
1985         if (hw->nvm.ops.validate(hw) < 0) {
1986                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1987                 err = -EIO;
1988                 goto err_eeprom;
1989         }
1990
1991         /* copy the MAC address out of the NVM */
1992         if (hw->mac.ops.read_mac_addr(hw))
1993                 dev_err(&pdev->dev, "NVM Read Error\n");
1994
1995         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1996         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1997
1998         if (!is_valid_ether_addr(netdev->perm_addr)) {
1999                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2000                 err = -EIO;
2001                 goto err_eeprom;
2002         }
2003
2004         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2005                     (unsigned long) adapter);
2006         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2007                     (unsigned long) adapter);
2008
2009         INIT_WORK(&adapter->reset_task, igb_reset_task);
2010         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2011
2012         /* Initialize link properties that are user-changeable */
2013         adapter->fc_autoneg = true;
2014         hw->mac.autoneg = true;
2015         hw->phy.autoneg_advertised = 0x2f;
2016
2017         hw->fc.requested_mode = e1000_fc_default;
2018         hw->fc.current_mode = e1000_fc_default;
2019
2020         igb_validate_mdi_setting(hw);
2021
2022         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2023          * enable the ACPI Magic Packet filter
2024          */
2025
2026         if (hw->bus.func == 0)
2027                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2028         else if (hw->mac.type >= e1000_82580)
2029                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2030                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2031                                  &eeprom_data);
2032         else if (hw->bus.func == 1)
2033                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2034
2035         if (eeprom_data & eeprom_apme_mask)
2036                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2037
2038         /* now that we have the eeprom settings, apply the special cases where
2039          * the eeprom may be wrong or the board simply won't support wake on
2040          * lan on a particular port */
2041         switch (pdev->device) {
2042         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2043                 adapter->eeprom_wol = 0;
2044                 break;
2045         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2046         case E1000_DEV_ID_82576_FIBER:
2047         case E1000_DEV_ID_82576_SERDES:
2048                 /* Wake events only supported on port A for dual fiber
2049                  * regardless of eeprom setting */
2050                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2051                         adapter->eeprom_wol = 0;
2052                 break;
2053         case E1000_DEV_ID_82576_QUAD_COPPER:
2054         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2055                 /* if quad port adapter, disable WoL on all but port A */
2056                 if (global_quad_port_a != 0)
2057                         adapter->eeprom_wol = 0;
2058                 else
2059                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2060                 /* Reset for multiple quad port adapters */
2061                 if (++global_quad_port_a == 4)
2062                         global_quad_port_a = 0;
2063                 break;
2064         }
2065
2066         /* initialize the wol settings based on the eeprom settings */
2067         adapter->wol = adapter->eeprom_wol;
2068         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2069
2070         /* reset the hardware with the new settings */
2071         igb_reset(adapter);
2072
2073         /* let the f/w know that the h/w is now under the control of the
2074          * driver. */
2075         igb_get_hw_control(adapter);
2076
2077         strcpy(netdev->name, "eth%d");
2078         err = register_netdev(netdev);
2079         if (err)
2080                 goto err_register;
2081
2082         igb_vlan_mode(netdev, netdev->features);
2083
2084         /* carrier off reporting is important to ethtool even BEFORE open */
2085         netif_carrier_off(netdev);
2086
2087 #ifdef CONFIG_IGB_DCA
2088         if (dca_add_requester(&pdev->dev) == 0) {
2089                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2090                 dev_info(&pdev->dev, "DCA enabled\n");
2091                 igb_setup_dca(adapter);
2092         }
2093
2094 #endif
2095         /* do hw tstamp init after resetting */
2096         igb_init_hw_timer(adapter);
2097
2098         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2099         /* print bus type/speed/width info */
2100         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2101                  netdev->name,
2102                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2103                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2104                                                             "unknown"),
2105                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2106                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2107                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2108                    "unknown"),
2109                  netdev->dev_addr);
2110
2111         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2112         if (ret_val)
2113                 strcpy(part_str, "Unknown");
2114         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2115         dev_info(&pdev->dev,
2116                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2117                 adapter->msix_entries ? "MSI-X" :
2118                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2119                 adapter->num_rx_queues, adapter->num_tx_queues);
2120         switch (hw->mac.type) {
2121         case e1000_i350:
2122                 igb_set_eee_i350(hw);
2123                 break;
2124         default:
2125                 break;
2126         }
2127         return 0;
2128
2129 err_register:
2130         igb_release_hw_control(adapter);
2131 err_eeprom:
2132         if (!igb_check_reset_block(hw))
2133                 igb_reset_phy(hw);
2134
2135         if (hw->flash_address)
2136                 iounmap(hw->flash_address);
2137 err_sw_init:
2138         igb_clear_interrupt_scheme(adapter);
2139         iounmap(hw->hw_addr);
2140 err_ioremap:
2141         free_netdev(netdev);
2142 err_alloc_etherdev:
2143         pci_release_selected_regions(pdev,
2144                                      pci_select_bars(pdev, IORESOURCE_MEM));
2145 err_pci_reg:
2146 err_dma:
2147         pci_disable_device(pdev);
2148         return err;
2149 }
2150
2151 /**
2152  * igb_remove - Device Removal Routine
2153  * @pdev: PCI device information struct
2154  *
2155  * igb_remove is called by the PCI subsystem to alert the driver
2156  * that it should release a PCI device.  The could be caused by a
2157  * Hot-Plug event, or because the driver is going to be removed from
2158  * memory.
2159  **/
2160 static void __devexit igb_remove(struct pci_dev *pdev)
2161 {
2162         struct net_device *netdev = pci_get_drvdata(pdev);
2163         struct igb_adapter *adapter = netdev_priv(netdev);
2164         struct e1000_hw *hw = &adapter->hw;
2165
2166         /*
2167          * The watchdog timer may be rescheduled, so explicitly
2168          * disable watchdog from being rescheduled.
2169          */
2170         set_bit(__IGB_DOWN, &adapter->state);
2171         del_timer_sync(&adapter->watchdog_timer);
2172         del_timer_sync(&adapter->phy_info_timer);
2173
2174         cancel_work_sync(&adapter->reset_task);
2175         cancel_work_sync(&adapter->watchdog_task);
2176
2177 #ifdef CONFIG_IGB_DCA
2178         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2179                 dev_info(&pdev->dev, "DCA disabled\n");
2180                 dca_remove_requester(&pdev->dev);
2181                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2182                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2183         }
2184 #endif
2185
2186         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2187          * would have already happened in close and is redundant. */
2188         igb_release_hw_control(adapter);
2189
2190         unregister_netdev(netdev);
2191
2192         igb_clear_interrupt_scheme(adapter);
2193
2194 #ifdef CONFIG_PCI_IOV
2195         /* reclaim resources allocated to VFs */
2196         if (adapter->vf_data) {
2197                 /* disable iov and allow time for transactions to clear */
2198                 pci_disable_sriov(pdev);
2199                 msleep(500);
2200
2201                 kfree(adapter->vf_data);
2202                 adapter->vf_data = NULL;
2203                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2204                 wrfl();
2205                 msleep(100);
2206                 dev_info(&pdev->dev, "IOV Disabled\n");
2207         }
2208 #endif
2209
2210         iounmap(hw->hw_addr);
2211         if (hw->flash_address)
2212                 iounmap(hw->flash_address);
2213         pci_release_selected_regions(pdev,
2214                                      pci_select_bars(pdev, IORESOURCE_MEM));
2215
2216         free_netdev(netdev);
2217
2218         pci_disable_pcie_error_reporting(pdev);
2219
2220         pci_disable_device(pdev);
2221 }
2222
2223 /**
2224  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2225  * @adapter: board private structure to initialize
2226  *
2227  * This function initializes the vf specific data storage and then attempts to
2228  * allocate the VFs.  The reason for ordering it this way is because it is much
2229  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2230  * the memory for the VFs.
2231  **/
2232 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2233 {
2234 #ifdef CONFIG_PCI_IOV
2235         struct pci_dev *pdev = adapter->pdev;
2236
2237         if (adapter->vfs_allocated_count) {
2238                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2239                                            sizeof(struct vf_data_storage),
2240                                            GFP_KERNEL);
2241                 /* if allocation failed then we do not support SR-IOV */
2242                 if (!adapter->vf_data) {
2243                         adapter->vfs_allocated_count = 0;
2244                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2245                                 "Data Storage\n");
2246                 }
2247         }
2248
2249         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2250                 kfree(adapter->vf_data);
2251                 adapter->vf_data = NULL;
2252 #endif /* CONFIG_PCI_IOV */
2253                 adapter->vfs_allocated_count = 0;
2254 #ifdef CONFIG_PCI_IOV
2255         } else {
2256                 unsigned char mac_addr[ETH_ALEN];
2257                 int i;
2258                 dev_info(&pdev->dev, "%d vfs allocated\n",
2259                          adapter->vfs_allocated_count);
2260                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2261                         random_ether_addr(mac_addr);
2262                         igb_set_vf_mac(adapter, i, mac_addr);
2263                 }
2264                 /* DMA Coalescing is not supported in IOV mode. */
2265                 if (adapter->flags & IGB_FLAG_DMAC)
2266                         adapter->flags &= ~IGB_FLAG_DMAC;
2267         }
2268 #endif /* CONFIG_PCI_IOV */
2269 }
2270
2271
2272 /**
2273  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2274  * @adapter: board private structure to initialize
2275  *
2276  * igb_init_hw_timer initializes the function pointer and values for the hw
2277  * timer found in hardware.
2278  **/
2279 static void igb_init_hw_timer(struct igb_adapter *adapter)
2280 {
2281         struct e1000_hw *hw = &adapter->hw;
2282
2283         switch (hw->mac.type) {
2284         case e1000_i350:
2285         case e1000_82580:
2286                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2287                 adapter->cycles.read = igb_read_clock;
2288                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2289                 adapter->cycles.mult = 1;
2290                 /*
2291                  * The 82580 timesync updates the system timer every 8ns by 8ns
2292                  * and the value cannot be shifted.  Instead we need to shift
2293                  * the registers to generate a 64bit timer value.  As a result
2294                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2295                  * 24 in order to generate a larger value for synchronization.
2296                  */
2297                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2298                 /* disable system timer temporarily by setting bit 31 */
2299                 wr32(E1000_TSAUXC, 0x80000000);
2300                 wrfl();
2301
2302                 /* Set registers so that rollover occurs soon to test this. */
2303                 wr32(E1000_SYSTIMR, 0x00000000);
2304                 wr32(E1000_SYSTIML, 0x80000000);
2305                 wr32(E1000_SYSTIMH, 0x000000FF);
2306                 wrfl();
2307
2308                 /* enable system timer by clearing bit 31 */
2309                 wr32(E1000_TSAUXC, 0x0);
2310                 wrfl();
2311
2312                 timecounter_init(&adapter->clock,
2313                                  &adapter->cycles,
2314                                  ktime_to_ns(ktime_get_real()));
2315                 /*
2316                  * Synchronize our NIC clock against system wall clock. NIC
2317                  * time stamp reading requires ~3us per sample, each sample
2318                  * was pretty stable even under load => only require 10
2319                  * samples for each offset comparison.
2320                  */
2321                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2322                 adapter->compare.source = &adapter->clock;
2323                 adapter->compare.target = ktime_get_real;
2324                 adapter->compare.num_samples = 10;
2325                 timecompare_update(&adapter->compare, 0);
2326                 break;
2327         case e1000_82576:
2328                 /*
2329                  * Initialize hardware timer: we keep it running just in case
2330                  * that some program needs it later on.
2331                  */
2332                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2333                 adapter->cycles.read = igb_read_clock;
2334                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2335                 adapter->cycles.mult = 1;
2336                 /**
2337                  * Scale the NIC clock cycle by a large factor so that
2338                  * relatively small clock corrections can be added or
2339                  * subtracted at each clock tick. The drawbacks of a large
2340                  * factor are a) that the clock register overflows more quickly
2341                  * (not such a big deal) and b) that the increment per tick has
2342                  * to fit into 24 bits.  As a result we need to use a shift of
2343                  * 19 so we can fit a value of 16 into the TIMINCA register.
2344                  */
2345                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2346                 wr32(E1000_TIMINCA,
2347                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2348                                 (16 << IGB_82576_TSYNC_SHIFT));
2349
2350                 /* Set registers so that rollover occurs soon to test this. */
2351                 wr32(E1000_SYSTIML, 0x00000000);
2352                 wr32(E1000_SYSTIMH, 0xFF800000);
2353                 wrfl();
2354
2355                 timecounter_init(&adapter->clock,
2356                                  &adapter->cycles,
2357                                  ktime_to_ns(ktime_get_real()));
2358                 /*
2359                  * Synchronize our NIC clock against system wall clock. NIC
2360                  * time stamp reading requires ~3us per sample, each sample
2361                  * was pretty stable even under load => only require 10
2362                  * samples for each offset comparison.
2363                  */
2364                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2365                 adapter->compare.source = &adapter->clock;
2366                 adapter->compare.target = ktime_get_real;
2367                 adapter->compare.num_samples = 10;
2368                 timecompare_update(&adapter->compare, 0);
2369                 break;
2370         case e1000_82575:
2371                 /* 82575 does not support timesync */
2372         default:
2373                 break;
2374         }
2375
2376 }
2377
2378 /**
2379  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2380  * @adapter: board private structure to initialize
2381  *
2382  * igb_sw_init initializes the Adapter private data structure.
2383  * Fields are initialized based on PCI device information and
2384  * OS network device settings (MTU size).
2385  **/
2386 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2387 {
2388         struct e1000_hw *hw = &adapter->hw;
2389         struct net_device *netdev = adapter->netdev;
2390         struct pci_dev *pdev = adapter->pdev;
2391
2392         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2393
2394         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2395         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2396         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2397         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2398
2399         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2400         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2401
2402         spin_lock_init(&adapter->stats64_lock);
2403 #ifdef CONFIG_PCI_IOV
2404         switch (hw->mac.type) {
2405         case e1000_82576:
2406         case e1000_i350:
2407                 if (max_vfs > 7) {
2408                         dev_warn(&pdev->dev,
2409                                  "Maximum of 7 VFs per PF, using max\n");
2410                         adapter->vfs_allocated_count = 7;
2411                 } else
2412                         adapter->vfs_allocated_count = max_vfs;
2413                 break;
2414         default:
2415                 break;
2416         }
2417 #endif /* CONFIG_PCI_IOV */
2418         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2419         /* i350 cannot do RSS and SR-IOV at the same time */
2420         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2421                 adapter->rss_queues = 1;
2422
2423         /*
2424          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2425          * then we should combine the queues into a queue pair in order to
2426          * conserve interrupts due to limited supply
2427          */
2428         if ((adapter->rss_queues > 4) ||
2429             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2430                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2431
2432         /* This call may decrease the number of queues */
2433         if (igb_init_interrupt_scheme(adapter)) {
2434                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2435                 return -ENOMEM;
2436         }
2437
2438         igb_probe_vfs(adapter);
2439
2440         /* Explicitly disable IRQ since the NIC can be in any state. */
2441         igb_irq_disable(adapter);
2442
2443         if (hw->mac.type == e1000_i350)
2444                 adapter->flags &= ~IGB_FLAG_DMAC;
2445
2446         set_bit(__IGB_DOWN, &adapter->state);
2447         return 0;
2448 }
2449
2450 /**
2451  * igb_open - Called when a network interface is made active
2452  * @netdev: network interface device structure
2453  *
2454  * Returns 0 on success, negative value on failure
2455  *
2456  * The open entry point is called when a network interface is made
2457  * active by the system (IFF_UP).  At this point all resources needed
2458  * for transmit and receive operations are allocated, the interrupt
2459  * handler is registered with the OS, the watchdog timer is started,
2460  * and the stack is notified that the interface is ready.
2461  **/
2462 static int igb_open(struct net_device *netdev)
2463 {
2464         struct igb_adapter *adapter = netdev_priv(netdev);
2465         struct e1000_hw *hw = &adapter->hw;
2466         int err;
2467         int i;
2468
2469         /* disallow open during test */
2470         if (test_bit(__IGB_TESTING, &adapter->state))
2471                 return -EBUSY;
2472
2473         netif_carrier_off(netdev);
2474
2475         /* allocate transmit descriptors */
2476         err = igb_setup_all_tx_resources(adapter);
2477         if (err)
2478                 goto err_setup_tx;
2479
2480         /* allocate receive descriptors */
2481         err = igb_setup_all_rx_resources(adapter);
2482         if (err)
2483                 goto err_setup_rx;
2484
2485         igb_power_up_link(adapter);
2486
2487         /* before we allocate an interrupt, we must be ready to handle it.
2488          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2489          * as soon as we call pci_request_irq, so we have to setup our
2490          * clean_rx handler before we do so.  */
2491         igb_configure(adapter);
2492
2493         err = igb_request_irq(adapter);
2494         if (err)
2495                 goto err_req_irq;
2496
2497         /* From here on the code is the same as igb_up() */
2498         clear_bit(__IGB_DOWN, &adapter->state);
2499
2500         for (i = 0; i < adapter->num_q_vectors; i++) {
2501                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2502                 napi_enable(&q_vector->napi);
2503         }
2504
2505         /* Clear any pending interrupts. */
2506         rd32(E1000_ICR);
2507
2508         igb_irq_enable(adapter);
2509
2510         /* notify VFs that reset has been completed */
2511         if (adapter->vfs_allocated_count) {
2512                 u32 reg_data = rd32(E1000_CTRL_EXT);
2513                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2514                 wr32(E1000_CTRL_EXT, reg_data);
2515         }
2516
2517         netif_tx_start_all_queues(netdev);
2518
2519         /* start the watchdog. */
2520         hw->mac.get_link_status = 1;
2521         schedule_work(&adapter->watchdog_task);
2522
2523         return 0;
2524
2525 err_req_irq:
2526         igb_release_hw_control(adapter);
2527         igb_power_down_link(adapter);
2528         igb_free_all_rx_resources(adapter);
2529 err_setup_rx:
2530         igb_free_all_tx_resources(adapter);
2531 err_setup_tx:
2532         igb_reset(adapter);
2533
2534         return err;
2535 }
2536
2537 /**
2538  * igb_close - Disables a network interface
2539  * @netdev: network interface device structure
2540  *
2541  * Returns 0, this is not allowed to fail
2542  *
2543  * The close entry point is called when an interface is de-activated
2544  * by the OS.  The hardware is still under the driver's control, but
2545  * needs to be disabled.  A global MAC reset is issued to stop the
2546  * hardware, and all transmit and receive resources are freed.
2547  **/
2548 static int igb_close(struct net_device *netdev)
2549 {
2550         struct igb_adapter *adapter = netdev_priv(netdev);
2551
2552         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2553         igb_down(adapter);
2554
2555         igb_free_irq(adapter);
2556
2557         igb_free_all_tx_resources(adapter);
2558         igb_free_all_rx_resources(adapter);
2559
2560         return 0;
2561 }
2562
2563 /**
2564  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2565  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2566  *
2567  * Return 0 on success, negative on failure
2568  **/
2569 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2570 {
2571         struct device *dev = tx_ring->dev;
2572         int size;
2573
2574         size = sizeof(struct igb_buffer) * tx_ring->count;
2575         tx_ring->buffer_info = vzalloc(size);
2576         if (!tx_ring->buffer_info)
2577                 goto err;
2578
2579         /* round up to nearest 4K */
2580         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2581         tx_ring->size = ALIGN(tx_ring->size, 4096);
2582
2583         tx_ring->desc = dma_alloc_coherent(dev,
2584                                            tx_ring->size,
2585                                            &tx_ring->dma,
2586                                            GFP_KERNEL);
2587
2588         if (!tx_ring->desc)
2589                 goto err;
2590
2591         tx_ring->next_to_use = 0;
2592         tx_ring->next_to_clean = 0;
2593         return 0;
2594
2595 err:
2596         vfree(tx_ring->buffer_info);
2597         dev_err(dev,
2598                 "Unable to allocate memory for the transmit descriptor ring\n");
2599         return -ENOMEM;
2600 }
2601
2602 /**
2603  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2604  *                                (Descriptors) for all queues
2605  * @adapter: board private structure
2606  *
2607  * Return 0 on success, negative on failure
2608  **/
2609 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2610 {
2611         struct pci_dev *pdev = adapter->pdev;
2612         int i, err = 0;
2613
2614         for (i = 0; i < adapter->num_tx_queues; i++) {
2615                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2616                 if (err) {
2617                         dev_err(&pdev->dev,
2618                                 "Allocation for Tx Queue %u failed\n", i);
2619                         for (i--; i >= 0; i--)
2620                                 igb_free_tx_resources(adapter->tx_ring[i]);
2621                         break;
2622                 }
2623         }
2624
2625         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2626                 int r_idx = i % adapter->num_tx_queues;
2627                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2628         }
2629         return err;
2630 }
2631
2632 /**
2633  * igb_setup_tctl - configure the transmit control registers
2634  * @adapter: Board private structure
2635  **/
2636 void igb_setup_tctl(struct igb_adapter *adapter)
2637 {
2638         struct e1000_hw *hw = &adapter->hw;
2639         u32 tctl;
2640
2641         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2642         wr32(E1000_TXDCTL(0), 0);
2643
2644         /* Program the Transmit Control Register */
2645         tctl = rd32(E1000_TCTL);
2646         tctl &= ~E1000_TCTL_CT;
2647         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2648                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2649
2650         igb_config_collision_dist(hw);
2651
2652         /* Enable transmits */
2653         tctl |= E1000_TCTL_EN;
2654
2655         wr32(E1000_TCTL, tctl);
2656 }
2657
2658 /**
2659  * igb_configure_tx_ring - Configure transmit ring after Reset
2660  * @adapter: board private structure
2661  * @ring: tx ring to configure
2662  *
2663  * Configure a transmit ring after a reset.
2664  **/
2665 void igb_configure_tx_ring(struct igb_adapter *adapter,
2666                            struct igb_ring *ring)
2667 {
2668         struct e1000_hw *hw = &adapter->hw;
2669         u32 txdctl = 0;
2670         u64 tdba = ring->dma;
2671         int reg_idx = ring->reg_idx;
2672
2673         /* disable the queue */
2674         wr32(E1000_TXDCTL(reg_idx), 0);
2675         wrfl();
2676         mdelay(10);
2677
2678         wr32(E1000_TDLEN(reg_idx),
2679                         ring->count * sizeof(union e1000_adv_tx_desc));
2680         wr32(E1000_TDBAL(reg_idx),
2681                         tdba & 0x00000000ffffffffULL);
2682         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2683
2684         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2685         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2686         wr32(E1000_TDH(reg_idx), 0);
2687         writel(0, ring->tail);
2688
2689         txdctl |= IGB_TX_PTHRESH;
2690         txdctl |= IGB_TX_HTHRESH << 8;
2691         txdctl |= IGB_TX_WTHRESH << 16;
2692
2693         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2694         wr32(E1000_TXDCTL(reg_idx), txdctl);
2695 }
2696
2697 /**
2698  * igb_configure_tx - Configure transmit Unit after Reset
2699  * @adapter: board private structure
2700  *
2701  * Configure the Tx unit of the MAC after a reset.
2702  **/
2703 static void igb_configure_tx(struct igb_adapter *adapter)
2704 {
2705         int i;
2706
2707         for (i = 0; i < adapter->num_tx_queues; i++)
2708                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2709 }
2710
2711 /**
2712  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2713  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2714  *
2715  * Returns 0 on success, negative on failure
2716  **/
2717 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2718 {
2719         struct device *dev = rx_ring->dev;
2720         int size, desc_len;
2721
2722         size = sizeof(struct igb_buffer) * rx_ring->count;
2723         rx_ring->buffer_info = vzalloc(size);
2724         if (!rx_ring->buffer_info)
2725                 goto err;
2726
2727         desc_len = sizeof(union e1000_adv_rx_desc);
2728
2729         /* Round up to nearest 4K */
2730         rx_ring->size = rx_ring->count * desc_len;
2731         rx_ring->size = ALIGN(rx_ring->size, 4096);
2732
2733         rx_ring->desc = dma_alloc_coherent(dev,
2734                                            rx_ring->size,
2735                                            &rx_ring->dma,
2736                                            GFP_KERNEL);
2737
2738         if (!rx_ring->desc)
2739                 goto err;
2740
2741         rx_ring->next_to_clean = 0;
2742         rx_ring->next_to_use = 0;
2743
2744         return 0;
2745
2746 err:
2747         vfree(rx_ring->buffer_info);
2748         rx_ring->buffer_info = NULL;
2749         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2750                 " ring\n");
2751         return -ENOMEM;
2752 }
2753
2754 /**
2755  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2756  *                                (Descriptors) for all queues
2757  * @adapter: board private structure
2758  *
2759  * Return 0 on success, negative on failure
2760  **/
2761 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2762 {
2763         struct pci_dev *pdev = adapter->pdev;
2764         int i, err = 0;
2765
2766         for (i = 0; i < adapter->num_rx_queues; i++) {
2767                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2768                 if (err) {
2769                         dev_err(&pdev->dev,
2770                                 "Allocation for Rx Queue %u failed\n", i);
2771                         for (i--; i >= 0; i--)
2772                                 igb_free_rx_resources(adapter->rx_ring[i]);
2773                         break;
2774                 }
2775         }
2776
2777         return err;
2778 }
2779
2780 /**
2781  * igb_setup_mrqc - configure the multiple receive queue control registers
2782  * @adapter: Board private structure
2783  **/
2784 static void igb_setup_mrqc(struct igb_adapter *adapter)
2785 {
2786         struct e1000_hw *hw = &adapter->hw;
2787         u32 mrqc, rxcsum;
2788         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2789         union e1000_reta {
2790                 u32 dword;
2791                 u8  bytes[4];
2792         } reta;
2793         static const u8 rsshash[40] = {
2794                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2795                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2796                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2797                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2798
2799         /* Fill out hash function seeds */
2800         for (j = 0; j < 10; j++) {
2801                 u32 rsskey = rsshash[(j * 4)];
2802                 rsskey |= rsshash[(j * 4) + 1] << 8;
2803                 rsskey |= rsshash[(j * 4) + 2] << 16;
2804                 rsskey |= rsshash[(j * 4) + 3] << 24;
2805                 array_wr32(E1000_RSSRK(0), j, rsskey);
2806         }
2807
2808         num_rx_queues = adapter->rss_queues;
2809
2810         if (adapter->vfs_allocated_count) {
2811                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2812                 switch (hw->mac.type) {
2813                 case e1000_i350:
2814                 case e1000_82580:
2815                         num_rx_queues = 1;
2816                         shift = 0;
2817                         break;
2818                 case e1000_82576:
2819                         shift = 3;
2820                         num_rx_queues = 2;
2821                         break;
2822                 case e1000_82575:
2823                         shift = 2;
2824                         shift2 = 6;
2825                 default:
2826                         break;
2827                 }
2828         } else {
2829                 if (hw->mac.type == e1000_82575)
2830                         shift = 6;
2831         }
2832
2833         for (j = 0; j < (32 * 4); j++) {
2834                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2835                 if (shift2)
2836                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2837                 if ((j & 3) == 3)
2838                         wr32(E1000_RETA(j >> 2), reta.dword);
2839         }
2840
2841         /*
2842          * Disable raw packet checksumming so that RSS hash is placed in
2843          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2844          * offloads as they are enabled by default
2845          */
2846         rxcsum = rd32(E1000_RXCSUM);
2847         rxcsum |= E1000_RXCSUM_PCSD;
2848
2849         if (adapter->hw.mac.type >= e1000_82576)
2850                 /* Enable Receive Checksum Offload for SCTP */
2851                 rxcsum |= E1000_RXCSUM_CRCOFL;
2852
2853         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2854         wr32(E1000_RXCSUM, rxcsum);
2855
2856         /* If VMDq is enabled then we set the appropriate mode for that, else
2857          * we default to RSS so that an RSS hash is calculated per packet even
2858          * if we are only using one queue */
2859         if (adapter->vfs_allocated_count) {
2860                 if (hw->mac.type > e1000_82575) {
2861                         /* Set the default pool for the PF's first queue */
2862                         u32 vtctl = rd32(E1000_VT_CTL);
2863                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2864                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2865                         vtctl |= adapter->vfs_allocated_count <<
2866                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2867                         wr32(E1000_VT_CTL, vtctl);
2868                 }
2869                 if (adapter->rss_queues > 1)
2870                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2871                 else
2872                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2873         } else {
2874                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2875         }
2876         igb_vmm_control(adapter);
2877
2878         /*
2879          * Generate RSS hash based on TCP port numbers and/or
2880          * IPv4/v6 src and dst addresses since UDP cannot be
2881          * hashed reliably due to IP fragmentation
2882          */
2883         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2884                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885                 E1000_MRQC_RSS_FIELD_IPV6 |
2886                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2888
2889         wr32(E1000_MRQC, mrqc);
2890 }
2891
2892 /**
2893  * igb_setup_rctl - configure the receive control registers
2894  * @adapter: Board private structure
2895  **/
2896 void igb_setup_rctl(struct igb_adapter *adapter)
2897 {
2898         struct e1000_hw *hw = &adapter->hw;
2899         u32 rctl;
2900
2901         rctl = rd32(E1000_RCTL);
2902
2903         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2904         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2905
2906         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2907                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2908
2909         /*
2910          * enable stripping of CRC. It's unlikely this will break BMC
2911          * redirection as it did with e1000. Newer features require
2912          * that the HW strips the CRC.
2913          */
2914         rctl |= E1000_RCTL_SECRC;
2915
2916         /* disable store bad packets and clear size bits. */
2917         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2918
2919         /* enable LPE to prevent packets larger than max_frame_size */
2920         rctl |= E1000_RCTL_LPE;
2921
2922         /* disable queue 0 to prevent tail write w/o re-config */
2923         wr32(E1000_RXDCTL(0), 0);
2924
2925         /* Attention!!!  For SR-IOV PF driver operations you must enable
2926          * queue drop for all VF and PF queues to prevent head of line blocking
2927          * if an un-trusted VF does not provide descriptors to hardware.
2928          */
2929         if (adapter->vfs_allocated_count) {
2930                 /* set all queue drop enable bits */
2931                 wr32(E1000_QDE, ALL_QUEUES);
2932         }
2933
2934         wr32(E1000_RCTL, rctl);
2935 }
2936
2937 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2938                                    int vfn)
2939 {
2940         struct e1000_hw *hw = &adapter->hw;
2941         u32 vmolr;
2942
2943         /* if it isn't the PF check to see if VFs are enabled and
2944          * increase the size to support vlan tags */
2945         if (vfn < adapter->vfs_allocated_count &&
2946             adapter->vf_data[vfn].vlans_enabled)
2947                 size += VLAN_TAG_SIZE;
2948
2949         vmolr = rd32(E1000_VMOLR(vfn));
2950         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2951         vmolr |= size | E1000_VMOLR_LPE;
2952         wr32(E1000_VMOLR(vfn), vmolr);
2953
2954         return 0;
2955 }
2956
2957 /**
2958  * igb_rlpml_set - set maximum receive packet size
2959  * @adapter: board private structure
2960  *
2961  * Configure maximum receivable packet size.
2962  **/
2963 static void igb_rlpml_set(struct igb_adapter *adapter)
2964 {
2965         u32 max_frame_size;
2966         struct e1000_hw *hw = &adapter->hw;
2967         u16 pf_id = adapter->vfs_allocated_count;
2968
2969         max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2970
2971         /* if vfs are enabled we set RLPML to the largest possible request
2972          * size and set the VMOLR RLPML to the size we need */
2973         if (pf_id) {
2974                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2975                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2976         }
2977
2978         wr32(E1000_RLPML, max_frame_size);
2979 }
2980
2981 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2982                                  int vfn, bool aupe)
2983 {
2984         struct e1000_hw *hw = &adapter->hw;
2985         u32 vmolr;
2986
2987         /*
2988          * This register exists only on 82576 and newer so if we are older then
2989          * we should exit and do nothing
2990          */
2991         if (hw->mac.type < e1000_82576)
2992                 return;
2993
2994         vmolr = rd32(E1000_VMOLR(vfn));
2995         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2996         if (aupe)
2997                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2998         else
2999                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3000
3001         /* clear all bits that might not be set */
3002         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3003
3004         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3005                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3006         /*
3007          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3008          * multicast packets
3009          */
3010         if (vfn <= adapter->vfs_allocated_count)
3011                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3012
3013         wr32(E1000_VMOLR(vfn), vmolr);
3014 }
3015
3016 /**
3017  * igb_configure_rx_ring - Configure a receive ring after Reset
3018  * @adapter: board private structure
3019  * @ring: receive ring to be configured
3020  *
3021  * Configure the Rx unit of the MAC after a reset.
3022  **/
3023 void igb_configure_rx_ring(struct igb_adapter *adapter,
3024                            struct igb_ring *ring)
3025 {
3026         struct e1000_hw *hw = &adapter->hw;
3027         u64 rdba = ring->dma;
3028         int reg_idx = ring->reg_idx;
3029         u32 srrctl = 0, rxdctl = 0;
3030
3031         /* disable the queue */
3032         wr32(E1000_RXDCTL(reg_idx), 0);
3033
3034         /* Set DMA base address registers */
3035         wr32(E1000_RDBAL(reg_idx),
3036              rdba & 0x00000000ffffffffULL);
3037         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3038         wr32(E1000_RDLEN(reg_idx),
3039                        ring->count * sizeof(union e1000_adv_rx_desc));
3040
3041         /* initialize head and tail */
3042         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3043         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3044         wr32(E1000_RDH(reg_idx), 0);
3045         writel(0, ring->tail);
3046
3047         /* set descriptor configuration */
3048         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3049                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3050                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3051 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3052                 srrctl |= IGB_RXBUFFER_16384 >>
3053                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3054 #else
3055                 srrctl |= (PAGE_SIZE / 2) >>
3056                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3057 #endif
3058                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3059         } else {
3060                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3061                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3062                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3063         }
3064         if (hw->mac.type == e1000_82580)
3065                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3066         /* Only set Drop Enable if we are supporting multiple queues */
3067         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3068                 srrctl |= E1000_SRRCTL_DROP_EN;
3069
3070         wr32(E1000_SRRCTL(reg_idx), srrctl);
3071
3072         /* set filtering for VMDQ pools */
3073         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3074
3075         rxdctl |= IGB_RX_PTHRESH;
3076         rxdctl |= IGB_RX_HTHRESH << 8;
3077         rxdctl |= IGB_RX_WTHRESH << 16;
3078
3079         /* enable receive descriptor fetching */
3080         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3081         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3082 }
3083
3084 /**
3085  * igb_configure_rx - Configure receive Unit after Reset
3086  * @adapter: board private structure
3087  *
3088  * Configure the Rx unit of the MAC after a reset.
3089  **/
3090 static void igb_configure_rx(struct igb_adapter *adapter)
3091 {
3092         int i;
3093
3094         /* set UTA to appropriate mode */
3095         igb_set_uta(adapter);
3096
3097         /* set the correct pool for the PF default MAC address in entry 0 */
3098         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3099                          adapter->vfs_allocated_count);
3100
3101         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3102          * the Base and Length of the Rx Descriptor Ring */
3103         for (i = 0; i < adapter->num_rx_queues; i++)
3104                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3105 }
3106
3107 /**
3108  * igb_free_tx_resources - Free Tx Resources per Queue
3109  * @tx_ring: Tx descriptor ring for a specific queue
3110  *
3111  * Free all transmit software resources
3112  **/
3113 void igb_free_tx_resources(struct igb_ring *tx_ring)
3114 {
3115         igb_clean_tx_ring(tx_ring);
3116
3117         vfree(tx_ring->buffer_info);
3118         tx_ring->buffer_info = NULL;
3119
3120         /* if not set, then don't free */
3121         if (!tx_ring->desc)
3122                 return;
3123
3124         dma_free_coherent(tx_ring->dev, tx_ring->size,
3125                           tx_ring->desc, tx_ring->dma);
3126
3127         tx_ring->desc = NULL;
3128 }
3129
3130 /**
3131  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3132  * @adapter: board private structure
3133  *
3134  * Free all transmit software resources
3135  **/
3136 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3137 {
3138         int i;
3139
3140         for (i = 0; i < adapter->num_tx_queues; i++)
3141                 igb_free_tx_resources(adapter->tx_ring[i]);
3142 }
3143
3144 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3145                                     struct igb_buffer *buffer_info)
3146 {
3147         if (buffer_info->dma) {
3148                 if (buffer_info->mapped_as_page)
3149                         dma_unmap_page(tx_ring->dev,
3150                                         buffer_info->dma,
3151                                         buffer_info->length,
3152                                         DMA_TO_DEVICE);
3153                 else
3154                         dma_unmap_single(tx_ring->dev,
3155                                         buffer_info->dma,
3156                                         buffer_info->length,
3157                                         DMA_TO_DEVICE);
3158                 buffer_info->dma = 0;
3159         }
3160         if (buffer_info->skb) {
3161                 dev_kfree_skb_any(buffer_info->skb);
3162                 buffer_info->skb = NULL;
3163         }
3164         buffer_info->time_stamp = 0;
3165         buffer_info->length = 0;
3166         buffer_info->next_to_watch = 0;
3167         buffer_info->mapped_as_page = false;
3168 }
3169
3170 /**
3171  * igb_clean_tx_ring - Free Tx Buffers
3172  * @tx_ring: ring to be cleaned
3173  **/
3174 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3175 {
3176         struct igb_buffer *buffer_info;
3177         unsigned long size;
3178         unsigned int i;
3179
3180         if (!tx_ring->buffer_info)
3181                 return;
3182         /* Free all the Tx ring sk_buffs */
3183
3184         for (i = 0; i < tx_ring->count; i++) {
3185                 buffer_info = &tx_ring->buffer_info[i];
3186                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3187         }
3188
3189         size = sizeof(struct igb_buffer) * tx_ring->count;
3190         memset(tx_ring->buffer_info, 0, size);
3191
3192         /* Zero out the descriptor ring */
3193         memset(tx_ring->desc, 0, tx_ring->size);
3194
3195         tx_ring->next_to_use = 0;
3196         tx_ring->next_to_clean = 0;
3197 }
3198
3199 /**
3200  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3201  * @adapter: board private structure
3202  **/
3203 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3204 {
3205         int i;
3206
3207         for (i = 0; i < adapter->num_tx_queues; i++)
3208                 igb_clean_tx_ring(adapter->tx_ring[i]);
3209 }
3210
3211 /**
3212  * igb_free_rx_resources - Free Rx Resources
3213  * @rx_ring: ring to clean the resources from
3214  *
3215  * Free all receive software resources
3216  **/
3217 void igb_free_rx_resources(struct igb_ring *rx_ring)
3218 {
3219         igb_clean_rx_ring(rx_ring);
3220
3221         vfree(rx_ring->buffer_info);
3222         rx_ring->buffer_info = NULL;
3223
3224         /* if not set, then don't free */
3225         if (!rx_ring->desc)
3226                 return;
3227
3228         dma_free_coherent(rx_ring->dev, rx_ring->size,
3229                           rx_ring->desc, rx_ring->dma);
3230
3231         rx_ring->desc = NULL;
3232 }
3233
3234 /**
3235  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3236  * @adapter: board private structure
3237  *
3238  * Free all receive software resources
3239  **/
3240 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3241 {
3242         int i;
3243
3244         for (i = 0; i < adapter->num_rx_queues; i++)
3245                 igb_free_rx_resources(adapter->rx_ring[i]);
3246 }
3247
3248 /**
3249  * igb_clean_rx_ring - Free Rx Buffers per Queue
3250  * @rx_ring: ring to free buffers from
3251  **/
3252 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3253 {
3254         struct igb_buffer *buffer_info;
3255         unsigned long size;
3256         unsigned int i;
3257
3258         if (!rx_ring->buffer_info)
3259                 return;
3260
3261         /* Free all the Rx ring sk_buffs */
3262         for (i = 0; i < rx_ring->count; i++) {
3263                 buffer_info = &rx_ring->buffer_info[i];
3264                 if (buffer_info->dma) {
3265                         dma_unmap_single(rx_ring->dev,
3266                                          buffer_info->dma,
3267                                          rx_ring->rx_buffer_len,
3268                                          DMA_FROM_DEVICE);
3269                         buffer_info->dma = 0;
3270                 }
3271
3272                 if (buffer_info->skb) {
3273                         dev_kfree_skb(buffer_info->skb);
3274                         buffer_info->skb = NULL;
3275                 }
3276                 if (buffer_info->page_dma) {
3277                         dma_unmap_page(rx_ring->dev,
3278                                        buffer_info->page_dma,
3279                                        PAGE_SIZE / 2,
3280                                        DMA_FROM_DEVICE);
3281                         buffer_info->page_dma = 0;
3282                 }
3283                 if (buffer_info->page) {
3284                         put_page(buffer_info->page);
3285                         buffer_info->page = NULL;
3286                         buffer_info->page_offset = 0;
3287                 }
3288         }
3289
3290         size = sizeof(struct igb_buffer) * rx_ring->count;
3291         memset(rx_ring->buffer_info, 0, size);
3292
3293         /* Zero out the descriptor ring */
3294         memset(rx_ring->desc, 0, rx_ring->size);
3295
3296         rx_ring->next_to_clean = 0;
3297         rx_ring->next_to_use = 0;
3298 }
3299
3300 /**
3301  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3302  * @adapter: board private structure
3303  **/
3304 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3305 {
3306         int i;
3307
3308         for (i = 0; i < adapter->num_rx_queues; i++)
3309                 igb_clean_rx_ring(adapter->rx_ring[i]);
3310 }
3311
3312 /**
3313  * igb_set_mac - Change the Ethernet Address of the NIC
3314  * @netdev: network interface device structure
3315  * @p: pointer to an address structure
3316  *
3317  * Returns 0 on success, negative on failure
3318  **/
3319 static int igb_set_mac(struct net_device *netdev, void *p)
3320 {
3321         struct igb_adapter *adapter = netdev_priv(netdev);
3322         struct e1000_hw *hw = &adapter->hw;
3323         struct sockaddr *addr = p;
3324
3325         if (!is_valid_ether_addr(addr->sa_data))
3326                 return -EADDRNOTAVAIL;
3327
3328         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3329         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3330
3331         /* set the correct pool for the new PF MAC address in entry 0 */
3332         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3333                          adapter->vfs_allocated_count);
3334
3335         return 0;
3336 }
3337
3338 /**
3339  * igb_write_mc_addr_list - write multicast addresses to MTA
3340  * @netdev: network interface device structure
3341  *
3342  * Writes multicast address list to the MTA hash table.
3343  * Returns: -ENOMEM on failure
3344  *                0 on no addresses written
3345  *                X on writing X addresses to MTA
3346  **/
3347 static int igb_write_mc_addr_list(struct net_device *netdev)
3348 {
3349         struct igb_adapter *adapter = netdev_priv(netdev);
3350         struct e1000_hw *hw = &adapter->hw;
3351         struct netdev_hw_addr *ha;
3352         u8  *mta_list;
3353         int i;
3354
3355         if (netdev_mc_empty(netdev)) {
3356                 /* nothing to program, so clear mc list */
3357                 igb_update_mc_addr_list(hw, NULL, 0);
3358                 igb_restore_vf_multicasts(adapter);
3359                 return 0;
3360         }
3361
3362         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3363         if (!mta_list)
3364                 return -ENOMEM;
3365
3366         /* The shared function expects a packed array of only addresses. */
3367         i = 0;
3368         netdev_for_each_mc_addr(ha, netdev)
3369                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3370
3371         igb_update_mc_addr_list(hw, mta_list, i);
3372         kfree(mta_list);
3373
3374         return netdev_mc_count(netdev);
3375 }
3376
3377 /**
3378  * igb_write_uc_addr_list - write unicast addresses to RAR table
3379  * @netdev: network interface device structure
3380  *
3381  * Writes unicast address list to the RAR table.
3382  * Returns: -ENOMEM on failure/insufficient address space
3383  *                0 on no addresses written
3384  *                X on writing X addresses to the RAR table
3385  **/
3386 static int igb_write_uc_addr_list(struct net_device *netdev)
3387 {
3388         struct igb_adapter *adapter = netdev_priv(netdev);
3389         struct e1000_hw *hw = &adapter->hw;
3390         unsigned int vfn = adapter->vfs_allocated_count;
3391         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3392         int count = 0;
3393
3394         /* return ENOMEM indicating insufficient memory for addresses */
3395         if (netdev_uc_count(netdev) > rar_entries)
3396                 return -ENOMEM;
3397
3398         if (!netdev_uc_empty(netdev) && rar_entries) {
3399                 struct netdev_hw_addr *ha;
3400
3401                 netdev_for_each_uc_addr(ha, netdev) {
3402                         if (!rar_entries)
3403                                 break;
3404                         igb_rar_set_qsel(adapter, ha->addr,
3405                                          rar_entries--,
3406                                          vfn);
3407                         count++;
3408                 }
3409         }
3410         /* write the addresses in reverse order to avoid write combining */
3411         for (; rar_entries > 0 ; rar_entries--) {
3412                 wr32(E1000_RAH(rar_entries), 0);
3413                 wr32(E1000_RAL(rar_entries), 0);
3414         }
3415         wrfl();
3416
3417         return count;
3418 }
3419
3420 /**
3421  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3422  * @netdev: network interface device structure
3423  *
3424  * The set_rx_mode entry point is called whenever the unicast or multicast
3425  * address lists or the network interface flags are updated.  This routine is
3426  * responsible for configuring the hardware for proper unicast, multicast,
3427  * promiscuous mode, and all-multi behavior.
3428  **/
3429 static void igb_set_rx_mode(struct net_device *netdev)
3430 {
3431         struct igb_adapter *adapter = netdev_priv(netdev);
3432         struct e1000_hw *hw = &adapter->hw;
3433         unsigned int vfn = adapter->vfs_allocated_count;
3434         u32 rctl, vmolr = 0;
3435         int count;
3436
3437         /* Check for Promiscuous and All Multicast modes */
3438         rctl = rd32(E1000_RCTL);
3439
3440         /* clear the effected bits */
3441         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3442
3443         if (netdev->flags & IFF_PROMISC) {
3444                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3445                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3446         } else {
3447                 if (netdev->flags & IFF_ALLMULTI) {
3448                         rctl |= E1000_RCTL_MPE;
3449                         vmolr |= E1000_VMOLR_MPME;
3450                 } else {
3451                         /*
3452                          * Write addresses to the MTA, if the attempt fails
3453                          * then we should just turn on promiscuous mode so
3454                          * that we can at least receive multicast traffic
3455                          */
3456                         count = igb_write_mc_addr_list(netdev);
3457                         if (count < 0) {
3458                                 rctl |= E1000_RCTL_MPE;
3459                                 vmolr |= E1000_VMOLR_MPME;
3460                         } else if (count) {
3461                                 vmolr |= E1000_VMOLR_ROMPE;
3462                         }
3463                 }
3464                 /*
3465                  * Write addresses to available RAR registers, if there is not
3466                  * sufficient space to store all the addresses then enable
3467                  * unicast promiscuous mode
3468                  */
3469                 count = igb_write_uc_addr_list(netdev);
3470                 if (count < 0) {
3471                         rctl |= E1000_RCTL_UPE;
3472                         vmolr |= E1000_VMOLR_ROPE;
3473                 }
3474                 rctl |= E1000_RCTL_VFE;
3475         }
3476         wr32(E1000_RCTL, rctl);
3477
3478         /*
3479          * In order to support SR-IOV and eventually VMDq it is necessary to set
3480          * the VMOLR to enable the appropriate modes.  Without this workaround
3481          * we will have issues with VLAN tag stripping not being done for frames
3482          * that are only arriving because we are the default pool
3483          */
3484         if (hw->mac.type < e1000_82576)
3485                 return;
3486
3487         vmolr |= rd32(E1000_VMOLR(vfn)) &
3488                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3489         wr32(E1000_VMOLR(vfn), vmolr);
3490         igb_restore_vf_multicasts(adapter);
3491 }
3492
3493 static void igb_check_wvbr(struct igb_adapter *adapter)
3494 {
3495         struct e1000_hw *hw = &adapter->hw;
3496         u32 wvbr = 0;
3497
3498         switch (hw->mac.type) {
3499         case e1000_82576:
3500         case e1000_i350:
3501                 if (!(wvbr = rd32(E1000_WVBR)))
3502                         return;
3503                 break;
3504         default:
3505                 break;
3506         }
3507
3508         adapter->wvbr |= wvbr;
3509 }
3510
3511 #define IGB_STAGGERED_QUEUE_OFFSET 8
3512
3513 static void igb_spoof_check(struct igb_adapter *adapter)
3514 {
3515         int j;
3516
3517         if (!adapter->wvbr)
3518                 return;
3519
3520         for(j = 0; j < adapter->vfs_allocated_count; j++) {
3521                 if (adapter->wvbr & (1 << j) ||
3522                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3523                         dev_warn(&adapter->pdev->dev,
3524                                 "Spoof event(s) detected on VF %d\n", j);
3525                         adapter->wvbr &=
3526                                 ~((1 << j) |
3527                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3528                 }
3529         }
3530 }
3531
3532 /* Need to wait a few seconds after link up to get diagnostic information from
3533  * the phy */
3534 static void igb_update_phy_info(unsigned long data)
3535 {
3536         struct igb_adapter *adapter = (struct igb_adapter *) data;
3537         igb_get_phy_info(&adapter->hw);
3538 }
3539
3540 /**
3541  * igb_has_link - check shared code for link and determine up/down
3542  * @adapter: pointer to driver private info
3543  **/
3544 bool igb_has_link(struct igb_adapter *adapter)
3545 {
3546         struct e1000_hw *hw = &adapter->hw;
3547         bool link_active = false;
3548         s32 ret_val = 0;
3549
3550         /* get_link_status is set on LSC (link status) interrupt or
3551          * rx sequence error interrupt.  get_link_status will stay
3552          * false until the e1000_check_for_link establishes link
3553          * for copper adapters ONLY
3554          */
3555         switch (hw->phy.media_type) {
3556         case e1000_media_type_copper:
3557                 if (hw->mac.get_link_status) {
3558                         ret_val = hw->mac.ops.check_for_link(hw);
3559                         link_active = !hw->mac.get_link_status;
3560                 } else {
3561                         link_active = true;
3562                 }
3563                 break;
3564         case e1000_media_type_internal_serdes:
3565                 ret_val = hw->mac.ops.check_for_link(hw);
3566                 link_active = hw->mac.serdes_has_link;
3567                 break;
3568         default:
3569         case e1000_media_type_unknown:
3570                 break;
3571         }
3572
3573         return link_active;
3574 }
3575
3576 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3577 {
3578         bool ret = false;
3579         u32 ctrl_ext, thstat;
3580
3581         /* check for thermal sensor event on i350, copper only */
3582         if (hw->mac.type == e1000_i350) {
3583                 thstat = rd32(E1000_THSTAT);
3584                 ctrl_ext = rd32(E1000_CTRL_EXT);
3585
3586                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3587                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3588                         ret = !!(thstat & event);
3589                 }
3590         }
3591
3592         return ret;
3593 }
3594
3595 /**
3596  * igb_watchdog - Timer Call-back
3597  * @data: pointer to adapter cast into an unsigned long
3598  **/
3599 static void igb_watchdog(unsigned long data)
3600 {
3601         struct igb_adapter *adapter = (struct igb_adapter *)data;
3602         /* Do the rest outside of interrupt context */
3603         schedule_work(&adapter->watchdog_task);
3604 }
3605
3606 static void igb_watchdog_task(struct work_struct *work)
3607 {
3608         struct igb_adapter *adapter = container_of(work,
3609                                                    struct igb_adapter,
3610                                                    watchdog_task);
3611         struct e1000_hw *hw = &adapter->hw;
3612         struct net_device *netdev = adapter->netdev;
3613         u32 link;
3614         int i;
3615
3616         link = igb_has_link(adapter);
3617         if (link) {
3618                 if (!netif_carrier_ok(netdev)) {
3619                         u32 ctrl;
3620                         hw->mac.ops.get_speed_and_duplex(hw,
3621                                                          &adapter->link_speed,
3622                                                          &adapter->link_duplex);
3623
3624                         ctrl = rd32(E1000_CTRL);
3625                         /* Links status message must follow this format */
3626                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3627                                  "Flow Control: %s\n",
3628                                netdev->name,
3629                                adapter->link_speed,
3630                                adapter->link_duplex == FULL_DUPLEX ?
3631                                  "Full Duplex" : "Half Duplex",
3632                                ((ctrl & E1000_CTRL_TFCE) &&
3633                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3634                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3635                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3636
3637                         /* check for thermal sensor event */
3638                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3639                                 printk(KERN_INFO "igb: %s The network adapter "
3640                                                  "link speed was downshifted "
3641                                                  "because it overheated.\n",
3642                                                  netdev->name);
3643                         }
3644
3645                         /* adjust timeout factor according to speed/duplex */
3646                         adapter->tx_timeout_factor = 1;
3647                         switch (adapter->link_speed) {
3648                         case SPEED_10:
3649                                 adapter->tx_timeout_factor = 14;
3650                                 break;
3651                         case SPEED_100:
3652                                 /* maybe add some timeout factor ? */
3653                                 break;
3654                         }
3655
3656                         netif_carrier_on(netdev);
3657
3658                         igb_ping_all_vfs(adapter);
3659                         igb_check_vf_rate_limit(adapter);
3660
3661                         /* link state has changed, schedule phy info update */
3662                         if (!test_bit(__IGB_DOWN, &adapter->state))
3663                                 mod_timer(&adapter->phy_info_timer,
3664                                           round_jiffies(jiffies + 2 * HZ));
3665                 }
3666         } else {
3667                 if (netif_carrier_ok(netdev)) {
3668                         adapter->link_speed = 0;
3669                         adapter->link_duplex = 0;
3670
3671                         /* check for thermal sensor event */
3672                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3673                                 printk(KERN_ERR "igb: %s The network adapter "
3674                                                 "was stopped because it "
3675                                                 "overheated.\n",
3676                                                 netdev->name);
3677                         }
3678
3679                         /* Links status message must follow this format */
3680                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3681                                netdev->name);
3682                         netif_carrier_off(netdev);
3683
3684                         igb_ping_all_vfs(adapter);
3685
3686                         /* link state has changed, schedule phy info update */
3687                         if (!test_bit(__IGB_DOWN, &adapter->state))
3688                                 mod_timer(&adapter->phy_info_timer,
3689                                           round_jiffies(jiffies + 2 * HZ));
3690                 }
3691         }
3692
3693         spin_lock(&adapter->stats64_lock);
3694         igb_update_stats(adapter, &adapter->stats64);
3695         spin_unlock(&adapter->stats64_lock);
3696
3697         for (i = 0; i < adapter->num_tx_queues; i++) {
3698                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3699                 if (!netif_carrier_ok(netdev)) {
3700                         /* We've lost link, so the controller stops DMA,
3701                          * but we've got queued Tx work that's never going
3702                          * to get done, so reset controller to flush Tx.
3703                          * (Do the reset outside of interrupt context). */
3704                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3705                                 adapter->tx_timeout_count++;
3706                                 schedule_work(&adapter->reset_task);
3707                                 /* return immediately since reset is imminent */
3708                                 return;
3709                         }
3710                 }
3711
3712                 /* Force detection of hung controller every watchdog period */
3713                 tx_ring->detect_tx_hung = true;
3714         }
3715
3716         /* Cause software interrupt to ensure rx ring is cleaned */
3717         if (adapter->msix_entries) {
3718                 u32 eics = 0;
3719                 for (i = 0; i < adapter->num_q_vectors; i++) {
3720                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3721                         eics |= q_vector->eims_value;
3722                 }
3723                 wr32(E1000_EICS, eics);
3724         } else {
3725                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3726         }
3727
3728         igb_spoof_check(adapter);
3729
3730         /* Reset the timer */
3731         if (!test_bit(__IGB_DOWN, &adapter->state))
3732                 mod_timer(&adapter->watchdog_timer,
3733                           round_jiffies(jiffies + 2 * HZ));
3734 }
3735
3736 enum latency_range {
3737         lowest_latency = 0,
3738         low_latency = 1,
3739         bulk_latency = 2,
3740         latency_invalid = 255
3741 };
3742
3743 /**
3744  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3745  *
3746  *      Stores a new ITR value based on strictly on packet size.  This
3747  *      algorithm is less sophisticated than that used in igb_update_itr,
3748  *      due to the difficulty of synchronizing statistics across multiple
3749  *      receive rings.  The divisors and thresholds used by this function
3750  *      were determined based on theoretical maximum wire speed and testing
3751  *      data, in order to minimize response time while increasing bulk
3752  *      throughput.
3753  *      This functionality is controlled by the InterruptThrottleRate module
3754  *      parameter (see igb_param.c)
3755  *      NOTE:  This function is called only when operating in a multiqueue
3756  *             receive environment.
3757  * @q_vector: pointer to q_vector
3758  **/
3759 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3760 {
3761         int new_val = q_vector->itr_val;
3762         int avg_wire_size = 0;
3763         struct igb_adapter *adapter = q_vector->adapter;
3764         struct igb_ring *ring;
3765         unsigned int packets;
3766
3767         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3768          * ints/sec - ITR timer value of 120 ticks.
3769          */
3770         if (adapter->link_speed != SPEED_1000) {
3771                 new_val = 976;
3772                 goto set_itr_val;
3773         }
3774
3775         ring = q_vector->rx_ring;
3776         if (ring) {
3777                 packets = ACCESS_ONCE(ring->total_packets);
3778
3779                 if (packets)
3780                         avg_wire_size = ring->total_bytes / packets;
3781         }
3782
3783         ring = q_vector->tx_ring;
3784         if (ring) {
3785                 packets = ACCESS_ONCE(ring->total_packets);
3786
3787                 if (packets)
3788                         avg_wire_size = max_t(u32, avg_wire_size,
3789                                               ring->total_bytes / packets);
3790         }
3791
3792         /* if avg_wire_size isn't set no work was done */
3793         if (!avg_wire_size)
3794                 goto clear_counts;
3795
3796         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3797         avg_wire_size += 24;
3798
3799         /* Don't starve jumbo frames */
3800         avg_wire_size = min(avg_wire_size, 3000);
3801
3802         /* Give a little boost to mid-size frames */
3803         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3804                 new_val = avg_wire_size / 3;
3805         else
3806                 new_val = avg_wire_size / 2;
3807
3808         /* when in itr mode 3 do not exceed 20K ints/sec */
3809         if (adapter->rx_itr_setting == 3 && new_val < 196)
3810                 new_val = 196;
3811
3812 set_itr_val:
3813         if (new_val != q_vector->itr_val) {
3814                 q_vector->itr_val = new_val;
3815                 q_vector->set_itr = 1;
3816         }
3817 clear_counts:
3818         if (q_vector->rx_ring) {
3819                 q_vector->rx_ring->total_bytes = 0;
3820                 q_vector->rx_ring->total_packets = 0;
3821         }
3822         if (q_vector->tx_ring) {
3823                 q_vector->tx_ring->total_bytes = 0;
3824                 q_vector->tx_ring->total_packets = 0;
3825         }
3826 }
3827
3828 /**
3829  * igb_update_itr - update the dynamic ITR value based on statistics
3830  *      Stores a new ITR value based on packets and byte
3831  *      counts during the last interrupt.  The advantage of per interrupt
3832  *      computation is faster updates and more accurate ITR for the current
3833  *      traffic pattern.  Constants in this function were computed
3834  *      based on theoretical maximum wire speed and thresholds were set based
3835  *      on testing data as well as attempting to minimize response time
3836  *      while increasing bulk throughput.
3837  *      this functionality is controlled by the InterruptThrottleRate module
3838  *      parameter (see igb_param.c)
3839  *      NOTE:  These calculations are only valid when operating in a single-
3840  *             queue environment.
3841  * @adapter: pointer to adapter
3842  * @itr_setting: current q_vector->itr_val
3843  * @packets: the number of packets during this measurement interval
3844  * @bytes: the number of bytes during this measurement interval
3845  **/
3846 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3847                                    int packets, int bytes)
3848 {
3849         unsigned int retval = itr_setting;
3850
3851         if (packets == 0)
3852                 goto update_itr_done;
3853
3854         switch (itr_setting) {
3855         case lowest_latency:
3856                 /* handle TSO and jumbo frames */
3857                 if (bytes/packets > 8000)
3858                         retval = bulk_latency;
3859                 else if ((packets < 5) && (bytes > 512))
3860                         retval = low_latency;
3861                 break;
3862         case low_latency:  /* 50 usec aka 20000 ints/s */
3863                 if (bytes > 10000) {
3864                         /* this if handles the TSO accounting */
3865                         if (bytes/packets > 8000) {
3866                                 retval = bulk_latency;
3867                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3868                                 retval = bulk_latency;
3869                         } else if ((packets > 35)) {
3870                                 retval = lowest_latency;
3871                         }
3872                 } else if (bytes/packets > 2000) {
3873                         retval = bulk_latency;
3874                 } else if (packets <= 2 && bytes < 512) {
3875                         retval = lowest_latency;
3876                 }
3877                 break;
3878         case bulk_latency: /* 250 usec aka 4000 ints/s */
3879                 if (bytes > 25000) {
3880                         if (packets > 35)
3881                                 retval = low_latency;
3882                 } else if (bytes < 1500) {
3883                         retval = low_latency;
3884                 }
3885                 break;
3886         }
3887
3888 update_itr_done:
3889         return retval;
3890 }
3891
3892 static void igb_set_itr(struct igb_adapter *adapter)
3893 {
3894         struct igb_q_vector *q_vector = adapter->q_vector[0];
3895         u16 current_itr;
3896         u32 new_itr = q_vector->itr_val;
3897
3898         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3899         if (adapter->link_speed != SPEED_1000) {
3900                 current_itr = 0;
3901                 new_itr = 4000;
3902                 goto set_itr_now;
3903         }
3904
3905         adapter->rx_itr = igb_update_itr(adapter,
3906                                     adapter->rx_itr,
3907                                     q_vector->rx_ring->total_packets,
3908                                     q_vector->rx_ring->total_bytes);
3909
3910         adapter->tx_itr = igb_update_itr(adapter,
3911                                     adapter->tx_itr,
3912                                     q_vector->tx_ring->total_packets,
3913                                     q_vector->tx_ring->total_bytes);
3914         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3915
3916         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3917         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3918                 current_itr = low_latency;
3919
3920         switch (current_itr) {
3921         /* counts and packets in update_itr are dependent on these numbers */
3922         case lowest_latency:
3923                 new_itr = 56;  /* aka 70,000 ints/sec */
3924                 break;
3925         case low_latency:
3926                 new_itr = 196; /* aka 20,000 ints/sec */
3927                 break;
3928         case bulk_latency:
3929                 new_itr = 980; /* aka 4,000 ints/sec */
3930                 break;
3931         default:
3932                 break;
3933         }
3934
3935 set_itr_now:
3936         q_vector->rx_ring->total_bytes = 0;
3937         q_vector->rx_ring->total_packets = 0;
3938         q_vector->tx_ring->total_bytes = 0;
3939         q_vector->tx_ring->total_packets = 0;
3940
3941         if (new_itr != q_vector->itr_val) {
3942                 /* this attempts to bias the interrupt rate towards Bulk
3943                  * by adding intermediate steps when interrupt rate is
3944                  * increasing */
3945                 new_itr = new_itr > q_vector->itr_val ?
3946                              max((new_itr * q_vector->itr_val) /
3947                                  (new_itr + (q_vector->itr_val >> 2)),
3948                                  new_itr) :
3949                              new_itr;
3950                 /* Don't write the value here; it resets the adapter's
3951                  * internal timer, and causes us to delay far longer than
3952                  * we should between interrupts.  Instead, we write the ITR
3953                  * value at the beginning of the next interrupt so the timing
3954                  * ends up being correct.
3955                  */
3956                 q_vector->itr_val = new_itr;
3957                 q_vector->set_itr = 1;
3958         }
3959 }
3960
3961 #define IGB_TX_FLAGS_CSUM               0x00000001
3962 #define IGB_TX_FLAGS_VLAN               0x00000002
3963 #define IGB_TX_FLAGS_TSO                0x00000004
3964 #define IGB_TX_FLAGS_IPV4               0x00000008
3965 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3966 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3967 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3968
3969 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3970                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3971 {
3972         struct e1000_adv_tx_context_desc *context_desc;
3973         unsigned int i;
3974         int err;
3975         struct igb_buffer *buffer_info;
3976         u32 info = 0, tu_cmd = 0;
3977         u32 mss_l4len_idx;
3978         u8 l4len;
3979
3980         if (skb_header_cloned(skb)) {
3981                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3982                 if (err)
3983                         return err;
3984         }
3985
3986         l4len = tcp_hdrlen(skb);
3987         *hdr_len += l4len;
3988
3989         if (skb->protocol == htons(ETH_P_IP)) {
3990                 struct iphdr *iph = ip_hdr(skb);
3991                 iph->tot_len = 0;
3992                 iph->check = 0;
3993                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3994                                                          iph->daddr, 0,
3995                                                          IPPROTO_TCP,
3996                                                          0);
3997         } else if (skb_is_gso_v6(skb)) {
3998                 ipv6_hdr(skb)->payload_len = 0;
3999                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4000                                                        &ipv6_hdr(skb)->daddr,
4001                                                        0, IPPROTO_TCP, 0);
4002         }
4003
4004         i = tx_ring->next_to_use;
4005
4006         buffer_info = &tx_ring->buffer_info[i];
4007         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4008         /* VLAN MACLEN IPLEN */
4009         if (tx_flags & IGB_TX_FLAGS_VLAN)
4010                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4011         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4012         *hdr_len += skb_network_offset(skb);
4013         info |= skb_network_header_len(skb);
4014         *hdr_len += skb_network_header_len(skb);
4015         context_desc->vlan_macip_lens = cpu_to_le32(info);
4016
4017         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4018         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4019
4020         if (skb->protocol == htons(ETH_P_IP))
4021                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4022         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4023
4024         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4025
4026         /* MSS L4LEN IDX */
4027         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4028         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4029
4030         /* For 82575, context index must be unique per ring. */
4031         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4032                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4033
4034         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4035         context_desc->seqnum_seed = 0;
4036
4037         buffer_info->time_stamp = jiffies;
4038         buffer_info->next_to_watch = i;
4039         buffer_info->dma = 0;
4040         i++;
4041         if (i == tx_ring->count)
4042                 i = 0;
4043
4044         tx_ring->next_to_use = i;
4045
4046         return true;
4047 }
4048
4049 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4050                                    struct sk_buff *skb, u32 tx_flags)
4051 {
4052         struct e1000_adv_tx_context_desc *context_desc;
4053         struct device *dev = tx_ring->dev;
4054         struct igb_buffer *buffer_info;
4055         u32 info = 0, tu_cmd = 0;
4056         unsigned int i;
4057
4058         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4059             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4060                 i = tx_ring->next_to_use;
4061                 buffer_info = &tx_ring->buffer_info[i];
4062                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4063
4064                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4065                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4066
4067                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4068                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4069                         info |= skb_network_header_len(skb);
4070
4071                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4072
4073                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4074
4075                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4076                         __be16 protocol;
4077
4078                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4079                                 const struct vlan_ethhdr *vhdr =
4080                                           (const struct vlan_ethhdr*)skb->data;
4081
4082                                 protocol = vhdr->h_vlan_encapsulated_proto;
4083                         } else {
4084                                 protocol = skb->protocol;
4085                         }
4086
4087                         switch (protocol) {
4088                         case cpu_to_be16(ETH_P_IP):
4089                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4090                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4091                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4092                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4093                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4094                                 break;
4095                         case cpu_to_be16(ETH_P_IPV6):
4096                                 /* XXX what about other V6 headers?? */
4097                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4098                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4099                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4100                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4101                                 break;
4102                         default:
4103                                 if (unlikely(net_ratelimit()))
4104                                         dev_warn(dev,
4105                                             "partial checksum but proto=%x!\n",
4106                                             skb->protocol);
4107                                 break;
4108                         }
4109                 }
4110
4111                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4112                 context_desc->seqnum_seed = 0;
4113                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4114                         context_desc->mss_l4len_idx =
4115                                 cpu_to_le32(tx_ring->reg_idx << 4);
4116
4117                 buffer_info->time_stamp = jiffies;
4118                 buffer_info->next_to_watch = i;
4119                 buffer_info->dma = 0;
4120
4121                 i++;
4122                 if (i == tx_ring->count)
4123                         i = 0;
4124                 tx_ring->next_to_use = i;
4125
4126                 return true;
4127         }
4128         return false;
4129 }
4130
4131 #define IGB_MAX_TXD_PWR 16
4132 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4133
4134 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4135                                  unsigned int first)
4136 {
4137         struct igb_buffer *buffer_info;
4138         struct device *dev = tx_ring->dev;
4139         unsigned int hlen = skb_headlen(skb);
4140         unsigned int count = 0, i;
4141         unsigned int f;
4142         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4143
4144         i = tx_ring->next_to_use;
4145
4146         buffer_info = &tx_ring->buffer_info[i];
4147         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4148         buffer_info->length = hlen;
4149         /* set time_stamp *before* dma to help avoid a possible race */
4150         buffer_info->time_stamp = jiffies;
4151         buffer_info->next_to_watch = i;
4152         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4153                                           DMA_TO_DEVICE);
4154         if (dma_mapping_error(dev, buffer_info->dma))
4155                 goto dma_error;
4156
4157         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4158                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4159                 unsigned int len = frag->size;
4160
4161                 count++;
4162                 i++;
4163                 if (i == tx_ring->count)
4164                         i = 0;
4165
4166                 buffer_info = &tx_ring->buffer_info[i];
4167                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4168                 buffer_info->length = len;
4169                 buffer_info->time_stamp = jiffies;
4170                 buffer_info->next_to_watch = i;
4171                 buffer_info->mapped_as_page = true;
4172                 buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4173                                                 DMA_TO_DEVICE);
4174                 if (dma_mapping_error(dev, buffer_info->dma))
4175                         goto dma_error;
4176
4177         }
4178
4179         tx_ring->buffer_info[i].skb = skb;
4180         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4181         /* multiply data chunks by size of headers */
4182         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4183         tx_ring->buffer_info[i].gso_segs = gso_segs;
4184         tx_ring->buffer_info[first].next_to_watch = i;
4185
4186         return ++count;
4187
4188 dma_error:
4189         dev_err(dev, "TX DMA map failed\n");
4190
4191         /* clear timestamp and dma mappings for failed buffer_info mapping */
4192         buffer_info->dma = 0;
4193         buffer_info->time_stamp = 0;
4194         buffer_info->length = 0;
4195         buffer_info->next_to_watch = 0;
4196         buffer_info->mapped_as_page = false;
4197
4198         /* clear timestamp and dma mappings for remaining portion of packet */
4199         while (count--) {
4200                 if (i == 0)
4201                         i = tx_ring->count;
4202                 i--;
4203                 buffer_info = &tx_ring->buffer_info[i];
4204                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4205         }
4206
4207         return 0;
4208 }
4209
4210 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4211                                     u32 tx_flags, int count, u32 paylen,
4212                                     u8 hdr_len)
4213 {
4214         union e1000_adv_tx_desc *tx_desc;
4215         struct igb_buffer *buffer_info;
4216         u32 olinfo_status = 0, cmd_type_len;
4217         unsigned int i = tx_ring->next_to_use;
4218
4219         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4220                         E1000_ADVTXD_DCMD_DEXT);
4221
4222         if (tx_flags & IGB_TX_FLAGS_VLAN)
4223                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4224
4225         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4226                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4227
4228         if (tx_flags & IGB_TX_FLAGS_TSO) {
4229                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4230
4231                 /* insert tcp checksum */
4232                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4233
4234                 /* insert ip checksum */
4235                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4236                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4237
4238         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4239                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4240         }
4241
4242         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4243             (tx_flags & (IGB_TX_FLAGS_CSUM |
4244                          IGB_TX_FLAGS_TSO |
4245                          IGB_TX_FLAGS_VLAN)))
4246                 olinfo_status |= tx_ring->reg_idx << 4;
4247
4248         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4249
4250         do {
4251                 buffer_info = &tx_ring->buffer_info[i];
4252                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4253                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4254                 tx_desc->read.cmd_type_len =
4255                         cpu_to_le32(cmd_type_len | buffer_info->length);
4256                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4257                 count--;
4258                 i++;
4259                 if (i == tx_ring->count)
4260                         i = 0;
4261         } while (count > 0);
4262
4263         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4264         /* Force memory writes to complete before letting h/w
4265          * know there are new descriptors to fetch.  (Only
4266          * applicable for weak-ordered memory model archs,
4267          * such as IA-64). */
4268         wmb();
4269
4270         tx_ring->next_to_use = i;
4271         writel(i, tx_ring->tail);
4272         /* we need this if more than one processor can write to our tail
4273          * at a time, it syncronizes IO on IA64/Altix systems */
4274         mmiowb();
4275 }
4276
4277 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4278 {
4279         struct net_device *netdev = tx_ring->netdev;
4280
4281         netif_stop_subqueue(netdev, tx_ring->queue_index);
4282
4283         /* Herbert's original patch had:
4284          *  smp_mb__after_netif_stop_queue();
4285          * but since that doesn't exist yet, just open code it. */
4286         smp_mb();
4287
4288         /* We need to check again in a case another CPU has just
4289          * made room available. */
4290         if (igb_desc_unused(tx_ring) < size)
4291                 return -EBUSY;
4292
4293         /* A reprieve! */
4294         netif_wake_subqueue(netdev, tx_ring->queue_index);
4295
4296         u64_stats_update_begin(&tx_ring->tx_syncp2);
4297         tx_ring->tx_stats.restart_queue2++;
4298         u64_stats_update_end(&tx_ring->tx_syncp2);
4299
4300         return 0;
4301 }
4302
4303 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4304 {
4305         if (igb_desc_unused(tx_ring) >= size)
4306                 return 0;
4307         return __igb_maybe_stop_tx(tx_ring, size);
4308 }
4309
4310 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4311                                     struct igb_ring *tx_ring)
4312 {
4313         int tso = 0, count;
4314         u32 tx_flags = 0;
4315         u16 first;
4316         u8 hdr_len = 0;
4317
4318         /* need: 1 descriptor per page,
4319          *       + 2 desc gap to keep tail from touching head,
4320          *       + 1 desc for skb->data,
4321          *       + 1 desc for context descriptor,
4322          * otherwise try next time */
4323         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4324                 /* this is a hard error */
4325                 return NETDEV_TX_BUSY;
4326         }
4327
4328         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4329                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4330                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4331         }
4332
4333         if (vlan_tx_tag_present(skb)) {
4334                 tx_flags |= IGB_TX_FLAGS_VLAN;
4335                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4336         }
4337
4338         if (skb->protocol == htons(ETH_P_IP))
4339                 tx_flags |= IGB_TX_FLAGS_IPV4;
4340
4341         first = tx_ring->next_to_use;
4342         if (skb_is_gso(skb)) {
4343                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4344
4345                 if (tso < 0) {
4346                         dev_kfree_skb_any(skb);
4347                         return NETDEV_TX_OK;
4348                 }
4349         }
4350
4351         if (tso)
4352                 tx_flags |= IGB_TX_FLAGS_TSO;
4353         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4354                  (skb->ip_summed == CHECKSUM_PARTIAL))
4355                 tx_flags |= IGB_TX_FLAGS_CSUM;
4356
4357         /*
4358          * count reflects descriptors mapped, if 0 or less then mapping error
4359          * has occurred and we need to rewind the descriptor queue
4360          */
4361         count = igb_tx_map_adv(tx_ring, skb, first);
4362         if (!count) {
4363                 dev_kfree_skb_any(skb);
4364                 tx_ring->buffer_info[first].time_stamp = 0;
4365                 tx_ring->next_to_use = first;
4366                 return NETDEV_TX_OK;
4367         }
4368
4369         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4370
4371         /* Make sure there is space in the ring for the next send. */
4372         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4373
4374         return NETDEV_TX_OK;
4375 }
4376
4377 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4378                                       struct net_device *netdev)
4379 {
4380         struct igb_adapter *adapter = netdev_priv(netdev);
4381         struct igb_ring *tx_ring;
4382         int r_idx = 0;
4383
4384         if (test_bit(__IGB_DOWN, &adapter->state)) {
4385                 dev_kfree_skb_any(skb);
4386                 return NETDEV_TX_OK;
4387         }
4388
4389         if (skb->len <= 0) {
4390                 dev_kfree_skb_any(skb);
4391                 return NETDEV_TX_OK;
4392         }
4393
4394         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4395         tx_ring = adapter->multi_tx_table[r_idx];
4396
4397         /* This goes back to the question of how to logically map a tx queue
4398          * to a flow.  Right now, performance is impacted slightly negatively
4399          * if using multiple tx queues.  If the stack breaks away from a
4400          * single qdisc implementation, we can look at this again. */
4401         return igb_xmit_frame_ring_adv(skb, tx_ring);
4402 }
4403
4404 /**
4405  * igb_tx_timeout - Respond to a Tx Hang
4406  * @netdev: network interface device structure
4407  **/
4408 static void igb_tx_timeout(struct net_device *netdev)
4409 {
4410         struct igb_adapter *adapter = netdev_priv(netdev);
4411         struct e1000_hw *hw = &adapter->hw;
4412
4413         /* Do the reset outside of interrupt context */
4414         adapter->tx_timeout_count++;
4415
4416         if (hw->mac.type == e1000_82580)
4417                 hw->dev_spec._82575.global_device_reset = true;
4418
4419         schedule_work(&adapter->reset_task);
4420         wr32(E1000_EICS,
4421              (adapter->eims_enable_mask & ~adapter->eims_other));
4422 }
4423
4424 static void igb_reset_task(struct work_struct *work)
4425 {
4426         struct igb_adapter *adapter;
4427         adapter = container_of(work, struct igb_adapter, reset_task);
4428
4429         igb_dump(adapter);
4430         netdev_err(adapter->netdev, "Reset adapter\n");
4431         igb_reinit_locked(adapter);
4432 }
4433
4434 /**
4435  * igb_get_stats64 - Get System Network Statistics
4436  * @netdev: network interface device structure
4437  * @stats: rtnl_link_stats64 pointer
4438  *
4439  **/
4440 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4441                                                  struct rtnl_link_stats64 *stats)
4442 {
4443         struct igb_adapter *adapter = netdev_priv(netdev);
4444
4445         spin_lock(&adapter->stats64_lock);
4446         igb_update_stats(adapter, &adapter->stats64);
4447         memcpy(stats, &adapter->stats64, sizeof(*stats));
4448         spin_unlock(&adapter->stats64_lock);
4449
4450         return stats;
4451 }
4452
4453 /**
4454  * igb_change_mtu - Change the Maximum Transfer Unit
4455  * @netdev: network interface device structure
4456  * @new_mtu: new value for maximum frame size
4457  *
4458  * Returns 0 on success, negative on failure
4459  **/
4460 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4461 {
4462         struct igb_adapter *adapter = netdev_priv(netdev);
4463         struct pci_dev *pdev = adapter->pdev;
4464         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4465         u32 rx_buffer_len, i;
4466
4467         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4468                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4469                 return -EINVAL;
4470         }
4471
4472         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4473                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4474                 return -EINVAL;
4475         }
4476
4477         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4478                 msleep(1);
4479
4480         /* igb_down has a dependency on max_frame_size */
4481         adapter->max_frame_size = max_frame;
4482
4483         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4484          * means we reserve 2 more, this pushes us to allocate from the next
4485          * larger slab size.
4486          * i.e. RXBUFFER_2048 --> size-4096 slab
4487          */
4488
4489         if (adapter->hw.mac.type == e1000_82580)
4490                 max_frame += IGB_TS_HDR_LEN;
4491
4492         if (max_frame <= IGB_RXBUFFER_1024)
4493                 rx_buffer_len = IGB_RXBUFFER_1024;
4494         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4495                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4496         else
4497                 rx_buffer_len = IGB_RXBUFFER_128;
4498
4499         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4500              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4501                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4502
4503         if ((adapter->hw.mac.type == e1000_82580) &&
4504             (rx_buffer_len == IGB_RXBUFFER_128))
4505                 rx_buffer_len += IGB_RXBUFFER_64;
4506
4507         if (netif_running(netdev))
4508                 igb_down(adapter);
4509
4510         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4511                  netdev->mtu, new_mtu);
4512         netdev->mtu = new_mtu;
4513
4514         for (i = 0; i < adapter->num_rx_queues; i++)
4515                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4516
4517         if (netif_running(netdev))
4518                 igb_up(adapter);
4519         else
4520                 igb_reset(adapter);
4521
4522         clear_bit(__IGB_RESETTING, &adapter->state);
4523
4524         return 0;
4525 }
4526
4527 /**
4528  * igb_update_stats - Update the board statistics counters
4529  * @adapter: board private structure
4530  **/
4531
4532 void igb_update_stats(struct igb_adapter *adapter,
4533                       struct rtnl_link_stats64 *net_stats)
4534 {
4535         struct e1000_hw *hw = &adapter->hw;
4536         struct pci_dev *pdev = adapter->pdev;
4537         u32 reg, mpc;
4538         u16 phy_tmp;
4539         int i;
4540         u64 bytes, packets;
4541         unsigned int start;
4542         u64 _bytes, _packets;
4543
4544 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4545
4546         /*
4547          * Prevent stats update while adapter is being reset, or if the pci
4548          * connection is down.
4549          */
4550         if (adapter->link_speed == 0)
4551                 return;
4552         if (pci_channel_offline(pdev))
4553                 return;
4554
4555         bytes = 0;
4556         packets = 0;
4557         for (i = 0; i < adapter->num_rx_queues; i++) {
4558                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4559                 struct igb_ring *ring = adapter->rx_ring[i];
4560
4561                 ring->rx_stats.drops += rqdpc_tmp;
4562                 net_stats->rx_fifo_errors += rqdpc_tmp;
4563
4564                 do {
4565                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4566                         _bytes = ring->rx_stats.bytes;
4567                         _packets = ring->rx_stats.packets;
4568                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4569                 bytes += _bytes;
4570                 packets += _packets;
4571         }
4572
4573         net_stats->rx_bytes = bytes;
4574         net_stats->rx_packets = packets;
4575
4576         bytes = 0;
4577         packets = 0;
4578         for (i = 0; i < adapter->num_tx_queues; i++) {
4579                 struct igb_ring *ring = adapter->tx_ring[i];
4580                 do {
4581                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4582                         _bytes = ring->tx_stats.bytes;
4583                         _packets = ring->tx_stats.packets;
4584                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4585                 bytes += _bytes;
4586                 packets += _packets;
4587         }
4588         net_stats->tx_bytes = bytes;
4589         net_stats->tx_packets = packets;
4590
4591         /* read stats registers */
4592         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4593         adapter->stats.gprc += rd32(E1000_GPRC);
4594         adapter->stats.gorc += rd32(E1000_GORCL);
4595         rd32(E1000_GORCH); /* clear GORCL */
4596         adapter->stats.bprc += rd32(E1000_BPRC);
4597         adapter->stats.mprc += rd32(E1000_MPRC);
4598         adapter->stats.roc += rd32(E1000_ROC);
4599
4600         adapter->stats.prc64 += rd32(E1000_PRC64);
4601         adapter->stats.prc127 += rd32(E1000_PRC127);
4602         adapter->stats.prc255 += rd32(E1000_PRC255);
4603         adapter->stats.prc511 += rd32(E1000_PRC511);
4604         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4605         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4606         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4607         adapter->stats.sec += rd32(E1000_SEC);
4608
4609         mpc = rd32(E1000_MPC);
4610         adapter->stats.mpc += mpc;
4611         net_stats->rx_fifo_errors += mpc;
4612         adapter->stats.scc += rd32(E1000_SCC);
4613         adapter->stats.ecol += rd32(E1000_ECOL);
4614         adapter->stats.mcc += rd32(E1000_MCC);
4615         adapter->stats.latecol += rd32(E1000_LATECOL);
4616         adapter->stats.dc += rd32(E1000_DC);
4617         adapter->stats.rlec += rd32(E1000_RLEC);
4618         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4619         adapter->stats.xontxc += rd32(E1000_XONTXC);
4620         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4621         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4622         adapter->stats.fcruc += rd32(E1000_FCRUC);
4623         adapter->stats.gptc += rd32(E1000_GPTC);
4624         adapter->stats.gotc += rd32(E1000_GOTCL);
4625         rd32(E1000_GOTCH); /* clear GOTCL */
4626         adapter->stats.rnbc += rd32(E1000_RNBC);
4627         adapter->stats.ruc += rd32(E1000_RUC);
4628         adapter->stats.rfc += rd32(E1000_RFC);
4629         adapter->stats.rjc += rd32(E1000_RJC);
4630         adapter->stats.tor += rd32(E1000_TORH);
4631         adapter->stats.tot += rd32(E1000_TOTH);
4632         adapter->stats.tpr += rd32(E1000_TPR);
4633
4634         adapter->stats.ptc64 += rd32(E1000_PTC64);
4635         adapter->stats.ptc127 += rd32(E1000_PTC127);
4636         adapter->stats.ptc255 += rd32(E1000_PTC255);
4637         adapter->stats.ptc511 += rd32(E1000_PTC511);
4638         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4639         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4640
4641         adapter->stats.mptc += rd32(E1000_MPTC);
4642         adapter->stats.bptc += rd32(E1000_BPTC);
4643
4644         adapter->stats.tpt += rd32(E1000_TPT);
4645         adapter->stats.colc += rd32(E1000_COLC);
4646
4647         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4648         /* read internal phy specific stats */
4649         reg = rd32(E1000_CTRL_EXT);
4650         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4651                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4652                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4653         }
4654
4655         adapter->stats.tsctc += rd32(E1000_TSCTC);
4656         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4657
4658         adapter->stats.iac += rd32(E1000_IAC);
4659         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4660         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4661         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4662         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4663         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4664         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4665         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4666         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4667
4668         /* Fill out the OS statistics structure */
4669         net_stats->multicast = adapter->stats.mprc;
4670         net_stats->collisions = adapter->stats.colc;
4671
4672         /* Rx Errors */
4673
4674         /* RLEC on some newer hardware can be incorrect so build
4675          * our own version based on RUC and ROC */
4676         net_stats->rx_errors = adapter->stats.rxerrc +
4677                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4678                 adapter->stats.ruc + adapter->stats.roc +
4679                 adapter->stats.cexterr;
4680         net_stats->rx_length_errors = adapter->stats.ruc +
4681                                       adapter->stats.roc;
4682         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4683         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4684         net_stats->rx_missed_errors = adapter->stats.mpc;
4685
4686         /* Tx Errors */
4687         net_stats->tx_errors = adapter->stats.ecol +
4688                                adapter->stats.latecol;
4689         net_stats->tx_aborted_errors = adapter->stats.ecol;
4690         net_stats->tx_window_errors = adapter->stats.latecol;
4691         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4692
4693         /* Tx Dropped needs to be maintained elsewhere */
4694
4695         /* Phy Stats */
4696         if (hw->phy.media_type == e1000_media_type_copper) {
4697                 if ((adapter->link_speed == SPEED_1000) &&
4698                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4699                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4700                         adapter->phy_stats.idle_errors += phy_tmp;
4701                 }
4702         }
4703
4704         /* Management Stats */
4705         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4706         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4707         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4708
4709         /* OS2BMC Stats */
4710         reg = rd32(E1000_MANC);
4711         if (reg & E1000_MANC_EN_BMC2OS) {
4712                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4713                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4714                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4715                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4716         }
4717 }
4718
4719 static irqreturn_t igb_msix_other(int irq, void *data)
4720 {
4721         struct igb_adapter *adapter = data;
4722         struct e1000_hw *hw = &adapter->hw;
4723         u32 icr = rd32(E1000_ICR);
4724         /* reading ICR causes bit 31 of EICR to be cleared */
4725
4726         if (icr & E1000_ICR_DRSTA)
4727                 schedule_work(&adapter->reset_task);
4728
4729         if (icr & E1000_ICR_DOUTSYNC) {
4730                 /* HW is reporting DMA is out of sync */
4731                 adapter->stats.doosync++;
4732                 /* The DMA Out of Sync is also indication of a spoof event
4733                  * in IOV mode. Check the Wrong VM Behavior register to
4734                  * see if it is really a spoof event. */
4735                 igb_check_wvbr(adapter);
4736         }
4737
4738         /* Check for a mailbox event */
4739         if (icr & E1000_ICR_VMMB)
4740                 igb_msg_task(adapter);
4741
4742         if (icr & E1000_ICR_LSC) {
4743                 hw->mac.get_link_status = 1;
4744                 /* guard against interrupt when we're going down */
4745                 if (!test_bit(__IGB_DOWN, &adapter->state))
4746                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4747         }
4748
4749         if (adapter->vfs_allocated_count)
4750                 wr32(E1000_IMS, E1000_IMS_LSC |
4751                                 E1000_IMS_VMMB |
4752                                 E1000_IMS_DOUTSYNC);
4753         else
4754                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4755         wr32(E1000_EIMS, adapter->eims_other);
4756
4757         return IRQ_HANDLED;
4758 }
4759
4760 static void igb_write_itr(struct igb_q_vector *q_vector)
4761 {
4762         struct igb_adapter *adapter = q_vector->adapter;
4763         u32 itr_val = q_vector->itr_val & 0x7FFC;
4764
4765         if (!q_vector->set_itr)
4766                 return;
4767
4768         if (!itr_val)
4769                 itr_val = 0x4;
4770
4771         if (adapter->hw.mac.type == e1000_82575)
4772                 itr_val |= itr_val << 16;
4773         else
4774                 itr_val |= 0x8000000;
4775
4776         writel(itr_val, q_vector->itr_register);
4777         q_vector->set_itr = 0;
4778 }
4779
4780 static irqreturn_t igb_msix_ring(int irq, void *data)
4781 {
4782         struct igb_q_vector *q_vector = data;
4783
4784         /* Write the ITR value calculated from the previous interrupt. */
4785         igb_write_itr(q_vector);
4786
4787         napi_schedule(&q_vector->napi);
4788
4789         return IRQ_HANDLED;
4790 }
4791
4792 #ifdef CONFIG_IGB_DCA
4793 static void igb_update_dca(struct igb_q_vector *q_vector)
4794 {
4795         struct igb_adapter *adapter = q_vector->adapter;
4796         struct e1000_hw *hw = &adapter->hw;
4797         int cpu = get_cpu();
4798
4799         if (q_vector->cpu == cpu)
4800                 goto out_no_update;
4801
4802         if (q_vector->tx_ring) {
4803                 int q = q_vector->tx_ring->reg_idx;
4804                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4805                 if (hw->mac.type == e1000_82575) {
4806                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4807                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4808                 } else {
4809                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4810                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4811                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4812                 }
4813                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4814                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4815         }
4816         if (q_vector->rx_ring) {
4817                 int q = q_vector->rx_ring->reg_idx;
4818                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4819                 if (hw->mac.type == e1000_82575) {
4820                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4821                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4822                 } else {
4823                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4824                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4825                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4826                 }
4827                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4828                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4829                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4830                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4831         }
4832         q_vector->cpu = cpu;
4833 out_no_update:
4834         put_cpu();
4835 }
4836
4837 static void igb_setup_dca(struct igb_adapter *adapter)
4838 {
4839         struct e1000_hw *hw = &adapter->hw;
4840         int i;
4841
4842         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4843                 return;
4844
4845         /* Always use CB2 mode, difference is masked in the CB driver. */
4846         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4847
4848         for (i = 0; i < adapter->num_q_vectors; i++) {
4849                 adapter->q_vector[i]->cpu = -1;
4850                 igb_update_dca(adapter->q_vector[i]);
4851         }
4852 }
4853
4854 static int __igb_notify_dca(struct device *dev, void *data)
4855 {
4856         struct net_device *netdev = dev_get_drvdata(dev);
4857         struct igb_adapter *adapter = netdev_priv(netdev);
4858         struct pci_dev *pdev = adapter->pdev;
4859         struct e1000_hw *hw = &adapter->hw;
4860         unsigned long event = *(unsigned long *)data;
4861
4862         switch (event) {
4863         case DCA_PROVIDER_ADD:
4864                 /* if already enabled, don't do it again */
4865                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4866                         break;
4867                 if (dca_add_requester(dev) == 0) {
4868                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4869                         dev_info(&pdev->dev, "DCA enabled\n");
4870                         igb_setup_dca(adapter);
4871                         break;
4872                 }
4873                 /* Fall Through since DCA is disabled. */
4874         case DCA_PROVIDER_REMOVE:
4875                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4876                         /* without this a class_device is left
4877                          * hanging around in the sysfs model */
4878                         dca_remove_requester(dev);
4879                         dev_info(&pdev->dev, "DCA disabled\n");
4880                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4881                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4882                 }
4883                 break;
4884         }
4885
4886         return 0;
4887 }
4888
4889 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4890                           void *p)
4891 {
4892         int ret_val;
4893
4894         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4895                                          __igb_notify_dca);
4896
4897         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4898 }
4899 #endif /* CONFIG_IGB_DCA */
4900
4901 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4902 {
4903         struct e1000_hw *hw = &adapter->hw;
4904         u32 ping;
4905         int i;
4906
4907         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4908                 ping = E1000_PF_CONTROL_MSG;
4909                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4910                         ping |= E1000_VT_MSGTYPE_CTS;
4911                 igb_write_mbx(hw, &ping, 1, i);
4912         }
4913 }
4914
4915 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4916 {
4917         struct e1000_hw *hw = &adapter->hw;
4918         u32 vmolr = rd32(E1000_VMOLR(vf));
4919         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4920
4921         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4922                             IGB_VF_FLAG_MULTI_PROMISC);
4923         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4924
4925         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4926                 vmolr |= E1000_VMOLR_MPME;
4927                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4928                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4929         } else {
4930                 /*
4931                  * if we have hashes and we are clearing a multicast promisc
4932                  * flag we need to write the hashes to the MTA as this step
4933                  * was previously skipped
4934                  */
4935                 if (vf_data->num_vf_mc_hashes > 30) {
4936                         vmolr |= E1000_VMOLR_MPME;
4937                 } else if (vf_data->num_vf_mc_hashes) {
4938                         int j;
4939                         vmolr |= E1000_VMOLR_ROMPE;
4940                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4941                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4942                 }
4943         }
4944
4945         wr32(E1000_VMOLR(vf), vmolr);
4946
4947         /* there are flags left unprocessed, likely not supported */
4948         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4949                 return -EINVAL;
4950
4951         return 0;
4952
4953 }
4954
4955 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4956                                   u32 *msgbuf, u32 vf)
4957 {
4958         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4959         u16 *hash_list = (u16 *)&msgbuf[1];
4960         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4961         int i;
4962
4963         /* salt away the number of multicast addresses assigned
4964          * to this VF for later use to restore when the PF multi cast
4965          * list changes
4966          */
4967         vf_data->num_vf_mc_hashes = n;
4968
4969         /* only up to 30 hash values supported */
4970         if (n > 30)
4971                 n = 30;
4972
4973         /* store the hashes for later use */
4974         for (i = 0; i < n; i++)
4975                 vf_data->vf_mc_hashes[i] = hash_list[i];
4976
4977         /* Flush and reset the mta with the new values */
4978         igb_set_rx_mode(adapter->netdev);
4979
4980         return 0;
4981 }
4982
4983 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4984 {
4985         struct e1000_hw *hw = &adapter->hw;
4986         struct vf_data_storage *vf_data;
4987         int i, j;
4988
4989         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4990                 u32 vmolr = rd32(E1000_VMOLR(i));
4991                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4992
4993                 vf_data = &adapter->vf_data[i];
4994
4995                 if ((vf_data->num_vf_mc_hashes > 30) ||
4996                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4997                         vmolr |= E1000_VMOLR_MPME;
4998                 } else if (vf_data->num_vf_mc_hashes) {
4999                         vmolr |= E1000_VMOLR_ROMPE;
5000                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5001                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5002                 }
5003                 wr32(E1000_VMOLR(i), vmolr);
5004         }
5005 }
5006
5007 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5008 {
5009         struct e1000_hw *hw = &adapter->hw;
5010         u32 pool_mask, reg, vid;
5011         int i;
5012
5013         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5014
5015         /* Find the vlan filter for this id */
5016         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5017                 reg = rd32(E1000_VLVF(i));
5018
5019                 /* remove the vf from the pool */
5020                 reg &= ~pool_mask;
5021
5022                 /* if pool is empty then remove entry from vfta */
5023                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5024                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5025                         reg = 0;
5026                         vid = reg & E1000_VLVF_VLANID_MASK;
5027                         igb_vfta_set(hw, vid, false);
5028                 }
5029
5030                 wr32(E1000_VLVF(i), reg);
5031         }
5032
5033         adapter->vf_data[vf].vlans_enabled = 0;
5034 }
5035
5036 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5037 {
5038         struct e1000_hw *hw = &adapter->hw;
5039         u32 reg, i;
5040
5041         /* The vlvf table only exists on 82576 hardware and newer */
5042         if (hw->mac.type < e1000_82576)
5043                 return -1;
5044
5045         /* we only need to do this if VMDq is enabled */
5046         if (!adapter->vfs_allocated_count)
5047                 return -1;
5048
5049         /* Find the vlan filter for this id */
5050         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5051                 reg = rd32(E1000_VLVF(i));
5052                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5053                     vid == (reg & E1000_VLVF_VLANID_MASK))
5054                         break;
5055         }
5056
5057         if (add) {
5058                 if (i == E1000_VLVF_ARRAY_SIZE) {
5059                         /* Did not find a matching VLAN ID entry that was
5060                          * enabled.  Search for a free filter entry, i.e.
5061                          * one without the enable bit set
5062                          */
5063                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5064                                 reg = rd32(E1000_VLVF(i));
5065                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5066                                         break;
5067                         }
5068                 }
5069                 if (i < E1000_VLVF_ARRAY_SIZE) {
5070                         /* Found an enabled/available entry */
5071                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5072
5073                         /* if !enabled we need to set this up in vfta */
5074                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5075                                 /* add VID to filter table */
5076                                 igb_vfta_set(hw, vid, true);
5077                                 reg |= E1000_VLVF_VLANID_ENABLE;
5078                         }
5079                         reg &= ~E1000_VLVF_VLANID_MASK;
5080                         reg |= vid;
5081                         wr32(E1000_VLVF(i), reg);
5082
5083                         /* do not modify RLPML for PF devices */
5084                         if (vf >= adapter->vfs_allocated_count)
5085                                 return 0;
5086
5087                         if (!adapter->vf_data[vf].vlans_enabled) {
5088                                 u32 size;
5089                                 reg = rd32(E1000_VMOLR(vf));
5090                                 size = reg & E1000_VMOLR_RLPML_MASK;
5091                                 size += 4;
5092                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5093                                 reg |= size;
5094                                 wr32(E1000_VMOLR(vf), reg);
5095                         }
5096
5097                         adapter->vf_data[vf].vlans_enabled++;
5098                         return 0;
5099                 }
5100         } else {
5101                 if (i < E1000_VLVF_ARRAY_SIZE) {
5102                         /* remove vf from the pool */
5103                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5104                         /* if pool is empty then remove entry from vfta */
5105                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5106                                 reg = 0;
5107                                 igb_vfta_set(hw, vid, false);
5108                         }
5109                         wr32(E1000_VLVF(i), reg);
5110
5111                         /* do not modify RLPML for PF devices */
5112                         if (vf >= adapter->vfs_allocated_count)
5113                                 return 0;
5114
5115                         adapter->vf_data[vf].vlans_enabled--;
5116                         if (!adapter->vf_data[vf].vlans_enabled) {
5117                                 u32 size;
5118                                 reg = rd32(E1000_VMOLR(vf));
5119                                 size = reg & E1000_VMOLR_RLPML_MASK;
5120                                 size -= 4;
5121                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5122                                 reg |= size;
5123                                 wr32(E1000_VMOLR(vf), reg);
5124                         }
5125                 }
5126         }
5127         return 0;
5128 }
5129
5130 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5131 {
5132         struct e1000_hw *hw = &adapter->hw;
5133
5134         if (vid)
5135                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5136         else
5137                 wr32(E1000_VMVIR(vf), 0);
5138 }
5139
5140 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5141                                int vf, u16 vlan, u8 qos)
5142 {
5143         int err = 0;
5144         struct igb_adapter *adapter = netdev_priv(netdev);
5145
5146         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5147                 return -EINVAL;
5148         if (vlan || qos) {
5149                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5150                 if (err)
5151                         goto out;
5152                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5153                 igb_set_vmolr(adapter, vf, !vlan);
5154                 adapter->vf_data[vf].pf_vlan = vlan;
5155                 adapter->vf_data[vf].pf_qos = qos;
5156                 dev_info(&adapter->pdev->dev,
5157                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5158                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5159                         dev_warn(&adapter->pdev->dev,
5160                                  "The VF VLAN has been set,"
5161                                  " but the PF device is not up.\n");
5162                         dev_warn(&adapter->pdev->dev,
5163                                  "Bring the PF device up before"
5164                                  " attempting to use the VF device.\n");
5165                 }
5166         } else {
5167                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5168                                    false, vf);
5169                 igb_set_vmvir(adapter, vlan, vf);
5170                 igb_set_vmolr(adapter, vf, true);
5171                 adapter->vf_data[vf].pf_vlan = 0;
5172                 adapter->vf_data[vf].pf_qos = 0;
5173        }
5174 out:
5175        return err;
5176 }
5177
5178 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5179 {
5180         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5181         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5182
5183         return igb_vlvf_set(adapter, vid, add, vf);
5184 }
5185
5186 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5187 {
5188         /* clear flags - except flag that indicates PF has set the MAC */
5189         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5190         adapter->vf_data[vf].last_nack = jiffies;
5191
5192         /* reset offloads to defaults */
5193         igb_set_vmolr(adapter, vf, true);
5194
5195         /* reset vlans for device */
5196         igb_clear_vf_vfta(adapter, vf);
5197         if (adapter->vf_data[vf].pf_vlan)
5198                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5199                                     adapter->vf_data[vf].pf_vlan,
5200                                     adapter->vf_data[vf].pf_qos);
5201         else
5202                 igb_clear_vf_vfta(adapter, vf);
5203
5204         /* reset multicast table array for vf */
5205         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5206
5207         /* Flush and reset the mta with the new values */
5208         igb_set_rx_mode(adapter->netdev);
5209 }
5210
5211 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5212 {
5213         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5214
5215         /* generate a new mac address as we were hotplug removed/added */
5216         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5217                 random_ether_addr(vf_mac);
5218
5219         /* process remaining reset events */
5220         igb_vf_reset(adapter, vf);
5221 }
5222
5223 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5224 {
5225         struct e1000_hw *hw = &adapter->hw;
5226         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5227         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5228         u32 reg, msgbuf[3];
5229         u8 *addr = (u8 *)(&msgbuf[1]);
5230
5231         /* process all the same items cleared in a function level reset */
5232         igb_vf_reset(adapter, vf);
5233
5234         /* set vf mac address */
5235         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5236
5237         /* enable transmit and receive for vf */
5238         reg = rd32(E1000_VFTE);
5239         wr32(E1000_VFTE, reg | (1 << vf));
5240         reg = rd32(E1000_VFRE);
5241         wr32(E1000_VFRE, reg | (1 << vf));
5242
5243         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5244
5245         /* reply to reset with ack and vf mac address */
5246         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5247         memcpy(addr, vf_mac, 6);
5248         igb_write_mbx(hw, msgbuf, 3, vf);
5249 }
5250
5251 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5252 {
5253         /*
5254          * The VF MAC Address is stored in a packed array of bytes
5255          * starting at the second 32 bit word of the msg array
5256          */
5257         unsigned char *addr = (char *)&msg[1];
5258         int err = -1;
5259
5260         if (is_valid_ether_addr(addr))
5261                 err = igb_set_vf_mac(adapter, vf, addr);
5262
5263         return err;
5264 }
5265
5266 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5267 {
5268         struct e1000_hw *hw = &adapter->hw;
5269         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5270         u32 msg = E1000_VT_MSGTYPE_NACK;
5271
5272         /* if device isn't clear to send it shouldn't be reading either */
5273         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5274             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5275                 igb_write_mbx(hw, &msg, 1, vf);
5276                 vf_data->last_nack = jiffies;
5277         }
5278 }
5279
5280 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5281 {
5282         struct pci_dev *pdev = adapter->pdev;
5283         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5284         struct e1000_hw *hw = &adapter->hw;
5285         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5286         s32 retval;
5287
5288         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5289
5290         if (retval) {
5291                 /* if receive failed revoke VF CTS stats and restart init */
5292                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5293                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5294                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5295                         return;
5296                 goto out;
5297         }
5298
5299         /* this is a message we already processed, do nothing */
5300         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5301                 return;
5302
5303         /*
5304          * until the vf completes a reset it should not be
5305          * allowed to start any configuration.
5306          */
5307
5308         if (msgbuf[0] == E1000_VF_RESET) {
5309                 igb_vf_reset_msg(adapter, vf);
5310                 return;
5311         }
5312
5313         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5314                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5315                         return;
5316                 retval = -1;
5317                 goto out;
5318         }
5319
5320         switch ((msgbuf[0] & 0xFFFF)) {
5321         case E1000_VF_SET_MAC_ADDR:
5322                 retval = -EINVAL;
5323                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5324                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5325                 else
5326                         dev_warn(&pdev->dev,
5327                                  "VF %d attempted to override administratively "
5328                                  "set MAC address\nReload the VF driver to "
5329                                  "resume operations\n", vf);
5330                 break;
5331         case E1000_VF_SET_PROMISC:
5332                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5333                 break;
5334         case E1000_VF_SET_MULTICAST:
5335                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5336                 break;
5337         case E1000_VF_SET_LPE:
5338                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5339                 break;
5340         case E1000_VF_SET_VLAN:
5341                 retval = -1;
5342                 if (vf_data->pf_vlan)
5343                         dev_warn(&pdev->dev,
5344                                  "VF %d attempted to override administratively "
5345                                  "set VLAN tag\nReload the VF driver to "
5346                                  "resume operations\n", vf);
5347                 else
5348                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5349                 break;
5350         default:
5351                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5352                 retval = -1;
5353                 break;
5354         }
5355
5356         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5357 out:
5358         /* notify the VF of the results of what it sent us */
5359         if (retval)
5360                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5361         else
5362                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5363
5364         igb_write_mbx(hw, msgbuf, 1, vf);
5365 }
5366
5367 static void igb_msg_task(struct igb_adapter *adapter)
5368 {
5369         struct e1000_hw *hw = &adapter->hw;
5370         u32 vf;
5371
5372         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5373                 /* process any reset requests */
5374                 if (!igb_check_for_rst(hw, vf))
5375                         igb_vf_reset_event(adapter, vf);
5376
5377                 /* process any messages pending */
5378                 if (!igb_check_for_msg(hw, vf))
5379                         igb_rcv_msg_from_vf(adapter, vf);
5380
5381                 /* process any acks */
5382                 if (!igb_check_for_ack(hw, vf))
5383                         igb_rcv_ack_from_vf(adapter, vf);
5384         }
5385 }
5386
5387 /**
5388  *  igb_set_uta - Set unicast filter table address
5389  *  @adapter: board private structure
5390  *
5391  *  The unicast table address is a register array of 32-bit registers.
5392  *  The table is meant to be used in a way similar to how the MTA is used
5393  *  however due to certain limitations in the hardware it is necessary to
5394  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5395  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5396  **/
5397 static void igb_set_uta(struct igb_adapter *adapter)
5398 {
5399         struct e1000_hw *hw = &adapter->hw;
5400         int i;
5401
5402         /* The UTA table only exists on 82576 hardware and newer */
5403         if (hw->mac.type < e1000_82576)
5404                 return;
5405
5406         /* we only need to do this if VMDq is enabled */
5407         if (!adapter->vfs_allocated_count)
5408                 return;
5409
5410         for (i = 0; i < hw->mac.uta_reg_count; i++)
5411                 array_wr32(E1000_UTA, i, ~0);
5412 }
5413
5414 /**
5415  * igb_intr_msi - Interrupt Handler
5416  * @irq: interrupt number
5417  * @data: pointer to a network interface device structure
5418  **/
5419 static irqreturn_t igb_intr_msi(int irq, void *data)
5420 {
5421         struct igb_adapter *adapter = data;
5422         struct igb_q_vector *q_vector = adapter->q_vector[0];
5423         struct e1000_hw *hw = &adapter->hw;
5424         /* read ICR disables interrupts using IAM */
5425         u32 icr = rd32(E1000_ICR);
5426
5427         igb_write_itr(q_vector);
5428
5429         if (icr & E1000_ICR_DRSTA)
5430                 schedule_work(&adapter->reset_task);
5431
5432         if (icr & E1000_ICR_DOUTSYNC) {
5433                 /* HW is reporting DMA is out of sync */
5434                 adapter->stats.doosync++;
5435         }
5436
5437         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5438                 hw->mac.get_link_status = 1;
5439                 if (!test_bit(__IGB_DOWN, &adapter->state))
5440                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5441         }
5442
5443         napi_schedule(&q_vector->napi);
5444
5445         return IRQ_HANDLED;
5446 }
5447
5448 /**
5449  * igb_intr - Legacy Interrupt Handler
5450  * @irq: interrupt number
5451  * @data: pointer to a network interface device structure
5452  **/
5453 static irqreturn_t igb_intr(int irq, void *data)
5454 {
5455         struct igb_adapter *adapter = data;
5456         struct igb_q_vector *q_vector = adapter->q_vector[0];
5457         struct e1000_hw *hw = &adapter->hw;
5458         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5459          * need for the IMC write */
5460         u32 icr = rd32(E1000_ICR);
5461         if (!icr)
5462                 return IRQ_NONE;  /* Not our interrupt */
5463
5464         igb_write_itr(q_vector);
5465
5466         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5467          * not set, then the adapter didn't send an interrupt */
5468         if (!(icr & E1000_ICR_INT_ASSERTED))
5469                 return IRQ_NONE;
5470
5471         if (icr & E1000_ICR_DRSTA)
5472                 schedule_work(&adapter->reset_task);
5473
5474         if (icr & E1000_ICR_DOUTSYNC) {
5475                 /* HW is reporting DMA is out of sync */
5476                 adapter->stats.doosync++;
5477         }
5478
5479         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5480                 hw->mac.get_link_status = 1;
5481                 /* guard against interrupt when we're going down */
5482                 if (!test_bit(__IGB_DOWN, &adapter->state))
5483                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5484         }
5485
5486         napi_schedule(&q_vector->napi);
5487
5488         return IRQ_HANDLED;
5489 }
5490
5491 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5492 {
5493         struct igb_adapter *adapter = q_vector->adapter;
5494         struct e1000_hw *hw = &adapter->hw;
5495
5496         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5497             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5498                 if (!adapter->msix_entries)
5499                         igb_set_itr(adapter);
5500                 else
5501                         igb_update_ring_itr(q_vector);
5502         }
5503
5504         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5505                 if (adapter->msix_entries)
5506                         wr32(E1000_EIMS, q_vector->eims_value);
5507                 else
5508                         igb_irq_enable(adapter);
5509         }
5510 }
5511
5512 /**
5513  * igb_poll - NAPI Rx polling callback
5514  * @napi: napi polling structure
5515  * @budget: count of how many packets we should handle
5516  **/
5517 static int igb_poll(struct napi_struct *napi, int budget)
5518 {
5519         struct igb_q_vector *q_vector = container_of(napi,
5520                                                      struct igb_q_vector,
5521                                                      napi);
5522         int tx_clean_complete = 1, work_done = 0;
5523
5524 #ifdef CONFIG_IGB_DCA
5525         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5526                 igb_update_dca(q_vector);
5527 #endif
5528         if (q_vector->tx_ring)
5529                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5530
5531         if (q_vector->rx_ring)
5532                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5533
5534         if (!tx_clean_complete)
5535                 work_done = budget;
5536
5537         /* If not enough Rx work done, exit the polling mode */
5538         if (work_done < budget) {
5539                 napi_complete(napi);
5540                 igb_ring_irq_enable(q_vector);
5541         }
5542
5543         return work_done;
5544 }
5545
5546 /**
5547  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5548  * @adapter: board private structure
5549  * @shhwtstamps: timestamp structure to update
5550  * @regval: unsigned 64bit system time value.
5551  *
5552  * We need to convert the system time value stored in the RX/TXSTMP registers
5553  * into a hwtstamp which can be used by the upper level timestamping functions
5554  */
5555 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5556                                    struct skb_shared_hwtstamps *shhwtstamps,
5557                                    u64 regval)
5558 {
5559         u64 ns;
5560
5561         /*
5562          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5563          * 24 to match clock shift we setup earlier.
5564          */
5565         if (adapter->hw.mac.type == e1000_82580)
5566                 regval <<= IGB_82580_TSYNC_SHIFT;
5567
5568         ns = timecounter_cyc2time(&adapter->clock, regval);
5569         timecompare_update(&adapter->compare, ns);
5570         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5571         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5572         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5573 }
5574
5575 /**
5576  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5577  * @q_vector: pointer to q_vector containing needed info
5578  * @buffer: pointer to igb_buffer structure
5579  *
5580  * If we were asked to do hardware stamping and such a time stamp is
5581  * available, then it must have been for this skb here because we only
5582  * allow only one such packet into the queue.
5583  */
5584 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5585 {
5586         struct igb_adapter *adapter = q_vector->adapter;
5587         struct e1000_hw *hw = &adapter->hw;
5588         struct skb_shared_hwtstamps shhwtstamps;
5589         u64 regval;
5590
5591         /* if skb does not support hw timestamp or TX stamp not valid exit */
5592         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5593             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5594                 return;
5595
5596         regval = rd32(E1000_TXSTMPL);
5597         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5598
5599         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5600         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5601 }
5602
5603 /**
5604  * igb_clean_tx_irq - Reclaim resources after transmit completes
5605  * @q_vector: pointer to q_vector containing needed info
5606  * returns true if ring is completely cleaned
5607  **/
5608 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5609 {
5610         struct igb_adapter *adapter = q_vector->adapter;
5611         struct igb_ring *tx_ring = q_vector->tx_ring;
5612         struct net_device *netdev = tx_ring->netdev;
5613         struct e1000_hw *hw = &adapter->hw;
5614         struct igb_buffer *buffer_info;
5615         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5616         unsigned int total_bytes = 0, total_packets = 0;
5617         unsigned int i, eop, count = 0;
5618         bool cleaned = false;
5619
5620         i = tx_ring->next_to_clean;
5621         eop = tx_ring->buffer_info[i].next_to_watch;
5622         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5623
5624         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5625                (count < tx_ring->count)) {
5626                 rmb();  /* read buffer_info after eop_desc status */
5627                 for (cleaned = false; !cleaned; count++) {
5628                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5629                         buffer_info = &tx_ring->buffer_info[i];
5630                         cleaned = (i == eop);
5631
5632                         if (buffer_info->skb) {
5633                                 total_bytes += buffer_info->bytecount;
5634                                 /* gso_segs is currently only valid for tcp */
5635                                 total_packets += buffer_info->gso_segs;
5636                                 igb_tx_hwtstamp(q_vector, buffer_info);
5637                         }
5638
5639                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5640                         tx_desc->wb.status = 0;
5641
5642                         i++;
5643                         if (i == tx_ring->count)
5644                                 i = 0;
5645                 }
5646                 eop = tx_ring->buffer_info[i].next_to_watch;
5647                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5648         }
5649
5650         tx_ring->next_to_clean = i;
5651
5652         if (unlikely(count &&
5653                      netif_carrier_ok(netdev) &&
5654                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5655                 /* Make sure that anybody stopping the queue after this
5656                  * sees the new next_to_clean.
5657                  */
5658                 smp_mb();
5659                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5660                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5661                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5662
5663                         u64_stats_update_begin(&tx_ring->tx_syncp);
5664                         tx_ring->tx_stats.restart_queue++;
5665                         u64_stats_update_end(&tx_ring->tx_syncp);
5666                 }
5667         }
5668
5669         if (tx_ring->detect_tx_hung) {
5670                 /* Detect a transmit hang in hardware, this serializes the
5671                  * check with the clearing of time_stamp and movement of i */
5672                 tx_ring->detect_tx_hung = false;
5673                 if (tx_ring->buffer_info[i].time_stamp &&
5674                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5675                                (adapter->tx_timeout_factor * HZ)) &&
5676                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5677
5678                         /* detected Tx unit hang */
5679                         dev_err(tx_ring->dev,
5680                                 "Detected Tx Unit Hang\n"
5681                                 "  Tx Queue             <%d>\n"
5682                                 "  TDH                  <%x>\n"
5683                                 "  TDT                  <%x>\n"
5684                                 "  next_to_use          <%x>\n"
5685                                 "  next_to_clean        <%x>\n"
5686                                 "buffer_info[next_to_clean]\n"
5687                                 "  time_stamp           <%lx>\n"
5688                                 "  next_to_watch        <%x>\n"
5689                                 "  jiffies              <%lx>\n"
5690                                 "  desc.status          <%x>\n",
5691                                 tx_ring->queue_index,
5692                                 readl(tx_ring->head),
5693                                 readl(tx_ring->tail),
5694                                 tx_ring->next_to_use,
5695                                 tx_ring->next_to_clean,
5696                                 tx_ring->buffer_info[eop].time_stamp,
5697                                 eop,
5698                                 jiffies,
5699                                 eop_desc->wb.status);
5700                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5701                 }
5702         }
5703         tx_ring->total_bytes += total_bytes;
5704         tx_ring->total_packets += total_packets;
5705         u64_stats_update_begin(&tx_ring->tx_syncp);
5706         tx_ring->tx_stats.bytes += total_bytes;
5707         tx_ring->tx_stats.packets += total_packets;
5708         u64_stats_update_end(&tx_ring->tx_syncp);
5709         return count < tx_ring->count;
5710 }
5711
5712 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5713                                        u32 status_err, struct sk_buff *skb)
5714 {
5715         skb_checksum_none_assert(skb);
5716
5717         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5718         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5719              (status_err & E1000_RXD_STAT_IXSM))
5720                 return;
5721
5722         /* TCP/UDP checksum error bit is set */
5723         if (status_err &
5724             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5725                 /*
5726                  * work around errata with sctp packets where the TCPE aka
5727                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5728                  * packets, (aka let the stack check the crc32c)
5729                  */
5730                 if ((skb->len == 60) &&
5731                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5732                         u64_stats_update_begin(&ring->rx_syncp);
5733                         ring->rx_stats.csum_err++;
5734                         u64_stats_update_end(&ring->rx_syncp);
5735                 }
5736                 /* let the stack verify checksum errors */
5737                 return;
5738         }
5739         /* It must be a TCP or UDP packet with a valid checksum */
5740         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5741                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5742
5743         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5744 }
5745
5746 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5747                                    struct sk_buff *skb)
5748 {
5749         struct igb_adapter *adapter = q_vector->adapter;
5750         struct e1000_hw *hw = &adapter->hw;
5751         u64 regval;
5752
5753         /*
5754          * If this bit is set, then the RX registers contain the time stamp. No
5755          * other packet will be time stamped until we read these registers, so
5756          * read the registers to make them available again. Because only one
5757          * packet can be time stamped at a time, we know that the register
5758          * values must belong to this one here and therefore we don't need to
5759          * compare any of the additional attributes stored for it.
5760          *
5761          * If nothing went wrong, then it should have a shared tx_flags that we
5762          * can turn into a skb_shared_hwtstamps.
5763          */
5764         if (staterr & E1000_RXDADV_STAT_TSIP) {
5765                 u32 *stamp = (u32 *)skb->data;
5766                 regval = le32_to_cpu(*(stamp + 2));
5767                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5768                 skb_pull(skb, IGB_TS_HDR_LEN);
5769         } else {
5770                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5771                         return;
5772
5773                 regval = rd32(E1000_RXSTMPL);
5774                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5775         }
5776
5777         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5778 }
5779 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5780                                union e1000_adv_rx_desc *rx_desc)
5781 {
5782         /* HW will not DMA in data larger than the given buffer, even if it
5783          * parses the (NFS, of course) header to be larger.  In that case, it
5784          * fills the header buffer and spills the rest into the page.
5785          */
5786         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5787                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5788         if (hlen > rx_ring->rx_buffer_len)
5789                 hlen = rx_ring->rx_buffer_len;
5790         return hlen;
5791 }
5792
5793 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5794                                  int *work_done, int budget)
5795 {
5796         struct igb_ring *rx_ring = q_vector->rx_ring;
5797         struct net_device *netdev = rx_ring->netdev;
5798         struct device *dev = rx_ring->dev;
5799         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5800         struct igb_buffer *buffer_info , *next_buffer;
5801         struct sk_buff *skb;
5802         bool cleaned = false;
5803         int cleaned_count = 0;
5804         int current_node = numa_node_id();
5805         unsigned int total_bytes = 0, total_packets = 0;
5806         unsigned int i;
5807         u32 staterr;
5808         u16 length;
5809
5810         i = rx_ring->next_to_clean;
5811         buffer_info = &rx_ring->buffer_info[i];
5812         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5813         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5814
5815         while (staterr & E1000_RXD_STAT_DD) {
5816                 if (*work_done >= budget)
5817                         break;
5818                 (*work_done)++;
5819                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5820
5821                 skb = buffer_info->skb;
5822                 prefetch(skb->data - NET_IP_ALIGN);
5823                 buffer_info->skb = NULL;
5824
5825                 i++;
5826                 if (i == rx_ring->count)
5827                         i = 0;
5828
5829                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5830                 prefetch(next_rxd);
5831                 next_buffer = &rx_ring->buffer_info[i];
5832
5833                 length = le16_to_cpu(rx_desc->wb.upper.length);
5834                 cleaned = true;
5835                 cleaned_count++;
5836
5837                 if (buffer_info->dma) {
5838                         dma_unmap_single(dev, buffer_info->dma,
5839                                          rx_ring->rx_buffer_len,
5840                                          DMA_FROM_DEVICE);
5841                         buffer_info->dma = 0;
5842                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5843                                 skb_put(skb, length);
5844                                 goto send_up;
5845                         }
5846                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5847                 }
5848
5849                 if (length) {
5850                         dma_unmap_page(dev, buffer_info->page_dma,
5851                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5852                         buffer_info->page_dma = 0;
5853
5854                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5855                                                 buffer_info->page,
5856                                                 buffer_info->page_offset,
5857                                                 length);
5858
5859                         if ((page_count(buffer_info->page) != 1) ||
5860                             (page_to_nid(buffer_info->page) != current_node))
5861                                 buffer_info->page = NULL;
5862                         else
5863                                 get_page(buffer_info->page);
5864
5865                         skb->len += length;
5866                         skb->data_len += length;
5867                         skb->truesize += length;
5868                 }
5869
5870                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5871                         buffer_info->skb = next_buffer->skb;
5872                         buffer_info->dma = next_buffer->dma;
5873                         next_buffer->skb = skb;
5874                         next_buffer->dma = 0;
5875                         goto next_desc;
5876                 }
5877 send_up:
5878                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5879                         dev_kfree_skb_irq(skb);
5880                         goto next_desc;
5881                 }
5882
5883                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5884                         igb_rx_hwtstamp(q_vector, staterr, skb);
5885                 total_bytes += skb->len;
5886                 total_packets++;
5887
5888                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5889
5890                 skb->protocol = eth_type_trans(skb, netdev);
5891                 skb_record_rx_queue(skb, rx_ring->queue_index);
5892
5893                 if (staterr & E1000_RXD_STAT_VP) {
5894                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5895
5896                         __vlan_hwaccel_put_tag(skb, vid);
5897                 }
5898                 napi_gro_receive(&q_vector->napi, skb);
5899
5900 next_desc:
5901                 rx_desc->wb.upper.status_error = 0;
5902
5903                 /* return some buffers to hardware, one at a time is too slow */
5904                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5905                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5906                         cleaned_count = 0;
5907                 }
5908
5909                 /* use prefetched values */
5910                 rx_desc = next_rxd;
5911                 buffer_info = next_buffer;
5912                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5913         }
5914
5915         rx_ring->next_to_clean = i;
5916         cleaned_count = igb_desc_unused(rx_ring);
5917
5918         if (cleaned_count)
5919                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5920
5921         rx_ring->total_packets += total_packets;
5922         rx_ring->total_bytes += total_bytes;
5923         u64_stats_update_begin(&rx_ring->rx_syncp);
5924         rx_ring->rx_stats.packets += total_packets;
5925         rx_ring->rx_stats.bytes += total_bytes;
5926         u64_stats_update_end(&rx_ring->rx_syncp);
5927         return cleaned;
5928 }
5929
5930 /**
5931  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5932  * @adapter: address of board private structure
5933  **/
5934 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5935 {
5936         struct net_device *netdev = rx_ring->netdev;
5937         union e1000_adv_rx_desc *rx_desc;
5938         struct igb_buffer *buffer_info;
5939         struct sk_buff *skb;
5940         unsigned int i;
5941         int bufsz;
5942
5943         i = rx_ring->next_to_use;
5944         buffer_info = &rx_ring->buffer_info[i];
5945
5946         bufsz = rx_ring->rx_buffer_len;
5947
5948         while (cleaned_count--) {
5949                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5950
5951                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5952                         if (!buffer_info->page) {
5953                                 buffer_info->page = netdev_alloc_page(netdev);
5954                                 if (unlikely(!buffer_info->page)) {
5955                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5956                                         rx_ring->rx_stats.alloc_failed++;
5957                                         u64_stats_update_end(&rx_ring->rx_syncp);
5958                                         goto no_buffers;
5959                                 }
5960                                 buffer_info->page_offset = 0;
5961                         } else {
5962                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5963                         }
5964                         buffer_info->page_dma =
5965                                 dma_map_page(rx_ring->dev, buffer_info->page,
5966                                              buffer_info->page_offset,
5967                                              PAGE_SIZE / 2,
5968                                              DMA_FROM_DEVICE);
5969                         if (dma_mapping_error(rx_ring->dev,
5970                                               buffer_info->page_dma)) {
5971                                 buffer_info->page_dma = 0;
5972                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5973                                 rx_ring->rx_stats.alloc_failed++;
5974                                 u64_stats_update_end(&rx_ring->rx_syncp);
5975                                 goto no_buffers;
5976                         }
5977                 }
5978
5979                 skb = buffer_info->skb;
5980                 if (!skb) {
5981                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5982                         if (unlikely(!skb)) {
5983                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5984                                 rx_ring->rx_stats.alloc_failed++;
5985                                 u64_stats_update_end(&rx_ring->rx_syncp);
5986                                 goto no_buffers;
5987                         }
5988
5989                         buffer_info->skb = skb;
5990                 }
5991                 if (!buffer_info->dma) {
5992                         buffer_info->dma = dma_map_single(rx_ring->dev,
5993                                                           skb->data,
5994                                                           bufsz,
5995                                                           DMA_FROM_DEVICE);
5996                         if (dma_mapping_error(rx_ring->dev,
5997                                               buffer_info->dma)) {
5998                                 buffer_info->dma = 0;
5999                                 u64_stats_update_begin(&rx_ring->rx_syncp);
6000                                 rx_ring->rx_stats.alloc_failed++;
6001                                 u64_stats_update_end(&rx_ring->rx_syncp);
6002                                 goto no_buffers;
6003                         }
6004                 }
6005                 /* Refresh the desc even if buffer_addrs didn't change because
6006                  * each write-back erases this info. */
6007                 if (bufsz < IGB_RXBUFFER_1024) {
6008                         rx_desc->read.pkt_addr =
6009                              cpu_to_le64(buffer_info->page_dma);
6010                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6011                 } else {
6012                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6013                         rx_desc->read.hdr_addr = 0;
6014                 }
6015
6016                 i++;
6017                 if (i == rx_ring->count)
6018                         i = 0;
6019                 buffer_info = &rx_ring->buffer_info[i];
6020         }
6021
6022 no_buffers:
6023         if (rx_ring->next_to_use != i) {
6024                 rx_ring->next_to_use = i;
6025                 if (i == 0)
6026                         i = (rx_ring->count - 1);
6027                 else
6028                         i--;
6029
6030                 /* Force memory writes to complete before letting h/w
6031                  * know there are new descriptors to fetch.  (Only
6032                  * applicable for weak-ordered memory model archs,
6033                  * such as IA-64). */
6034                 wmb();
6035                 writel(i, rx_ring->tail);
6036         }
6037 }
6038
6039 /**
6040  * igb_mii_ioctl -
6041  * @netdev:
6042  * @ifreq:
6043  * @cmd:
6044  **/
6045 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6046 {
6047         struct igb_adapter *adapter = netdev_priv(netdev);
6048         struct mii_ioctl_data *data = if_mii(ifr);
6049
6050         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6051                 return -EOPNOTSUPP;
6052
6053         switch (cmd) {
6054         case SIOCGMIIPHY:
6055                 data->phy_id = adapter->hw.phy.addr;
6056                 break;
6057         case SIOCGMIIREG:
6058                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6059                                      &data->val_out))
6060                         return -EIO;
6061                 break;
6062         case SIOCSMIIREG:
6063         default:
6064                 return -EOPNOTSUPP;
6065         }
6066         return 0;
6067 }
6068
6069 /**
6070  * igb_hwtstamp_ioctl - control hardware time stamping
6071  * @netdev:
6072  * @ifreq:
6073  * @cmd:
6074  *
6075  * Outgoing time stamping can be enabled and disabled. Play nice and
6076  * disable it when requested, although it shouldn't case any overhead
6077  * when no packet needs it. At most one packet in the queue may be
6078  * marked for time stamping, otherwise it would be impossible to tell
6079  * for sure to which packet the hardware time stamp belongs.
6080  *
6081  * Incoming time stamping has to be configured via the hardware
6082  * filters. Not all combinations are supported, in particular event
6083  * type has to be specified. Matching the kind of event packet is
6084  * not supported, with the exception of "all V2 events regardless of
6085  * level 2 or 4".
6086  *
6087  **/
6088 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6089                               struct ifreq *ifr, int cmd)
6090 {
6091         struct igb_adapter *adapter = netdev_priv(netdev);
6092         struct e1000_hw *hw = &adapter->hw;
6093         struct hwtstamp_config config;
6094         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6095         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6096         u32 tsync_rx_cfg = 0;
6097         bool is_l4 = false;
6098         bool is_l2 = false;
6099         u32 regval;
6100
6101         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6102                 return -EFAULT;
6103
6104         /* reserved for future extensions */
6105         if (config.flags)
6106                 return -EINVAL;
6107
6108         switch (config.tx_type) {
6109         case HWTSTAMP_TX_OFF:
6110                 tsync_tx_ctl = 0;
6111         case HWTSTAMP_TX_ON:
6112                 break;
6113         default:
6114                 return -ERANGE;
6115         }
6116
6117         switch (config.rx_filter) {
6118         case HWTSTAMP_FILTER_NONE:
6119                 tsync_rx_ctl = 0;
6120                 break;
6121         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6122         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6123         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6124         case HWTSTAMP_FILTER_ALL:
6125                 /*
6126                  * register TSYNCRXCFG must be set, therefore it is not
6127                  * possible to time stamp both Sync and Delay_Req messages
6128                  * => fall back to time stamping all packets
6129                  */
6130                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6131                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6132                 break;
6133         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6134                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6135                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6136                 is_l4 = true;
6137                 break;
6138         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6139                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6140                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6141                 is_l4 = true;
6142                 break;
6143         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6144         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6145                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6146                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6147                 is_l2 = true;
6148                 is_l4 = true;
6149                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6150                 break;
6151         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6152         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6153                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6154                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6155                 is_l2 = true;
6156                 is_l4 = true;
6157                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6158                 break;
6159         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6160         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6161         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6162                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6163                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6164                 is_l2 = true;
6165                 break;
6166         default:
6167                 return -ERANGE;
6168         }
6169
6170         if (hw->mac.type == e1000_82575) {
6171                 if (tsync_rx_ctl | tsync_tx_ctl)
6172                         return -EINVAL;
6173                 return 0;
6174         }
6175
6176         /*
6177          * Per-packet timestamping only works if all packets are
6178          * timestamped, so enable timestamping in all packets as
6179          * long as one rx filter was configured.
6180          */
6181         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6182                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6183                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6184         }
6185
6186         /* enable/disable TX */
6187         regval = rd32(E1000_TSYNCTXCTL);
6188         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6189         regval |= tsync_tx_ctl;
6190         wr32(E1000_TSYNCTXCTL, regval);
6191
6192         /* enable/disable RX */
6193         regval = rd32(E1000_TSYNCRXCTL);
6194         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6195         regval |= tsync_rx_ctl;
6196         wr32(E1000_TSYNCRXCTL, regval);
6197
6198         /* define which PTP packets are time stamped */
6199         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6200
6201         /* define ethertype filter for timestamped packets */
6202         if (is_l2)
6203                 wr32(E1000_ETQF(3),
6204                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6205                                  E1000_ETQF_1588 | /* enable timestamping */
6206                                  ETH_P_1588));     /* 1588 eth protocol type */
6207         else
6208                 wr32(E1000_ETQF(3), 0);
6209
6210 #define PTP_PORT 319
6211         /* L4 Queue Filter[3]: filter by destination port and protocol */
6212         if (is_l4) {
6213                 u32 ftqf = (IPPROTO_UDP /* UDP */
6214                         | E1000_FTQF_VF_BP /* VF not compared */
6215                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6216                         | E1000_FTQF_MASK); /* mask all inputs */
6217                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6218
6219                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6220                 wr32(E1000_IMIREXT(3),
6221                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6222                 if (hw->mac.type == e1000_82576) {
6223                         /* enable source port check */
6224                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6225                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6226                 }
6227                 wr32(E1000_FTQF(3), ftqf);
6228         } else {
6229                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6230         }
6231         wrfl();
6232
6233         adapter->hwtstamp_config = config;
6234
6235         /* clear TX/RX time stamp registers, just to be sure */
6236         regval = rd32(E1000_TXSTMPH);
6237         regval = rd32(E1000_RXSTMPH);
6238
6239         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6240                 -EFAULT : 0;
6241 }
6242
6243 /**
6244  * igb_ioctl -
6245  * @netdev:
6246  * @ifreq:
6247  * @cmd:
6248  **/
6249 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6250 {
6251         switch (cmd) {
6252         case SIOCGMIIPHY:
6253         case SIOCGMIIREG:
6254         case SIOCSMIIREG:
6255                 return igb_mii_ioctl(netdev, ifr, cmd);
6256         case SIOCSHWTSTAMP:
6257                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6258         default:
6259                 return -EOPNOTSUPP;
6260         }
6261 }
6262
6263 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6264 {
6265         struct igb_adapter *adapter = hw->back;
6266         u16 cap_offset;
6267
6268         cap_offset = adapter->pdev->pcie_cap;
6269         if (!cap_offset)
6270                 return -E1000_ERR_CONFIG;
6271
6272         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6273
6274         return 0;
6275 }
6276
6277 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6278 {
6279         struct igb_adapter *adapter = hw->back;
6280         u16 cap_offset;
6281
6282         cap_offset = adapter->pdev->pcie_cap;
6283         if (!cap_offset)
6284                 return -E1000_ERR_CONFIG;
6285
6286         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6287
6288         return 0;
6289 }
6290
6291 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6292 {
6293         struct igb_adapter *adapter = netdev_priv(netdev);
6294         struct e1000_hw *hw = &adapter->hw;
6295         u32 ctrl, rctl;
6296
6297         igb_irq_disable(adapter);
6298
6299         if (features & NETIF_F_HW_VLAN_RX) {
6300                 /* enable VLAN tag insert/strip */
6301                 ctrl = rd32(E1000_CTRL);
6302                 ctrl |= E1000_CTRL_VME;
6303                 wr32(E1000_CTRL, ctrl);
6304
6305                 /* Disable CFI check */
6306                 rctl = rd32(E1000_RCTL);
6307                 rctl &= ~E1000_RCTL_CFIEN;
6308                 wr32(E1000_RCTL, rctl);
6309         } else {
6310                 /* disable VLAN tag insert/strip */
6311                 ctrl = rd32(E1000_CTRL);
6312                 ctrl &= ~E1000_CTRL_VME;
6313                 wr32(E1000_CTRL, ctrl);
6314         }
6315
6316         igb_rlpml_set(adapter);
6317
6318         if (!test_bit(__IGB_DOWN, &adapter->state))
6319                 igb_irq_enable(adapter);
6320 }
6321
6322 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6323 {
6324         struct igb_adapter *adapter = netdev_priv(netdev);
6325         struct e1000_hw *hw = &adapter->hw;
6326         int pf_id = adapter->vfs_allocated_count;
6327
6328         /* attempt to add filter to vlvf array */
6329         igb_vlvf_set(adapter, vid, true, pf_id);
6330
6331         /* add the filter since PF can receive vlans w/o entry in vlvf */
6332         igb_vfta_set(hw, vid, true);
6333
6334         set_bit(vid, adapter->active_vlans);
6335 }
6336
6337 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6338 {
6339         struct igb_adapter *adapter = netdev_priv(netdev);
6340         struct e1000_hw *hw = &adapter->hw;
6341         int pf_id = adapter->vfs_allocated_count;
6342         s32 err;
6343
6344         igb_irq_disable(adapter);
6345
6346         if (!test_bit(__IGB_DOWN, &adapter->state))
6347                 igb_irq_enable(adapter);
6348
6349         /* remove vlan from VLVF table array */
6350         err = igb_vlvf_set(adapter, vid, false, pf_id);
6351
6352         /* if vid was not present in VLVF just remove it from table */
6353         if (err)
6354                 igb_vfta_set(hw, vid, false);
6355
6356         clear_bit(vid, adapter->active_vlans);
6357 }
6358
6359 static void igb_restore_vlan(struct igb_adapter *adapter)
6360 {
6361         u16 vid;
6362
6363         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6364                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6365 }
6366
6367 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6368 {
6369         struct pci_dev *pdev = adapter->pdev;
6370         struct e1000_mac_info *mac = &adapter->hw.mac;
6371
6372         mac->autoneg = 0;
6373
6374         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6375          * for the switch() below to work */
6376         if ((spd & 1) || (dplx & ~1))
6377                 goto err_inval;
6378
6379         /* Fiber NIC's only allow 1000 Gbps Full duplex */
6380         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6381             spd != SPEED_1000 &&
6382             dplx != DUPLEX_FULL)
6383                 goto err_inval;
6384
6385         switch (spd + dplx) {
6386         case SPEED_10 + DUPLEX_HALF:
6387                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6388                 break;
6389         case SPEED_10 + DUPLEX_FULL:
6390                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6391                 break;
6392         case SPEED_100 + DUPLEX_HALF:
6393                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6394                 break;
6395         case SPEED_100 + DUPLEX_FULL:
6396                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6397                 break;
6398         case SPEED_1000 + DUPLEX_FULL:
6399                 mac->autoneg = 1;
6400                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6401                 break;
6402         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6403         default:
6404                 goto err_inval;
6405         }
6406         return 0;
6407
6408 err_inval:
6409         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6410         return -EINVAL;
6411 }
6412
6413 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6414 {
6415         struct net_device *netdev = pci_get_drvdata(pdev);
6416         struct igb_adapter *adapter = netdev_priv(netdev);
6417         struct e1000_hw *hw = &adapter->hw;
6418         u32 ctrl, rctl, status;
6419         u32 wufc = adapter->wol;
6420 #ifdef CONFIG_PM
6421         int retval = 0;
6422 #endif
6423
6424         netif_device_detach(netdev);
6425
6426         if (netif_running(netdev))
6427                 igb_close(netdev);
6428
6429         igb_clear_interrupt_scheme(adapter);
6430
6431 #ifdef CONFIG_PM
6432         retval = pci_save_state(pdev);
6433         if (retval)
6434                 return retval;
6435 #endif
6436
6437         status = rd32(E1000_STATUS);
6438         if (status & E1000_STATUS_LU)
6439                 wufc &= ~E1000_WUFC_LNKC;
6440
6441         if (wufc) {
6442                 igb_setup_rctl(adapter);
6443                 igb_set_rx_mode(netdev);
6444
6445                 /* turn on all-multi mode if wake on multicast is enabled */
6446                 if (wufc & E1000_WUFC_MC) {
6447                         rctl = rd32(E1000_RCTL);
6448                         rctl |= E1000_RCTL_MPE;
6449                         wr32(E1000_RCTL, rctl);
6450                 }
6451
6452                 ctrl = rd32(E1000_CTRL);
6453                 /* advertise wake from D3Cold */
6454                 #define E1000_CTRL_ADVD3WUC 0x00100000
6455                 /* phy power management enable */
6456                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6457                 ctrl |= E1000_CTRL_ADVD3WUC;
6458                 wr32(E1000_CTRL, ctrl);
6459
6460                 /* Allow time for pending master requests to run */
6461                 igb_disable_pcie_master(hw);
6462
6463                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6464                 wr32(E1000_WUFC, wufc);
6465         } else {
6466                 wr32(E1000_WUC, 0);
6467                 wr32(E1000_WUFC, 0);
6468         }
6469
6470         *enable_wake = wufc || adapter->en_mng_pt;
6471         if (!*enable_wake)
6472                 igb_power_down_link(adapter);
6473         else
6474                 igb_power_up_link(adapter);
6475
6476         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6477          * would have already happened in close and is redundant. */
6478         igb_release_hw_control(adapter);
6479
6480         pci_disable_device(pdev);
6481
6482         return 0;
6483 }
6484
6485 #ifdef CONFIG_PM
6486 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6487 {
6488         int retval;
6489         bool wake;
6490
6491         retval = __igb_shutdown(pdev, &wake);
6492         if (retval)
6493                 return retval;
6494
6495         if (wake) {
6496                 pci_prepare_to_sleep(pdev);
6497         } else {
6498                 pci_wake_from_d3(pdev, false);
6499                 pci_set_power_state(pdev, PCI_D3hot);
6500         }
6501
6502         return 0;
6503 }
6504
6505 static int igb_resume(struct pci_dev *pdev)
6506 {
6507         struct net_device *netdev = pci_get_drvdata(pdev);
6508         struct igb_adapter *adapter = netdev_priv(netdev);
6509         struct e1000_hw *hw = &adapter->hw;
6510         u32 err;
6511
6512         pci_set_power_state(pdev, PCI_D0);
6513         pci_restore_state(pdev);
6514         pci_save_state(pdev);
6515
6516         err = pci_enable_device_mem(pdev);
6517         if (err) {
6518                 dev_err(&pdev->dev,
6519                         "igb: Cannot enable PCI device from suspend\n");
6520                 return err;
6521         }
6522         pci_set_master(pdev);
6523
6524         pci_enable_wake(pdev, PCI_D3hot, 0);
6525         pci_enable_wake(pdev, PCI_D3cold, 0);
6526
6527         if (igb_init_interrupt_scheme(adapter)) {
6528                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6529                 return -ENOMEM;
6530         }
6531
6532         igb_reset(adapter);
6533
6534         /* let the f/w know that the h/w is now under the control of the
6535          * driver. */
6536         igb_get_hw_control(adapter);
6537
6538         wr32(E1000_WUS, ~0);
6539
6540         if (netif_running(netdev)) {
6541                 err = igb_open(netdev);
6542                 if (err)
6543                         return err;
6544         }
6545
6546         netif_device_attach(netdev);
6547
6548         return 0;
6549 }
6550 #endif
6551
6552 static void igb_shutdown(struct pci_dev *pdev)
6553 {
6554         bool wake;
6555
6556         __igb_shutdown(pdev, &wake);
6557
6558         if (system_state == SYSTEM_POWER_OFF) {
6559                 pci_wake_from_d3(pdev, wake);
6560                 pci_set_power_state(pdev, PCI_D3hot);
6561         }
6562 }
6563
6564 #ifdef CONFIG_NET_POLL_CONTROLLER
6565 /*
6566  * Polling 'interrupt' - used by things like netconsole to send skbs
6567  * without having to re-enable interrupts. It's not called while
6568  * the interrupt routine is executing.
6569  */
6570 static void igb_netpoll(struct net_device *netdev)
6571 {
6572         struct igb_adapter *adapter = netdev_priv(netdev);
6573         struct e1000_hw *hw = &adapter->hw;
6574         int i;
6575
6576         if (!adapter->msix_entries) {
6577                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6578                 igb_irq_disable(adapter);
6579                 napi_schedule(&q_vector->napi);
6580                 return;
6581         }
6582
6583         for (i = 0; i < adapter->num_q_vectors; i++) {
6584                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6585                 wr32(E1000_EIMC, q_vector->eims_value);
6586                 napi_schedule(&q_vector->napi);
6587         }
6588 }
6589 #endif /* CONFIG_NET_POLL_CONTROLLER */
6590
6591 /**
6592  * igb_io_error_detected - called when PCI error is detected
6593  * @pdev: Pointer to PCI device
6594  * @state: The current pci connection state
6595  *
6596  * This function is called after a PCI bus error affecting
6597  * this device has been detected.
6598  */
6599 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6600                                               pci_channel_state_t state)
6601 {
6602         struct net_device *netdev = pci_get_drvdata(pdev);
6603         struct igb_adapter *adapter = netdev_priv(netdev);
6604
6605         netif_device_detach(netdev);
6606
6607         if (state == pci_channel_io_perm_failure)
6608                 return PCI_ERS_RESULT_DISCONNECT;
6609
6610         if (netif_running(netdev))
6611                 igb_down(adapter);
6612         pci_disable_device(pdev);
6613
6614         /* Request a slot slot reset. */
6615         return PCI_ERS_RESULT_NEED_RESET;
6616 }
6617
6618 /**
6619  * igb_io_slot_reset - called after the pci bus has been reset.
6620  * @pdev: Pointer to PCI device
6621  *
6622  * Restart the card from scratch, as if from a cold-boot. Implementation
6623  * resembles the first-half of the igb_resume routine.
6624  */
6625 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6626 {
6627         struct net_device *netdev = pci_get_drvdata(pdev);
6628         struct igb_adapter *adapter = netdev_priv(netdev);
6629         struct e1000_hw *hw = &adapter->hw;
6630         pci_ers_result_t result;
6631         int err;
6632
6633         if (pci_enable_device_mem(pdev)) {
6634                 dev_err(&pdev->dev,
6635                         "Cannot re-enable PCI device after reset.\n");
6636                 result = PCI_ERS_RESULT_DISCONNECT;
6637         } else {
6638                 pci_set_master(pdev);
6639                 pci_restore_state(pdev);
6640                 pci_save_state(pdev);
6641
6642                 pci_enable_wake(pdev, PCI_D3hot, 0);
6643                 pci_enable_wake(pdev, PCI_D3cold, 0);
6644
6645                 igb_reset(adapter);
6646                 wr32(E1000_WUS, ~0);
6647                 result = PCI_ERS_RESULT_RECOVERED;
6648         }
6649
6650         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6651         if (err) {
6652                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6653                         "failed 0x%0x\n", err);
6654                 /* non-fatal, continue */
6655         }
6656
6657         return result;
6658 }
6659
6660 /**
6661  * igb_io_resume - called when traffic can start flowing again.
6662  * @pdev: Pointer to PCI device
6663  *
6664  * This callback is called when the error recovery driver tells us that
6665  * its OK to resume normal operation. Implementation resembles the
6666  * second-half of the igb_resume routine.
6667  */
6668 static void igb_io_resume(struct pci_dev *pdev)
6669 {
6670         struct net_device *netdev = pci_get_drvdata(pdev);
6671         struct igb_adapter *adapter = netdev_priv(netdev);
6672
6673         if (netif_running(netdev)) {
6674                 if (igb_up(adapter)) {
6675                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6676                         return;
6677                 }
6678         }
6679
6680         netif_device_attach(netdev);
6681
6682         /* let the f/w know that the h/w is now under the control of the
6683          * driver. */
6684         igb_get_hw_control(adapter);
6685 }
6686
6687 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6688                              u8 qsel)
6689 {
6690         u32 rar_low, rar_high;
6691         struct e1000_hw *hw = &adapter->hw;
6692
6693         /* HW expects these in little endian so we reverse the byte order
6694          * from network order (big endian) to little endian
6695          */
6696         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6697                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6698         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6699
6700         /* Indicate to hardware the Address is Valid. */
6701         rar_high |= E1000_RAH_AV;
6702
6703         if (hw->mac.type == e1000_82575)
6704                 rar_high |= E1000_RAH_POOL_1 * qsel;
6705         else
6706                 rar_high |= E1000_RAH_POOL_1 << qsel;
6707
6708         wr32(E1000_RAL(index), rar_low);
6709         wrfl();
6710         wr32(E1000_RAH(index), rar_high);
6711         wrfl();
6712 }
6713
6714 static int igb_set_vf_mac(struct igb_adapter *adapter,
6715                           int vf, unsigned char *mac_addr)
6716 {
6717         struct e1000_hw *hw = &adapter->hw;
6718         /* VF MAC addresses start at end of receive addresses and moves
6719          * torwards the first, as a result a collision should not be possible */
6720         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6721
6722         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6723
6724         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6725
6726         return 0;
6727 }
6728
6729 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6730 {
6731         struct igb_adapter *adapter = netdev_priv(netdev);
6732         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6733                 return -EINVAL;
6734         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6735         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6736         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6737                                       " change effective.");
6738         if (test_bit(__IGB_DOWN, &adapter->state)) {
6739                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6740                          " but the PF device is not up.\n");
6741                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6742                          " attempting to use the VF device.\n");
6743         }
6744         return igb_set_vf_mac(adapter, vf, mac);
6745 }
6746
6747 static int igb_link_mbps(int internal_link_speed)
6748 {
6749         switch (internal_link_speed) {
6750         case SPEED_100:
6751                 return 100;
6752         case SPEED_1000:
6753                 return 1000;
6754         default:
6755                 return 0;
6756         }
6757 }
6758
6759 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6760                                   int link_speed)
6761 {
6762         int rf_dec, rf_int;
6763         u32 bcnrc_val;
6764
6765         if (tx_rate != 0) {
6766                 /* Calculate the rate factor values to set */
6767                 rf_int = link_speed / tx_rate;
6768                 rf_dec = (link_speed - (rf_int * tx_rate));
6769                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6770
6771                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6772                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6773                                E1000_RTTBCNRC_RF_INT_MASK);
6774                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6775         } else {
6776                 bcnrc_val = 0;
6777         }
6778
6779         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6780         wr32(E1000_RTTBCNRC, bcnrc_val);
6781 }
6782
6783 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6784 {
6785         int actual_link_speed, i;
6786         bool reset_rate = false;
6787
6788         /* VF TX rate limit was not set or not supported */
6789         if ((adapter->vf_rate_link_speed == 0) ||
6790             (adapter->hw.mac.type != e1000_82576))
6791                 return;
6792
6793         actual_link_speed = igb_link_mbps(adapter->link_speed);
6794         if (actual_link_speed != adapter->vf_rate_link_speed) {
6795                 reset_rate = true;
6796                 adapter->vf_rate_link_speed = 0;
6797                 dev_info(&adapter->pdev->dev,
6798                          "Link speed has been changed. VF Transmit "
6799                          "rate is disabled\n");
6800         }
6801
6802         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6803                 if (reset_rate)
6804                         adapter->vf_data[i].tx_rate = 0;
6805
6806                 igb_set_vf_rate_limit(&adapter->hw, i,
6807                                       adapter->vf_data[i].tx_rate,
6808                                       actual_link_speed);
6809         }
6810 }
6811
6812 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6813 {
6814         struct igb_adapter *adapter = netdev_priv(netdev);
6815         struct e1000_hw *hw = &adapter->hw;
6816         int actual_link_speed;
6817
6818         if (hw->mac.type != e1000_82576)
6819                 return -EOPNOTSUPP;
6820
6821         actual_link_speed = igb_link_mbps(adapter->link_speed);
6822         if ((vf >= adapter->vfs_allocated_count) ||
6823             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6824             (tx_rate < 0) || (tx_rate > actual_link_speed))
6825                 return -EINVAL;
6826
6827         adapter->vf_rate_link_speed = actual_link_speed;
6828         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6829         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6830
6831         return 0;
6832 }
6833
6834 static int igb_ndo_get_vf_config(struct net_device *netdev,
6835                                  int vf, struct ifla_vf_info *ivi)
6836 {
6837         struct igb_adapter *adapter = netdev_priv(netdev);
6838         if (vf >= adapter->vfs_allocated_count)
6839                 return -EINVAL;
6840         ivi->vf = vf;
6841         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6842         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6843         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6844         ivi->qos = adapter->vf_data[vf].pf_qos;
6845         return 0;
6846 }
6847
6848 static void igb_vmm_control(struct igb_adapter *adapter)
6849 {
6850         struct e1000_hw *hw = &adapter->hw;
6851         u32 reg;
6852
6853         switch (hw->mac.type) {
6854         case e1000_82575:
6855         default:
6856                 /* replication is not supported for 82575 */
6857                 return;
6858         case e1000_82576:
6859                 /* notify HW that the MAC is adding vlan tags */
6860                 reg = rd32(E1000_DTXCTL);
6861                 reg |= E1000_DTXCTL_VLAN_ADDED;
6862                 wr32(E1000_DTXCTL, reg);
6863         case e1000_82580:
6864                 /* enable replication vlan tag stripping */
6865                 reg = rd32(E1000_RPLOLR);
6866                 reg |= E1000_RPLOLR_STRVLAN;
6867                 wr32(E1000_RPLOLR, reg);
6868         case e1000_i350:
6869                 /* none of the above registers are supported by i350 */
6870                 break;
6871         }
6872
6873         if (adapter->vfs_allocated_count) {
6874                 igb_vmdq_set_loopback_pf(hw, true);
6875                 igb_vmdq_set_replication_pf(hw, true);
6876                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6877                                                 adapter->vfs_allocated_count);
6878         } else {
6879                 igb_vmdq_set_loopback_pf(hw, false);
6880                 igb_vmdq_set_replication_pf(hw, false);
6881         }
6882 }
6883
6884 /* igb_main.c */