/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
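/* A minimal illustration of the expansion above (MAJ/MIN/BUILD are
 * defined earlier in the full file; the values here are hypothetical):
 * with MAJ 3, MIN 0 and BUILD 6, __stringify() pastes each token into
 * a string literal, so DRV_VERSION becomes the concatenated literal
 * "3.0.6-k".
 */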
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
					      pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
	.name = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe = igb_probe,
	.remove = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend = igb_suspend,
	.resume = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}
	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31  24             15       0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %p %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}
	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS  |
	 *   | Checksum   Ident  |   |           |    | Type | Type  |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type >= e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
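/* Sketch of the composition above (illustrative, assuming the usual
 * IGB_82580_TSYNC_SHIFT of 24 from igb.h): on an 82580-class part the
 * SYSTIMR residue read first fills bits [23:0], SYSTIML then lands in
 * bits [55:24] and SYSTIMH in the remaining high bits of the 64-bit
 * cycle count; on older parts shift stays 0 and SYSTIML/SYSTIMH form a
 * plain 64-bit counter.
 */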
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
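/* A quick sanity check of the interleaving this macro produces
 * (values computed by hand from the expression above):
 *   Q_IDX_82576(0) = 0,  Q_IDX_82576(1) = 8,
 *   Q_IDX_82576(2) = 1,  Q_IDX_82576(3) = 9,
 * i.e. consecutive software queues alternate between the low half
 * (0..7) and high half (8..15) of the hardware queue space, matching
 * the VF pairing described in igb_cache_ring_register() below.
 */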
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/* On i350, loopback VLAN packets have the tag byte-swapped. */
		if (adapter->hw.mac.type == e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}
/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset of in IVAR, should be multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}
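/* Worked example (values computed from the code above and the 82576
 * mapping in igb_assign_vector() below): to point rx queue 10's cause
 * at MSI-X vector 3 on an 82576, the caller passes
 *   index  = rx_queue & 0x7        = 2
 *   offset = (rx_queue & 0x8) << 1 = 16
 * so igb_write_ivar() clears bits [23:16] of IVAR0[2] and writes
 * (3 | E1000_IVAR_VALID) there, leaving the other byte-wide cause
 * fields in that register untouched.
 */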
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
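/* Vector accounting example (assuming 4 RSS queues and no VFs): with
 * IGB_FLAG_QUEUE_PAIRS set, each vector services one rx/tx queue pair,
 * so numvecs = 4 queue vectors + 1 link-status vector = 5 MSI-X
 * entries requested; with pairing disabled it would be 4 + 4 + 1 = 9.
 */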
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
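/* Mapping example (assuming 4 rx and 4 tx queues): with only 4
 * q_vectors the paired branch above gives vector 0 rx0+tx0, vector 1
 * rx1+tx1, and so on; with 8 or more q_vectors available, the first
 * branch instead gives every rx and every tx queue its own vector.
 */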
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
				 adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
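		/* Worked example of the sizing above (assuming a 9018-byte
		 * jumbo max_frame_size): min_tx_space = (9018 + 16 - 4) * 2
		 * = 18060 bytes, aligned up to 18432 and shifted down to
		 * 18 KB; min_rx_space = 9018 aligned up to 9216, i.e. 9 KB.
		 */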
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
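	/* Watermark example (illustrative, using the 82575's 34 KB Rx PBA
	 * and a 1522-byte frame): hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
	 * = min(31334, 31772) = 31334; masking to 16-byte granularity gives
	 * high_water = 31328 and low_water = 31312 bytes.
	 */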
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	igb_init_dmac(adapter, pba);
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static netdev_features_t igb_fix_features(struct net_device *netdev,
	netdev_features_t features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev,
	netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open = igb_open,
	.ndo_stop = igb_close,
	.ndo_start_xmit = igb_xmit_frame,
	.ndo_get_stats64 = igb_get_stats64,
	.ndo_set_rx_mode = igb_set_rx_mode,
	.ndo_set_mac_address = igb_set_mac,
	.ndo_change_mtu = igb_change_mtu,
	.ndo_do_ioctl = igb_ioctl,
	.ndo_tx_timeout = igb_tx_timeout,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac = igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
	.ndo_get_vf_config = igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = igb_netpoll,
#endif
	.ndo_fix_features = igb_fix_features,
	.ndo_set_features = igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
					   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	err = -EIO;
	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;
	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	/*
	 * features is initialized to 0 in allocation, it might have bits
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
			    NETIF_F_IPV6_CSUM |
			    NETIF_F_TSO |
			    NETIF_F_TSO6 |
			    NETIF_F_RXHASH |
			    NETIF_F_RXCSUM |
			    NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;

	/* set this bit last since it cannot be part of hw_features */
	netdev->features |= NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;
	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, igb_watchdog,
		    (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
		    (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;
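	/* 0x2f is the usual e1000 all-speeds advertisement mask:
	 * 10half (0x01) | 10full (0x02) | 100half (0x04) | 100full (0x08)
	 * | 1000full (0x20); gigabit half-duplex (0x10) is never advertised.
	 */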
	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
2070 strcpy(netdev->name, "eth%d");
2071 err = register_netdev(netdev);
2075 /* carrier off reporting is important to ethtool even BEFORE open */
2076 netif_carrier_off(netdev);
2078 #ifdef CONFIG_IGB_DCA
2079 if (dca_add_requester(&pdev->dev) == 0) {
2080 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2081 dev_info(&pdev->dev, "DCA enabled\n");
2082 igb_setup_dca(adapter);
2086 /* do hw tstamp init after resetting */
2087 igb_init_hw_timer(adapter);
2089 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2090 /* print bus type/speed/width info */
2091 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2093 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2094 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2096 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2097 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2098 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2102 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
if (ret_val)
2104 strcpy(part_str, "Unknown");
2105 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2106 dev_info(&pdev->dev,
2107 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2108 adapter->msix_entries ? "MSI-X" :
2109 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2110 adapter->num_rx_queues, adapter->num_tx_queues);
2111 switch (hw->mac.type) {
2113 igb_set_eee_i350(hw);
2121 igb_release_hw_control(adapter);
2123 if (!igb_check_reset_block(hw))
2126 if (hw->flash_address)
2127 iounmap(hw->flash_address);
2129 igb_clear_interrupt_scheme(adapter);
2130 iounmap(hw->hw_addr);
2132 free_netdev(netdev);
2134 pci_release_selected_regions(pdev,
2135 pci_select_bars(pdev, IORESOURCE_MEM));
2138 pci_disable_device(pdev);
2143 * igb_remove - Device Removal Routine
2144 * @pdev: PCI device information struct
2146 * igb_remove is called by the PCI subsystem to alert the driver
2147 * that it should release a PCI device. This could be caused by a
2148 * Hot-Plug event, or because the driver is going to be removed from
2151 static void __devexit igb_remove(struct pci_dev *pdev)
2153 struct net_device *netdev = pci_get_drvdata(pdev);
2154 struct igb_adapter *adapter = netdev_priv(netdev);
2155 struct e1000_hw *hw = &adapter->hw;
2158 * The watchdog timer may be rescheduled, so explicitly
2159 * disable watchdog from being rescheduled.
2161 set_bit(__IGB_DOWN, &adapter->state);
2162 del_timer_sync(&adapter->watchdog_timer);
2163 del_timer_sync(&adapter->phy_info_timer);
2165 cancel_work_sync(&adapter->reset_task);
2166 cancel_work_sync(&adapter->watchdog_task);
2168 #ifdef CONFIG_IGB_DCA
2169 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2170 dev_info(&pdev->dev, "DCA disabled\n");
2171 dca_remove_requester(&pdev->dev);
2172 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2173 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2177 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2178 * would have already happened in close and is redundant. */
2179 igb_release_hw_control(adapter);
2181 unregister_netdev(netdev);
2183 igb_clear_interrupt_scheme(adapter);
2185 #ifdef CONFIG_PCI_IOV
2186 /* reclaim resources allocated to VFs */
2187 if (adapter->vf_data) {
2188 /* disable iov and allow time for transactions to clear */
2189 if (!igb_check_vf_assignment(adapter)) {
2190 pci_disable_sriov(pdev);
2193 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2196 kfree(adapter->vf_data);
2197 adapter->vf_data = NULL;
2198 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2201 dev_info(&pdev->dev, "IOV Disabled\n");
2205 iounmap(hw->hw_addr);
2206 if (hw->flash_address)
2207 iounmap(hw->flash_address);
2208 pci_release_selected_regions(pdev,
2209 pci_select_bars(pdev, IORESOURCE_MEM));
2211 kfree(adapter->shadow_vfta);
2212 free_netdev(netdev);
2214 pci_disable_pcie_error_reporting(pdev);
2216 pci_disable_device(pdev);
2220 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2221 * @adapter: board private structure to initialize
2223 * This function initializes the vf specific data storage and then attempts to
2224 * allocate the VFs. The reason for ordering it this way is because it is much
2225 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2226 * the memory for the VFs.
2228 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2230 #ifdef CONFIG_PCI_IOV
2231 struct pci_dev *pdev = adapter->pdev;
2232 int old_vfs = igb_find_enabled_vfs(adapter);
2236 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2237 "max_vfs setting of %d\n", old_vfs, max_vfs);
2238 adapter->vfs_allocated_count = old_vfs;
2241 if (!adapter->vfs_allocated_count)
2244 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2245 sizeof(struct vf_data_storage), GFP_KERNEL);
2246 /* if allocation failed then we do not support SR-IOV */
2247 if (!adapter->vf_data) {
2248 adapter->vfs_allocated_count = 0;
2249 dev_err(&pdev->dev, "Unable to allocate memory for VF "
"Data Storage\n");
2255 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2258 dev_info(&pdev->dev, "%d VFs allocated\n",
2259 adapter->vfs_allocated_count);
2260 for (i = 0; i < adapter->vfs_allocated_count; i++)
2261 igb_vf_configure(adapter, i);
2263 /* DMA Coalescing is not supported in IOV mode. */
2264 adapter->flags &= ~IGB_FLAG_DMAC;
2267 kfree(adapter->vf_data);
2268 adapter->vf_data = NULL;
2269 adapter->vfs_allocated_count = 0;
2272 #endif /* CONFIG_PCI_IOV */
2276 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2277 * @adapter: board private structure to initialize
2279 * igb_init_hw_timer initializes the function pointer and values for the hw
2280 * timer found in hardware.
2282 static void igb_init_hw_timer(struct igb_adapter *adapter)
2284 struct e1000_hw *hw = &adapter->hw;
2286 switch (hw->mac.type) {
2289 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290 adapter->cycles.read = igb_read_clock;
2291 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292 adapter->cycles.mult = 1;
2294 * The 82580 timesync updates the system timer in 8 ns increments,
2295 * and the value cannot be shifted. Instead we need to shift
2296 * the registers to generate a 64-bit timer value. As a result
2297 * SYSTIMR/L/H, TXSTMPL/H and RXSTMPL/H all have to be shifted by
2298 * 24 in order to generate a larger value for synchronization.
2300 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
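/*
 * For illustration, a sketch of the resulting read path (roughly what
 * the igb_read_clock cyclecounter callback does on 82580-class parts;
 * the exact register handling here is an assumption for exposition):
 *
 *	stamp  = rd32(E1000_SYSTIMR) >> 8;
 *	stamp |= (u64)rd32(E1000_SYSTIML) << IGB_82580_TSYNC_SHIFT;
 *	stamp |= (u64)rd32(E1000_SYSTIMH) << (IGB_82580_TSYNC_SHIFT + 32);
 */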
2301 /* disable system timer temporarily by setting bit 31 */
2302 wr32(E1000_TSAUXC, 0x80000000);
2305 /* Set registers so that rollover occurs soon to test this. */
2306 wr32(E1000_SYSTIMR, 0x00000000);
2307 wr32(E1000_SYSTIML, 0x80000000);
2308 wr32(E1000_SYSTIMH, 0x000000FF);
2311 /* enable system timer by clearing bit 31 */
2312 wr32(E1000_TSAUXC, 0x0);
2315 timecounter_init(&adapter->clock,
2317 ktime_to_ns(ktime_get_real()));
2319 * Synchronize our NIC clock against the system wall clock. NIC
2320 * time stamp reading requires ~3us per sample, and each sample
2321 * proved stable even under load, so we only need 10 samples for
2322 * each offset comparison.
2324 memset(&adapter->compare, 0, sizeof(adapter->compare));
2325 adapter->compare.source = &adapter->clock;
2326 adapter->compare.target = ktime_get_real;
2327 adapter->compare.num_samples = 10;
2328 timecompare_update(&adapter->compare, 0);
2332 * Initialize hardware timer: we keep it running just in case
2333 * that some program needs it later on.
2335 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2336 adapter->cycles.read = igb_read_clock;
2337 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2338 adapter->cycles.mult = 1;
2340 * Scale the NIC clock cycle by a large factor so that
2341 * relatively small clock corrections can be added or
2342 * subtracted at each clock tick. The drawbacks of a large
2343 * factor are a) that the clock register overflows more quickly
2344 * (not such a big deal) and b) that the increment per tick has
2345 * to fit into 24 bits. As a result we need to use a shift of
2346 * 19 so we can fit a value of 16 into the TIMINCA register.
2348 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2350 (1 << E1000_TIMINCA_16NS_SHIFT) |
2351 (16 << IGB_82576_TSYNC_SHIFT));
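/*
 * Worked example of the constraint above: with a shift of 19 the
 * per-tick increment written to TIMINCA is 16 << 19 = 0x00800000,
 * which just fits the 24-bit increment field while leaving 2^19
 * scaled counter units per nanosecond for fine-grained corrections.
 */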
2353 /* Set registers so that rollover occurs soon to test this. */
2354 wr32(E1000_SYSTIML, 0x00000000);
2355 wr32(E1000_SYSTIMH, 0xFF800000);
2358 timecounter_init(&adapter->clock,
2360 ktime_to_ns(ktime_get_real()));
2362 * Synchronize our NIC clock against the system wall clock. NIC
2363 * time stamp reading requires ~3us per sample, and each sample
2364 * proved stable even under load, so we only need 10 samples for
2365 * each offset comparison.
2367 memset(&adapter->compare, 0, sizeof(adapter->compare));
2368 adapter->compare.source = &adapter->clock;
2369 adapter->compare.target = ktime_get_real;
2370 adapter->compare.num_samples = 10;
2371 timecompare_update(&adapter->compare, 0);
2374 /* 82575 does not support timesync */
2382 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2383 * @adapter: board private structure to initialize
2385 * igb_sw_init initializes the Adapter private data structure.
2386 * Fields are initialized based on PCI device information and
2387 * OS network device settings (MTU size).
2389 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2391 struct e1000_hw *hw = &adapter->hw;
2392 struct net_device *netdev = adapter->netdev;
2393 struct pci_dev *pdev = adapter->pdev;
2395 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2397 /* set default ring sizes */
2398 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2399 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2401 /* set default ITR values */
2402 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2403 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2405 /* set default work limits */
2406 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2408 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
VLAN_HLEN;
2410 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
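/*
 * Example with the default MTU of 1500: max_frame_size = 1500 +
 * 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes, and
 * min_frame_size = 60 (ETH_ZLEN) + 4 = 64 bytes, the classic Ethernet
 * minimum.
 */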
2414 spin_lock_init(&adapter->stats64_lock);
2415 #ifdef CONFIG_PCI_IOV
2416 switch (hw->mac.type) {
2420 dev_warn(&pdev->dev,
2421 "Maximum of 7 VFs per PF, using max\n");
2422 adapter->vfs_allocated_count = 7;
2424 adapter->vfs_allocated_count = max_vfs;
2429 #endif /* CONFIG_PCI_IOV */
2430 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2431 /* i350 cannot do RSS and SR-IOV at the same time */
2432 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2433 adapter->rss_queues = 1;
2436 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2437 * then we should combine the queues into a queue pair in order to
2438 * conserve interrupts due to limited supply
2440 if ((adapter->rss_queues > 4) ||
2441 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2442 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
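/*
 * Example: with 8 rss_queues and no VFs, unpaired operation would want
 * separate Tx and Rx vectors per queue (16 in all) plus one for other
 * causes, which can exceed the adapter's limited MSI-X supply; pairing
 * halves the per-queue vector demand.
 */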
2444 /* Setup and initialize a copy of the hw vlan table array */
2445 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2446 E1000_VLAN_FILTER_TBL_SIZE,
2449 /* This call may decrease the number of queues */
2450 if (igb_init_interrupt_scheme(adapter)) {
2451 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2455 igb_probe_vfs(adapter);
2457 /* Explicitly disable IRQ since the NIC can be in any state. */
2458 igb_irq_disable(adapter);
2460 if (hw->mac.type == e1000_i350)
2461 adapter->flags &= ~IGB_FLAG_DMAC;
2463 set_bit(__IGB_DOWN, &adapter->state);
2468 * igb_open - Called when a network interface is made active
2469 * @netdev: network interface device structure
2471 * Returns 0 on success, negative value on failure
2473 * The open entry point is called when a network interface is made
2474 * active by the system (IFF_UP). At this point all resources needed
2475 * for transmit and receive operations are allocated, the interrupt
2476 * handler is registered with the OS, the watchdog timer is started,
2477 * and the stack is notified that the interface is ready.
2479 static int igb_open(struct net_device *netdev)
2481 struct igb_adapter *adapter = netdev_priv(netdev);
2482 struct e1000_hw *hw = &adapter->hw;
2486 /* disallow open during test */
2487 if (test_bit(__IGB_TESTING, &adapter->state))
2490 netif_carrier_off(netdev);
2492 /* allocate transmit descriptors */
2493 err = igb_setup_all_tx_resources(adapter);
2497 /* allocate receive descriptors */
2498 err = igb_setup_all_rx_resources(adapter);
2502 igb_power_up_link(adapter);
2504 /* before we allocate an interrupt, we must be ready to handle it.
2505 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2506 * as soon as we call pci_request_irq, so we have to setup our
2507 * clean_rx handler before we do so. */
2508 igb_configure(adapter);
2510 err = igb_request_irq(adapter);
2514 /* From here on the code is the same as igb_up() */
2515 clear_bit(__IGB_DOWN, &adapter->state);
2517 for (i = 0; i < adapter->num_q_vectors; i++)
2518 napi_enable(&(adapter->q_vector[i]->napi));
2520 /* Clear any pending interrupts. */
2523 igb_irq_enable(adapter);
2525 /* notify VFs that reset has been completed */
2526 if (adapter->vfs_allocated_count) {
2527 u32 reg_data = rd32(E1000_CTRL_EXT);
2528 reg_data |= E1000_CTRL_EXT_PFRSTD;
2529 wr32(E1000_CTRL_EXT, reg_data);
2532 netif_tx_start_all_queues(netdev);
2534 /* start the watchdog. */
2535 hw->mac.get_link_status = 1;
2536 schedule_work(&adapter->watchdog_task);
2541 igb_release_hw_control(adapter);
2542 igb_power_down_link(adapter);
2543 igb_free_all_rx_resources(adapter);
2545 igb_free_all_tx_resources(adapter);
2553 * igb_close - Disables a network interface
2554 * @netdev: network interface device structure
2556 * Returns 0, this is not allowed to fail
2558 * The close entry point is called when an interface is de-activated
2559 * by the OS. The hardware is still under the driver's control, but
2560 * needs to be disabled. A global MAC reset is issued to stop the
2561 * hardware, and all transmit and receive resources are freed.
2563 static int igb_close(struct net_device *netdev)
2565 struct igb_adapter *adapter = netdev_priv(netdev);
2567 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2570 igb_free_irq(adapter);
2572 igb_free_all_tx_resources(adapter);
2573 igb_free_all_rx_resources(adapter);
2579 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2580 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2582 * Return 0 on success, negative on failure
2584 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2586 struct device *dev = tx_ring->dev;
2587 int orig_node = dev_to_node(dev);
2590 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2591 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2592 if (!tx_ring->tx_buffer_info)
2593 tx_ring->tx_buffer_info = vzalloc(size);
2594 if (!tx_ring->tx_buffer_info)
2597 /* round up to nearest 4K */
2598 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2599 tx_ring->size = ALIGN(tx_ring->size, 4096);
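/*
 * Example of the rounding above: 256 descriptors at 16 bytes each is
 * exactly 4096 and stays 4096, while 320 descriptors (5120 bytes)
 * would be rounded up by ALIGN() to 8192.
 */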
2601 set_dev_node(dev, tx_ring->numa_node);
2602 tx_ring->desc = dma_alloc_coherent(dev,
2606 set_dev_node(dev, orig_node);
2608 tx_ring->desc = dma_alloc_coherent(dev,
2616 tx_ring->next_to_use = 0;
2617 tx_ring->next_to_clean = 0;
2622 vfree(tx_ring->tx_buffer_info);
2624 "Unable to allocate memory for the transmit descriptor ring\n");
2629 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2630 * (Descriptors) for all queues
2631 * @adapter: board private structure
2633 * Return 0 on success, negative on failure
2635 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2637 struct pci_dev *pdev = adapter->pdev;
2640 for (i = 0; i < adapter->num_tx_queues; i++) {
2641 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2644 "Allocation for Tx Queue %u failed\n", i);
2645 for (i--; i >= 0; i--)
2646 igb_free_tx_resources(adapter->tx_ring[i]);
2655 * igb_setup_tctl - configure the transmit control registers
2656 * @adapter: Board private structure
2658 void igb_setup_tctl(struct igb_adapter *adapter)
2660 struct e1000_hw *hw = &adapter->hw;
2663 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2664 wr32(E1000_TXDCTL(0), 0);
2666 /* Program the Transmit Control Register */
2667 tctl = rd32(E1000_TCTL);
2668 tctl &= ~E1000_TCTL_CT;
2669 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2670 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2672 igb_config_collision_dist(hw);
2674 /* Enable transmits */
2675 tctl |= E1000_TCTL_EN;
2677 wr32(E1000_TCTL, tctl);
2681 * igb_configure_tx_ring - Configure transmit ring after Reset
2682 * @adapter: board private structure
2683 * @ring: tx ring to configure
2685 * Configure a transmit ring after a reset.
2687 void igb_configure_tx_ring(struct igb_adapter *adapter,
2688 struct igb_ring *ring)
2690 struct e1000_hw *hw = &adapter->hw;
2692 u64 tdba = ring->dma;
2693 int reg_idx = ring->reg_idx;
2695 /* disable the queue */
2696 wr32(E1000_TXDCTL(reg_idx), 0);
2700 wr32(E1000_TDLEN(reg_idx),
2701 ring->count * sizeof(union e1000_adv_tx_desc));
2702 wr32(E1000_TDBAL(reg_idx),
2703 tdba & 0x00000000ffffffffULL);
2704 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2706 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2707 wr32(E1000_TDH(reg_idx), 0);
2708 writel(0, ring->tail);
2710 txdctl |= IGB_TX_PTHRESH;
2711 txdctl |= IGB_TX_HTHRESH << 8;
2712 txdctl |= IGB_TX_WTHRESH << 16;
2714 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2715 wr32(E1000_TXDCTL(reg_idx), txdctl);
2719 * igb_configure_tx - Configure transmit Unit after Reset
2720 * @adapter: board private structure
2722 * Configure the Tx unit of the MAC after a reset.
2724 static void igb_configure_tx(struct igb_adapter *adapter)
2728 for (i = 0; i < adapter->num_tx_queues; i++)
2729 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2733 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2734 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2736 * Returns 0 on success, negative on failure
2738 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2740 struct device *dev = rx_ring->dev;
2741 int orig_node = dev_to_node(dev);
2744 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2745 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2746 if (!rx_ring->rx_buffer_info)
2747 rx_ring->rx_buffer_info = vzalloc(size);
2748 if (!rx_ring->rx_buffer_info)
2751 desc_len = sizeof(union e1000_adv_rx_desc);
2753 /* Round up to nearest 4K */
2754 rx_ring->size = rx_ring->count * desc_len;
2755 rx_ring->size = ALIGN(rx_ring->size, 4096);
2757 set_dev_node(dev, rx_ring->numa_node);
2758 rx_ring->desc = dma_alloc_coherent(dev,
2762 set_dev_node(dev, orig_node);
2764 rx_ring->desc = dma_alloc_coherent(dev,
2772 rx_ring->next_to_clean = 0;
2773 rx_ring->next_to_use = 0;
2778 vfree(rx_ring->rx_buffer_info);
2779 rx_ring->rx_buffer_info = NULL;
2780 dev_err(dev,
"Unable to allocate memory for the receive descriptor ring\n");
2786 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2787 * (Descriptors) for all queues
2788 * @adapter: board private structure
2790 * Return 0 on success, negative on failure
2792 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2794 struct pci_dev *pdev = adapter->pdev;
2797 for (i = 0; i < adapter->num_rx_queues; i++) {
2798 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2801 "Allocation for Rx Queue %u failed\n", i);
2802 for (i--; i >= 0; i--)
2803 igb_free_rx_resources(adapter->rx_ring[i]);
2812 * igb_setup_mrqc - configure the multiple receive queue control registers
2813 * @adapter: Board private structure
2815 static void igb_setup_mrqc(struct igb_adapter *adapter)
2817 struct e1000_hw *hw = &adapter->hw;
2819 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2824 static const u8 rsshash[40] = {
2825 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2826 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2827 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2828 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2830 /* Fill out hash function seeds */
2831 for (j = 0; j < 10; j++) {
2832 u32 rsskey = rsshash[(j * 4)];
2833 rsskey |= rsshash[(j * 4) + 1] << 8;
2834 rsskey |= rsshash[(j * 4) + 2] << 16;
2835 rsskey |= rsshash[(j * 4) + 3] << 24;
2836 array_wr32(E1000_RSSRK(0), j, rsskey);
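/*
 * Example of the packing above: for j = 0 the key bytes 0x6d 0x5a
 * 0x56 0xda are assembled least-significant byte first, so RSSRK(0)
 * is written with 0xda565a6d.
 */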
2839 num_rx_queues = adapter->rss_queues;
2841 if (adapter->vfs_allocated_count) {
2842 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2843 switch (hw->mac.type) {
2860 if (hw->mac.type == e1000_82575)
2864 for (j = 0; j < (32 * 4); j++) {
2865 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2867 reta.bytes[j & 3] |= num_rx_queues << shift2;
2869 wr32(E1000_RETA(j >> 2), reta.dword);
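/*
 * Sketch of what one write above carries: with num_rx_queues = 4 and
 * shift = 0, entries j = 0..3 map to queues 0,1,2,3, so on a
 * little-endian CPU the first RETA dword is 0x03020100, and the
 * pattern repeats across all 128 table entries.
 */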
2873 * Disable raw packet checksumming so that RSS hash is placed in
2874 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2875 * offloads as they are enabled by default
2877 rxcsum = rd32(E1000_RXCSUM);
2878 rxcsum |= E1000_RXCSUM_PCSD;
2880 if (adapter->hw.mac.type >= e1000_82576)
2881 /* Enable Receive Checksum Offload for SCTP */
2882 rxcsum |= E1000_RXCSUM_CRCOFL;
2884 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2885 wr32(E1000_RXCSUM, rxcsum);
2887 /* If VMDq is enabled then we set the appropriate mode for that, else
2888 * we default to RSS so that an RSS hash is calculated per packet even
2889 * if we are only using one queue */
2890 if (adapter->vfs_allocated_count) {
2891 if (hw->mac.type > e1000_82575) {
2892 /* Set the default pool for the PF's first queue */
2893 u32 vtctl = rd32(E1000_VT_CTL);
2894 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2895 E1000_VT_CTL_DISABLE_DEF_POOL);
2896 vtctl |= adapter->vfs_allocated_count <<
2897 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2898 wr32(E1000_VT_CTL, vtctl);
2900 if (adapter->rss_queues > 1)
2901 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2903 mrqc = E1000_MRQC_ENABLE_VMDQ;
2905 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2907 igb_vmm_control(adapter);
2910 * Generate RSS hash based on TCP port numbers and/or
2911 * IPv4/v6 src and dst addresses since UDP cannot be
2912 * hashed reliably due to IP fragmentation
2914 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2915 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2916 E1000_MRQC_RSS_FIELD_IPV6 |
2917 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2918 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2920 wr32(E1000_MRQC, mrqc);
2924 * igb_setup_rctl - configure the receive control registers
2925 * @adapter: Board private structure
2927 void igb_setup_rctl(struct igb_adapter *adapter)
2929 struct e1000_hw *hw = &adapter->hw;
2932 rctl = rd32(E1000_RCTL);
2934 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2935 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2937 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2938 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2941 * enable stripping of CRC. It's unlikely this will break BMC
2942 * redirection as it did with e1000. Newer features require
2943 * that the HW strips the CRC.
2945 rctl |= E1000_RCTL_SECRC;
2947 /* disable store bad packets and clear size bits. */
2948 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2950 /* enable LPE to prevent packets larger than max_frame_size */
2951 rctl |= E1000_RCTL_LPE;
2953 /* disable queue 0 to prevent tail write w/o re-config */
2954 wr32(E1000_RXDCTL(0), 0);
2956 /* Attention!!! For SR-IOV PF driver operations you must enable
2957 * queue drop for all VF and PF queues to prevent head of line blocking
2958 * if an un-trusted VF does not provide descriptors to hardware.
2960 if (adapter->vfs_allocated_count) {
2961 /* set all queue drop enable bits */
2962 wr32(E1000_QDE, ALL_QUEUES);
2965 wr32(E1000_RCTL, rctl);
2968 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2971 struct e1000_hw *hw = &adapter->hw;
2974 /* if it isn't the PF check to see if VFs are enabled and
2975 * increase the size to support vlan tags */
2976 if (vfn < adapter->vfs_allocated_count &&
2977 adapter->vf_data[vfn].vlans_enabled)
2978 size += VLAN_TAG_SIZE;
2980 vmolr = rd32(E1000_VMOLR(vfn));
2981 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2982 vmolr |= size | E1000_VMOLR_LPE;
2983 wr32(E1000_VMOLR(vfn), vmolr);
2989 * igb_rlpml_set - set maximum receive packet size
2990 * @adapter: board private structure
2992 * Configure maximum receivable packet size.
2994 static void igb_rlpml_set(struct igb_adapter *adapter)
2996 u32 max_frame_size = adapter->max_frame_size;
2997 struct e1000_hw *hw = &adapter->hw;
2998 u16 pf_id = adapter->vfs_allocated_count;
3001 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3003 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3004 * to our max jumbo frame size, in case we need to enable
3005 * jumbo frames on one of the rings later.
3006 * This will not pass over-length frames into the default
3007 * queue because it's gated by the VMOLR.RLPML.
3009 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3012 wr32(E1000_RLPML, max_frame_size);
3015 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3018 struct e1000_hw *hw = &adapter->hw;
3022 * This register exists only on 82576 and newer so if we are older then
3023 * we should exit and do nothing
3025 if (hw->mac.type < e1000_82576)
3028 vmolr = rd32(E1000_VMOLR(vfn));
3029 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3031 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3033 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3035 /* clear all bits that might not be set */
3036 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3038 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3039 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3041 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3044 if (vfn <= adapter->vfs_allocated_count)
3045 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3047 wr32(E1000_VMOLR(vfn), vmolr);
3051 * igb_configure_rx_ring - Configure a receive ring after Reset
3052 * @adapter: board private structure
3053 * @ring: receive ring to be configured
3055 * Configure the Rx unit of the MAC after a reset.
3057 void igb_configure_rx_ring(struct igb_adapter *adapter,
3058 struct igb_ring *ring)
3060 struct e1000_hw *hw = &adapter->hw;
3061 u64 rdba = ring->dma;
3062 int reg_idx = ring->reg_idx;
3063 u32 srrctl = 0, rxdctl = 0;
3065 /* disable the queue */
3066 wr32(E1000_RXDCTL(reg_idx), 0);
3068 /* Set DMA base address registers */
3069 wr32(E1000_RDBAL(reg_idx),
3070 rdba & 0x00000000ffffffffULL);
3071 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3072 wr32(E1000_RDLEN(reg_idx),
3073 ring->count * sizeof(union e1000_adv_rx_desc));
3075 /* initialize head and tail */
3076 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3077 wr32(E1000_RDH(reg_idx), 0);
3078 writel(0, ring->tail);
3080 /* set descriptor configuration */
3081 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3082 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3083 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3085 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3087 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
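/*
 * Example on a 4K-page system: PAGE_SIZE / 2 = 2048 bytes of packet
 * buffer per half page; the BSIZEPKT field is in 1 KB units, so the
 * shifted value programmed is 2, alongside the fixed IGB_RX_HDR_LEN
 * header buffer selected above.
 */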
3088 if (hw->mac.type >= e1000_82580)
3089 srrctl |= E1000_SRRCTL_TIMESTAMP;
3090 /* Only set Drop Enable if we are supporting multiple queues */
3091 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3092 srrctl |= E1000_SRRCTL_DROP_EN;
3094 wr32(E1000_SRRCTL(reg_idx), srrctl);
3096 /* set filtering for VMDQ pools */
3097 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3099 rxdctl |= IGB_RX_PTHRESH;
3100 rxdctl |= IGB_RX_HTHRESH << 8;
3101 rxdctl |= IGB_RX_WTHRESH << 16;
3103 /* enable receive descriptor fetching */
3104 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3105 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3109 * igb_configure_rx - Configure receive Unit after Reset
3110 * @adapter: board private structure
3112 * Configure the Rx unit of the MAC after a reset.
3114 static void igb_configure_rx(struct igb_adapter *adapter)
3118 /* set UTA to appropriate mode */
3119 igb_set_uta(adapter);
3121 /* set the correct pool for the PF default MAC address in entry 0 */
3122 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3123 adapter->vfs_allocated_count);
3125 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3126 * the Base and Length of the Rx Descriptor Ring */
3127 for (i = 0; i < adapter->num_rx_queues; i++)
3128 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3132 * igb_free_tx_resources - Free Tx Resources per Queue
3133 * @tx_ring: Tx descriptor ring for a specific queue
3135 * Free all transmit software resources
3137 void igb_free_tx_resources(struct igb_ring *tx_ring)
3139 igb_clean_tx_ring(tx_ring);
3141 vfree(tx_ring->tx_buffer_info);
3142 tx_ring->tx_buffer_info = NULL;
3144 /* if not set, then don't free */
3148 dma_free_coherent(tx_ring->dev, tx_ring->size,
3149 tx_ring->desc, tx_ring->dma);
3151 tx_ring->desc = NULL;
3155 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3156 * @adapter: board private structure
3158 * Free all transmit software resources
3160 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3164 for (i = 0; i < adapter->num_tx_queues; i++)
3165 igb_free_tx_resources(adapter->tx_ring[i]);
3168 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3169 struct igb_tx_buffer *tx_buffer)
3171 if (tx_buffer->skb) {
3172 dev_kfree_skb_any(tx_buffer->skb);
3174 dma_unmap_single(ring->dev,
3178 } else if (tx_buffer->dma) {
3179 dma_unmap_page(ring->dev,
3184 tx_buffer->next_to_watch = NULL;
3185 tx_buffer->skb = NULL;
3187 /* buffer_info must be completely set up in the transmit path */
3191 * igb_clean_tx_ring - Free Tx Buffers
3192 * @tx_ring: ring to be cleaned
3194 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3196 struct igb_tx_buffer *buffer_info;
3200 if (!tx_ring->tx_buffer_info)
3202 /* Free all the Tx ring sk_buffs */
3204 for (i = 0; i < tx_ring->count; i++) {
3205 buffer_info = &tx_ring->tx_buffer_info[i];
3206 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3209 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3210 memset(tx_ring->tx_buffer_info, 0, size);
3212 /* Zero out the descriptor ring */
3213 memset(tx_ring->desc, 0, tx_ring->size);
3215 tx_ring->next_to_use = 0;
3216 tx_ring->next_to_clean = 0;
3220 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3221 * @adapter: board private structure
3223 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3227 for (i = 0; i < adapter->num_tx_queues; i++)
3228 igb_clean_tx_ring(adapter->tx_ring[i]);
3232 * igb_free_rx_resources - Free Rx Resources
3233 * @rx_ring: ring to clean the resources from
3235 * Free all receive software resources
3237 void igb_free_rx_resources(struct igb_ring *rx_ring)
3239 igb_clean_rx_ring(rx_ring);
3241 vfree(rx_ring->rx_buffer_info);
3242 rx_ring->rx_buffer_info = NULL;
3244 /* if not set, then don't free */
3248 dma_free_coherent(rx_ring->dev, rx_ring->size,
3249 rx_ring->desc, rx_ring->dma);
3251 rx_ring->desc = NULL;
3255 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3256 * @adapter: board private structure
3258 * Free all receive software resources
3260 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3264 for (i = 0; i < adapter->num_rx_queues; i++)
3265 igb_free_rx_resources(adapter->rx_ring[i]);
3269 * igb_clean_rx_ring - Free Rx Buffers per Queue
3270 * @rx_ring: ring to free buffers from
3272 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3277 if (!rx_ring->rx_buffer_info)
3280 /* Free all the Rx ring sk_buffs */
3281 for (i = 0; i < rx_ring->count; i++) {
3282 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3283 if (buffer_info->dma) {
3284 dma_unmap_single(rx_ring->dev,
3288 buffer_info->dma = 0;
3291 if (buffer_info->skb) {
3292 dev_kfree_skb(buffer_info->skb);
3293 buffer_info->skb = NULL;
3295 if (buffer_info->page_dma) {
3296 dma_unmap_page(rx_ring->dev,
3297 buffer_info->page_dma,
3300 buffer_info->page_dma = 0;
3302 if (buffer_info->page) {
3303 put_page(buffer_info->page);
3304 buffer_info->page = NULL;
3305 buffer_info->page_offset = 0;
3309 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3310 memset(rx_ring->rx_buffer_info, 0, size);
3312 /* Zero out the descriptor ring */
3313 memset(rx_ring->desc, 0, rx_ring->size);
3315 rx_ring->next_to_clean = 0;
3316 rx_ring->next_to_use = 0;
3320 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3321 * @adapter: board private structure
3323 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3327 for (i = 0; i < adapter->num_rx_queues; i++)
3328 igb_clean_rx_ring(adapter->rx_ring[i]);
3332 * igb_set_mac - Change the Ethernet Address of the NIC
3333 * @netdev: network interface device structure
3334 * @p: pointer to an address structure
3336 * Returns 0 on success, negative on failure
3338 static int igb_set_mac(struct net_device *netdev, void *p)
3340 struct igb_adapter *adapter = netdev_priv(netdev);
3341 struct e1000_hw *hw = &adapter->hw;
3342 struct sockaddr *addr = p;
3344 if (!is_valid_ether_addr(addr->sa_data))
3345 return -EADDRNOTAVAIL;
3347 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3348 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3350 /* set the correct pool for the new PF MAC address in entry 0 */
3351 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3352 adapter->vfs_allocated_count);
3358 * igb_write_mc_addr_list - write multicast addresses to MTA
3359 * @netdev: network interface device structure
3361 * Writes multicast address list to the MTA hash table.
3362 * Returns: -ENOMEM on failure
3363 * 0 on no addresses written
3364 * X on writing X addresses to MTA
3366 static int igb_write_mc_addr_list(struct net_device *netdev)
3368 struct igb_adapter *adapter = netdev_priv(netdev);
3369 struct e1000_hw *hw = &adapter->hw;
3370 struct netdev_hw_addr *ha;
3374 if (netdev_mc_empty(netdev)) {
3375 /* nothing to program, so clear mc list */
3376 igb_update_mc_addr_list(hw, NULL, 0);
3377 igb_restore_vf_multicasts(adapter);
3381 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3385 /* The shared function expects a packed array of only addresses. */
3387 netdev_for_each_mc_addr(ha, netdev)
3388 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3390 igb_update_mc_addr_list(hw, mta_list, i);
3393 return netdev_mc_count(netdev);
3397 * igb_write_uc_addr_list - write unicast addresses to RAR table
3398 * @netdev: network interface device structure
3400 * Writes unicast address list to the RAR table.
3401 * Returns: -ENOMEM on failure/insufficient address space
3402 * 0 on no addresses written
3403 * X on writing X addresses to the RAR table
3405 static int igb_write_uc_addr_list(struct net_device *netdev)
3407 struct igb_adapter *adapter = netdev_priv(netdev);
3408 struct e1000_hw *hw = &adapter->hw;
3409 unsigned int vfn = adapter->vfs_allocated_count;
3410 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
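/*
 * Example: on a part with 24 hardware RAR entries and 7 VFs, one
 * entry is reserved per VF plus one for the PF's default MAC, leaving
 * rar_entries = 24 - (7 + 1) = 16 slots for additional unicast
 * addresses before we must fall back to promiscuous mode.
 */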
3413 /* return ENOMEM indicating insufficient memory for addresses */
3414 if (netdev_uc_count(netdev) > rar_entries)
3417 if (!netdev_uc_empty(netdev) && rar_entries) {
3418 struct netdev_hw_addr *ha;
3420 netdev_for_each_uc_addr(ha, netdev) {
3423 igb_rar_set_qsel(adapter, ha->addr,
3429 /* write the addresses in reverse order to avoid write combining */
3430 for (; rar_entries > 0 ; rar_entries--) {
3431 wr32(E1000_RAH(rar_entries), 0);
3432 wr32(E1000_RAL(rar_entries), 0);
3440 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3441 * @netdev: network interface device structure
3443 * The set_rx_mode entry point is called whenever the unicast or multicast
3444 * address lists or the network interface flags are updated. This routine is
3445 * responsible for configuring the hardware for proper unicast, multicast,
3446 * promiscuous mode, and all-multi behavior.
3448 static void igb_set_rx_mode(struct net_device *netdev)
3450 struct igb_adapter *adapter = netdev_priv(netdev);
3451 struct e1000_hw *hw = &adapter->hw;
3452 unsigned int vfn = adapter->vfs_allocated_count;
3453 u32 rctl, vmolr = 0;
3456 /* Check for Promiscuous and All Multicast modes */
3457 rctl = rd32(E1000_RCTL);
3459 /* clear the effected bits */
3460 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3462 if (netdev->flags & IFF_PROMISC) {
3463 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3464 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3466 if (netdev->flags & IFF_ALLMULTI) {
3467 rctl |= E1000_RCTL_MPE;
3468 vmolr |= E1000_VMOLR_MPME;
3471 * Write addresses to the MTA; if the attempt fails,
3472 * then we should just turn on promiscuous mode so
3473 * that we can at least receive multicast traffic
3475 count = igb_write_mc_addr_list(netdev);
3477 rctl |= E1000_RCTL_MPE;
3478 vmolr |= E1000_VMOLR_MPME;
3480 vmolr |= E1000_VMOLR_ROMPE;
3484 * Write addresses to available RAR registers, if there is not
3485 * sufficient space to store all the addresses then enable
3486 * unicast promiscuous mode
3488 count = igb_write_uc_addr_list(netdev);
3490 rctl |= E1000_RCTL_UPE;
3491 vmolr |= E1000_VMOLR_ROPE;
3493 rctl |= E1000_RCTL_VFE;
3495 wr32(E1000_RCTL, rctl);
3498 * In order to support SR-IOV and eventually VMDq it is necessary to set
3499 * the VMOLR to enable the appropriate modes. Without this workaround
3500 * we will have issues with VLAN tag stripping not being done for frames
3501 * that are only arriving because we are the default pool
3503 if (hw->mac.type < e1000_82576)
3506 vmolr |= rd32(E1000_VMOLR(vfn)) &
3507 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3508 wr32(E1000_VMOLR(vfn), vmolr);
3509 igb_restore_vf_multicasts(adapter);
3512 static void igb_check_wvbr(struct igb_adapter *adapter)
3514 struct e1000_hw *hw = &adapter->hw;
3517 switch (hw->mac.type) {
3520 if (!(wvbr = rd32(E1000_WVBR)))
3527 adapter->wvbr |= wvbr;
3530 #define IGB_STAGGERED_QUEUE_OFFSET 8
3532 static void igb_spoof_check(struct igb_adapter *adapter)
3539 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3540 if (adapter->wvbr & (1 << j) ||
3541 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3542 dev_warn(&adapter->pdev->dev,
3543 "Spoof event(s) detected on VF %d\n", j);
3546 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3551 /* Need to wait a few seconds after link up to get diagnostic information from the phy */
3553 static void igb_update_phy_info(unsigned long data)
3555 struct igb_adapter *adapter = (struct igb_adapter *) data;
3556 igb_get_phy_info(&adapter->hw);
3560 * igb_has_link - check shared code for link and determine up/down
3561 * @adapter: pointer to driver private info
3563 bool igb_has_link(struct igb_adapter *adapter)
3565 struct e1000_hw *hw = &adapter->hw;
3566 bool link_active = false;
3569 /* get_link_status is set on LSC (link status) interrupt or
3570 * rx sequence error interrupt. get_link_status will stay
3571 * false until the e1000_check_for_link establishes link
3572 * for copper adapters ONLY
3574 switch (hw->phy.media_type) {
3575 case e1000_media_type_copper:
3576 if (hw->mac.get_link_status) {
3577 ret_val = hw->mac.ops.check_for_link(hw);
3578 link_active = !hw->mac.get_link_status;
3583 case e1000_media_type_internal_serdes:
3584 ret_val = hw->mac.ops.check_for_link(hw);
3585 link_active = hw->mac.serdes_has_link;
3588 case e1000_media_type_unknown:
3595 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3598 u32 ctrl_ext, thstat;
3600 /* check for thermal sensor event on i350, copper only */
3601 if (hw->mac.type == e1000_i350) {
3602 thstat = rd32(E1000_THSTAT);
3603 ctrl_ext = rd32(E1000_CTRL_EXT);
3605 if ((hw->phy.media_type == e1000_media_type_copper) &&
3606 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3607 ret = !!(thstat & event);
3615 * igb_watchdog - Timer Call-back
3616 * @data: pointer to adapter cast into an unsigned long
3618 static void igb_watchdog(unsigned long data)
3620 struct igb_adapter *adapter = (struct igb_adapter *)data;
3621 /* Do the rest outside of interrupt context */
3622 schedule_work(&adapter->watchdog_task);
3625 static void igb_watchdog_task(struct work_struct *work)
3627 struct igb_adapter *adapter = container_of(work,
3630 struct e1000_hw *hw = &adapter->hw;
3631 struct net_device *netdev = adapter->netdev;
3635 link = igb_has_link(adapter);
3637 if (!netif_carrier_ok(netdev)) {
3639 hw->mac.ops.get_speed_and_duplex(hw,
3640 &adapter->link_speed,
3641 &adapter->link_duplex);
3643 ctrl = rd32(E1000_CTRL);
3644 /* Links status message must follow this format */
3645 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3646 "Flow Control: %s\n",
3648 adapter->link_speed,
3649 adapter->link_duplex == FULL_DUPLEX ?
3650 "Full Duplex" : "Half Duplex",
3651 ((ctrl & E1000_CTRL_TFCE) &&
3652 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3653 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3654 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3656 /* check for thermal sensor event */
3657 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3658 printk(KERN_INFO "igb: %s The network adapter "
3659 "link speed was downshifted "
3660 "because it overheated.\n",
3664 /* adjust timeout factor according to speed/duplex */
3665 adapter->tx_timeout_factor = 1;
3666 switch (adapter->link_speed) {
3668 adapter->tx_timeout_factor = 14;
3671 /* maybe add some timeout factor ? */
3675 netif_carrier_on(netdev);
3677 igb_ping_all_vfs(adapter);
3678 igb_check_vf_rate_limit(adapter);
3680 /* link state has changed, schedule phy info update */
3681 if (!test_bit(__IGB_DOWN, &adapter->state))
3682 mod_timer(&adapter->phy_info_timer,
3683 round_jiffies(jiffies + 2 * HZ));
3686 if (netif_carrier_ok(netdev)) {
3687 adapter->link_speed = 0;
3688 adapter->link_duplex = 0;
3690 /* check for thermal sensor event */
3691 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3692 printk(KERN_ERR "igb: %s The network adapter "
3693 "was stopped because it "
3698 /* Links status message must follow this format */
3699 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3701 netif_carrier_off(netdev);
3703 igb_ping_all_vfs(adapter);
3705 /* link state has changed, schedule phy info update */
3706 if (!test_bit(__IGB_DOWN, &adapter->state))
3707 mod_timer(&adapter->phy_info_timer,
3708 round_jiffies(jiffies + 2 * HZ));
3712 spin_lock(&adapter->stats64_lock);
3713 igb_update_stats(adapter, &adapter->stats64);
3714 spin_unlock(&adapter->stats64_lock);
3716 for (i = 0; i < adapter->num_tx_queues; i++) {
3717 struct igb_ring *tx_ring = adapter->tx_ring[i];
3718 if (!netif_carrier_ok(netdev)) {
3719 /* We've lost link, so the controller stops DMA,
3720 * but we've got queued Tx work that's never going
3721 * to get done, so reset controller to flush Tx.
3722 * (Do the reset outside of interrupt context). */
3723 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3724 adapter->tx_timeout_count++;
3725 schedule_work(&adapter->reset_task);
3726 /* return immediately since reset is imminent */
3731 /* Force detection of hung controller every watchdog period */
3732 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3735 /* Cause software interrupt to ensure rx ring is cleaned */
3736 if (adapter->msix_entries) {
3738 for (i = 0; i < adapter->num_q_vectors; i++)
3739 eics |= adapter->q_vector[i]->eims_value;
3740 wr32(E1000_EICS, eics);
3742 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3745 igb_spoof_check(adapter);
3747 /* Reset the timer */
3748 if (!test_bit(__IGB_DOWN, &adapter->state))
3749 mod_timer(&adapter->watchdog_timer,
3750 round_jiffies(jiffies + 2 * HZ));
3753 enum latency_range {
3757 latency_invalid = 255
3761 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3763 * Stores a new ITR value based strictly on packet size. This
3764 * algorithm is less sophisticated than that used in igb_update_itr,
3765 * due to the difficulty of synchronizing statistics across multiple
3766 * receive rings. The divisors and thresholds used by this function
3767 * were determined based on theoretical maximum wire speed and testing
3768 * data, in order to minimize response time while increasing bulk throughput.
3770 * This functionality is controlled by the InterruptThrottleRate module
3771 * parameter (see igb_param.c)
3772 * NOTE: This function is called only when operating in a multiqueue
3773 * receive environment.
3774 * @q_vector: pointer to q_vector
3776 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3778 int new_val = q_vector->itr_val;
3779 int avg_wire_size = 0;
3780 struct igb_adapter *adapter = q_vector->adapter;
3781 unsigned int packets;
3783 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3784 * ints/sec - ITR timer value of 120 ticks.
3786 if (adapter->link_speed != SPEED_1000) {
3787 new_val = IGB_4K_ITR;
3791 packets = q_vector->rx.total_packets;
3793 avg_wire_size = q_vector->rx.total_bytes / packets;
3795 packets = q_vector->tx.total_packets;
3797 avg_wire_size = max_t(u32, avg_wire_size,
3798 q_vector->tx.total_bytes / packets);
3800 /* if avg_wire_size isn't set no work was done */
3804 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3805 avg_wire_size += 24;
3807 /* Don't starve jumbo frames */
3808 avg_wire_size = min(avg_wire_size, 3000);
3810 /* Give a little boost to mid-size frames */
3811 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3812 new_val = avg_wire_size / 3;
3814 new_val = avg_wire_size / 2;
3816 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3817 if (new_val < IGB_20K_ITR &&
3818 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3819 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3820 new_val = IGB_20K_ITR;
3823 if (new_val != q_vector->itr_val) {
3824 q_vector->itr_val = new_val;
3825 q_vector->set_itr = 1;
3828 q_vector->rx.total_bytes = 0;
3829 q_vector->rx.total_packets = 0;
3830 q_vector->tx.total_bytes = 0;
3831 q_vector->tx.total_packets = 0;
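/*
 * Worked example of the heuristic above: 1000 packets totalling
 * 100000 bytes average 100 bytes on the wire, 124 after the 24-byte
 * overhead; that is below the 300..1200 mid-size band, so new_val =
 * 124 / 2 = 62, a short throttle interval favouring small-packet
 * latency (subject to the conservative-mode floor of IGB_20K_ITR).
 */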
3835 * igb_update_itr - update the dynamic ITR value based on statistics
3836 * Stores a new ITR value based on packets and byte
3837 * counts during the last interrupt. The advantage of per interrupt
3838 * computation is faster updates and more accurate ITR for the current
3839 * traffic pattern. Constants in this function were computed
3840 * based on theoretical maximum wire speed and thresholds were set based
3841 * on testing data as well as attempting to minimize response time
3842 * while increasing bulk throughput.
3843 * this functionality is controlled by the InterruptThrottleRate module
3844 * parameter (see igb_param.c)
3845 * NOTE: These calculations are only valid when operating in a single-
3846 * queue environment.
3847 * @q_vector: pointer to q_vector
3848 * @ring_container: ring info to update the itr for
3850 static void igb_update_itr(struct igb_q_vector *q_vector,
3851 struct igb_ring_container *ring_container)
3853 unsigned int packets = ring_container->total_packets;
3854 unsigned int bytes = ring_container->total_bytes;
3855 u8 itrval = ring_container->itr;
3857 /* no packets, exit with status unchanged */
3862 case lowest_latency:
3863 /* handle TSO and jumbo frames */
3864 if (bytes/packets > 8000)
3865 itrval = bulk_latency;
3866 else if ((packets < 5) && (bytes > 512))
3867 itrval = low_latency;
3869 case low_latency: /* 50 usec aka 20000 ints/s */
3870 if (bytes > 10000) {
3871 /* this if handles the TSO accounting */
3872 if (bytes/packets > 8000) {
3873 itrval = bulk_latency;
3874 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3875 itrval = bulk_latency;
3876 } else if (packets > 35) {
3877 itrval = lowest_latency;
3879 } else if (bytes/packets > 2000) {
3880 itrval = bulk_latency;
3881 } else if (packets <= 2 && bytes < 512) {
3882 itrval = lowest_latency;
3885 case bulk_latency: /* 250 usec aka 4000 ints/s */
3886 if (bytes > 25000) {
3888 itrval = low_latency;
3889 } else if (bytes < 1500) {
3890 itrval = low_latency;
3895 /* clear work counters since we have the values we need */
3896 ring_container->total_bytes = 0;
3897 ring_container->total_packets = 0;
3899 /* write updated itr to ring container */
3900 ring_container->itr = itrval;
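/*
 * Example transitions through the table above: from low_latency, an
 * interrupt that cleaned 40 packets / 20000 bytes (500 bytes each)
 * hits the packets > 35 case and moves to lowest_latency, while 5
 * packets of 9000 bytes each look like TSO (bytes/packets > 8000)
 * and drop straight to bulk_latency.
 */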
3903 static void igb_set_itr(struct igb_q_vector *q_vector)
3905 struct igb_adapter *adapter = q_vector->adapter;
3906 u32 new_itr = q_vector->itr_val;
3909 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3910 if (adapter->link_speed != SPEED_1000) {
3912 new_itr = IGB_4K_ITR;
3916 igb_update_itr(q_vector, &q_vector->tx);
3917 igb_update_itr(q_vector, &q_vector->rx);
3919 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3921 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3922 if (current_itr == lowest_latency &&
3923 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3924 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3925 current_itr = low_latency;
3927 switch (current_itr) {
3928 /* counts and packets in update_itr are dependent on these numbers */
3929 case lowest_latency:
3930 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3933 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3936 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3943 if (new_itr != q_vector->itr_val) {
3944 /* this attempts to bias the interrupt rate towards Bulk
3945 * by adding intermediate steps when interrupt rate is
3947 new_itr = new_itr > q_vector->itr_val ?
3948 max((new_itr * q_vector->itr_val) /
3949 (new_itr + (q_vector->itr_val >> 2)),
3952 /* Don't write the value here; it resets the adapter's
3953 * internal timer, and causes us to delay far longer than
3954 * we should between interrupts. Instead, we write the ITR
3955 * value at the beginning of the next interrupt so the timing
3956 * ends up being correct.
3958 q_vector->itr_val = new_itr;
3959 q_vector->set_itr = 1;
3963 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3964 u32 type_tucmd, u32 mss_l4len_idx)
3966 struct e1000_adv_tx_context_desc *context_desc;
3967 u16 i = tx_ring->next_to_use;
3969 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
i++;
3972 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3974 /* set bits to identify this as an advanced context descriptor */
3975 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3977 /* For 82575, context index must be unique per ring. */
3978 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3979 mss_l4len_idx |= tx_ring->reg_idx << 4;
3981 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
3982 context_desc->seqnum_seed = 0;
3983 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
3984 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3987 static int igb_tso(struct igb_ring *tx_ring,
3988 struct igb_tx_buffer *first,
3991 struct sk_buff *skb = first->skb;
3992 u32 vlan_macip_lens, type_tucmd;
3993 u32 mss_l4len_idx, l4len;
3995 if (!skb_is_gso(skb))
3998 if (skb_header_cloned(skb)) {
3999 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4004 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4005 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4007 if (first->protocol == __constant_htons(ETH_P_IP)) {
4008 struct iphdr *iph = ip_hdr(skb);
4011 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4015 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4016 first->tx_flags |= IGB_TX_FLAGS_TSO |
4019 } else if (skb_is_gso_v6(skb)) {
4020 ipv6_hdr(skb)->payload_len = 0;
4021 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4022 &ipv6_hdr(skb)->daddr,
4024 first->tx_flags |= IGB_TX_FLAGS_TSO |
4028 /* compute header lengths */
4029 l4len = tcp_hdrlen(skb);
4030 *hdr_len = skb_transport_offset(skb) + l4len;
4032 /* update gso size and bytecount with header size */
4033 first->gso_segs = skb_shinfo(skb)->gso_segs;
4034 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4037 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4038 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4040 /* VLAN MACLEN IPLEN */
4041 vlan_macip_lens = skb_network_header_len(skb);
4042 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4043 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4045 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
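/*
 * Example of the field packed above (assuming the usual shift values
 * of 8 for L4LEN and 16 for MSS): a 20-byte TCP header with
 * gso_size = 1448 yields mss_l4len_idx = (1448 << 16) | (20 << 8) =
 * 0x05a81400.
 */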
4050 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4052 struct sk_buff *skb = first->skb;
4053 u32 vlan_macip_lens = 0;
4054 u32 mss_l4len_idx = 0;
4057 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4058 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4062 switch (first->protocol) {
4063 case __constant_htons(ETH_P_IP):
4064 vlan_macip_lens |= skb_network_header_len(skb);
4065 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4066 l4_hdr = ip_hdr(skb)->protocol;
4068 case __constant_htons(ETH_P_IPV6):
4069 vlan_macip_lens |= skb_network_header_len(skb);
4070 l4_hdr = ipv6_hdr(skb)->nexthdr;
4073 if (unlikely(net_ratelimit())) {
4074 dev_warn(tx_ring->dev,
4075 "partial checksum but proto=%x!\n",
4083 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4084 mss_l4len_idx = tcp_hdrlen(skb) <<
4085 E1000_ADVTXD_L4LEN_SHIFT;
4088 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4089 mss_l4len_idx = sizeof(struct sctphdr) <<
4090 E1000_ADVTXD_L4LEN_SHIFT;
4093 mss_l4len_idx = sizeof(struct udphdr) <<
4094 E1000_ADVTXD_L4LEN_SHIFT;
4097 if (unlikely(net_ratelimit())) {
4098 dev_warn(tx_ring->dev,
4099 "partial checksum but l4 proto=%x!\n",
4105 /* update TX checksum flag */
4106 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4109 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4110 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4112 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4115 static __le32 igb_tx_cmd_type(u32 tx_flags)
4117 /* set type for advanced descriptor with frame checksum insertion */
4118 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4119 E1000_ADVTXD_DCMD_IFCS |
4120 E1000_ADVTXD_DCMD_DEXT);
4122 /* set HW vlan bit if vlan is present */
4123 if (tx_flags & IGB_TX_FLAGS_VLAN)
4124 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4126 /* set timestamp bit if present */
4127 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4128 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4130 /* set segmentation bits for TSO */
4131 if (tx_flags & IGB_TX_FLAGS_TSO)
4132 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4137 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4138 union e1000_adv_tx_desc *tx_desc,
4139 u32 tx_flags, unsigned int paylen)
4141 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4143 /* 82575 requires a unique index per ring if any offload is enabled */
4144 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4145 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4146 olinfo_status |= tx_ring->reg_idx << 4;
4148 /* insert L4 checksum */
4149 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4150 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4152 /* insert IPv4 checksum */
4153 if (tx_flags & IGB_TX_FLAGS_IPV4)
4154 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4157 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4161 * The largest size we can write to the descriptor is 65535. In order to
4162 * maintain a power of two alignment we have to limit ourselves to 32K.
4164 #define IGB_MAX_TXD_PWR 15
4165 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
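/*
 * Minimal illustrative helper (a hypothetical name for exposition,
 * not part of the driver's API): how many data descriptors a buffer
 * of @size bytes consumes under the 32K cap above, e.g. a 65536-byte
 * fragment needs DIV_ROUND_UP(65536, 32768) = 2 descriptors.
 */
static inline u16 igb_txd_count_example(unsigned int size)
{
	return DIV_ROUND_UP(size, IGB_MAX_DATA_PER_TXD);
}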
4167 static void igb_tx_map(struct igb_ring *tx_ring,
4168 struct igb_tx_buffer *first,
4171 struct sk_buff *skb = first->skb;
4172 struct igb_tx_buffer *tx_buffer_info;
4173 union e1000_adv_tx_desc *tx_desc;
4175 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4176 unsigned int data_len = skb->data_len;
4177 unsigned int size = skb_headlen(skb);
4178 unsigned int paylen = skb->len - hdr_len;
4180 u32 tx_flags = first->tx_flags;
4181 u16 i = tx_ring->next_to_use;
4183 tx_desc = IGB_TX_DESC(tx_ring, i);
4185 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4186 cmd_type = igb_tx_cmd_type(tx_flags);
4188 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4189 if (dma_mapping_error(tx_ring->dev, dma))
4192 /* record length, and DMA address */
4193 first->length = size;
4195 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4198 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4199 tx_desc->read.cmd_type_len =
4200 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4204 if (i == tx_ring->count) {
4205 tx_desc = IGB_TX_DESC(tx_ring, 0);
4209 dma += IGB_MAX_DATA_PER_TXD;
4210 size -= IGB_MAX_DATA_PER_TXD;
4212 tx_desc->read.olinfo_status = 0;
4213 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4216 if (likely(!data_len))
4219 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4223 if (i == tx_ring->count) {
4224 tx_desc = IGB_TX_DESC(tx_ring, 0);
4228 size = skb_frag_size(frag);
4231 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4232 size, DMA_TO_DEVICE);
4233 if (dma_mapping_error(tx_ring->dev, dma))
4236 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4237 tx_buffer_info->length = size;
4238 tx_buffer_info->dma = dma;
4240 tx_desc->read.olinfo_status = 0;
4241 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4246 /* write last descriptor with RS and EOP bits */
4247 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4248 tx_desc->read.cmd_type_len = cmd_type;
4250 /* set the timestamp */
4251 first->time_stamp = jiffies;
4254 /* Force memory writes to complete before letting h/w know there
4255 * are new descriptors to fetch. (Only applicable for weak-ordered
4256 * memory model archs, such as IA-64).
4258 * We also need this memory barrier to make certain all of the
4259 * status bits have been updated before next_to_watch is written. */
wmb();
4263 /* set next_to_watch value indicating a packet is present */
4264 first->next_to_watch = tx_desc;
4267 if (i == tx_ring->count)
4270 tx_ring->next_to_use = i;
4272 writel(i, tx_ring->tail);
4274 /* we need this if more than one processor can write to our tail
4275 * at a time; it synchronizes IO on IA64/Altix systems */
4281 dev_err(tx_ring->dev, "TX DMA map failed\n");
4283 /* clear dma mappings for failed tx_buffer_info map */
4285 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4286 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4287 if (tx_buffer_info == first)
4294 tx_ring->next_to_use = i;
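/**
 * __igb_maybe_stop_tx - stop the queue until descriptors are reclaimed
 * @tx_ring: ring that ran short of descriptors
 * @size: number of descriptors the caller needs
 *
 * Slow path of igb_maybe_stop_tx(): stops the subqueue, then re-checks
 * the free count in case the cleanup path freed descriptors after the
 * caller's first check, restarting the queue if room appeared.
 **/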
4297 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4299 struct net_device *netdev = tx_ring->netdev;
4301 netif_stop_subqueue(netdev, tx_ring->queue_index);
4303 /* Herbert's original patch had:
4304 * smp_mb__after_netif_stop_queue();
4305 * but since that doesn't exist yet, just open code it. */
smp_mb();
4308 /* We need to check again in case another CPU has just
4309 * made room available. */
4310 if (igb_desc_unused(tx_ring) < size)
4314 netif_wake_subqueue(netdev, tx_ring->queue_index);
4316 u64_stats_update_begin(&tx_ring->tx_syncp2);
4317 tx_ring->tx_stats.restart_queue2++;
4318 u64_stats_update_end(&tx_ring->tx_syncp2);
4323 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4325 if (igb_desc_unused(tx_ring) >= size)
4327 return __igb_maybe_stop_tx(tx_ring, size);
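/**
 * igb_xmit_frame_ring - transmit one skb on a specific ring
 * @skb: packet to send
 * @tx_ring: ring to place the descriptors on
 *
 * Reserves nr_frags + 4 descriptors up front: one per page fragment,
 * one for skb->data, one for an optional context descriptor and a two
 * descriptor gap that keeps tail from touching head, then hands the
 * offload setup to igb_tso()/igb_tx_csum() and the mapping to
 * igb_tx_map().
 **/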
4330 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4331 struct igb_ring *tx_ring)
4333 struct igb_tx_buffer *first;
4336 __be16 protocol = vlan_get_protocol(skb);
4339 /* need: 1 descriptor per page,
4340 * + 2 desc gap to keep tail from touching head,
4341 * + 1 desc for skb->data,
4342 * + 1 desc for context descriptor,
4343 * otherwise try next time */
4344 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4345 /* this is a hard error */
4346 return NETDEV_TX_BUSY;
4349 /* record the location of the first descriptor for this packet */
4350 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4352 first->bytecount = skb->len;
4353 first->gso_segs = 1;
4355 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4356 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4357 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4360 if (vlan_tx_tag_present(skb)) {
4361 tx_flags |= IGB_TX_FLAGS_VLAN;
4362 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4365 /* record initial flags and protocol */
4366 first->tx_flags = tx_flags;
4367 first->protocol = protocol;
4369 tso = igb_tso(tx_ring, first, &hdr_len);
4373 igb_tx_csum(tx_ring, first);
4375 igb_tx_map(tx_ring, first, hdr_len);
4377 /* Make sure there is space in the ring for the next send. */
4378 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4380 return NETDEV_TX_OK;
4383 igb_unmap_and_free_tx_resource(tx_ring, first);
4385 return NETDEV_TX_OK;
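/* igb_tx_queue_mapping - map skb->queue_mapping to an allocated Tx ring;
 * out of range values are wrapped by the modulo below. */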
4388 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4389 struct sk_buff *skb)
4391 unsigned int r_idx = skb->queue_mapping;
4393 if (r_idx >= adapter->num_tx_queues)
4394 r_idx = r_idx % adapter->num_tx_queues;
4396 return adapter->tx_ring[r_idx];
4399 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4400 struct net_device *netdev)
4402 struct igb_adapter *adapter = netdev_priv(netdev);
4404 if (test_bit(__IGB_DOWN, &adapter->state)) {
4405 dev_kfree_skb_any(skb);
4406 return NETDEV_TX_OK;
4409 if (skb->len <= 0) {
4410 dev_kfree_skb_any(skb);
4411 return NETDEV_TX_OK;
4415 /* The minimum packet size with TCTL.PSP set is 17, so pad the skb
4416 * in order to meet this minimum size requirement. */
4418 if (skb->len < 17) {
4419 if (skb_padto(skb, 17))
4420 return NETDEV_TX_OK;
4424 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4428 * igb_tx_timeout - Respond to a Tx Hang
4429 * @netdev: network interface device structure
4431 static void igb_tx_timeout(struct net_device *netdev)
4433 struct igb_adapter *adapter = netdev_priv(netdev);
4434 struct e1000_hw *hw = &adapter->hw;
4436 /* Do the reset outside of interrupt context */
4437 adapter->tx_timeout_count++;
4439 if (hw->mac.type >= e1000_82580)
4440 hw->dev_spec._82575.global_device_reset = true;
4442 schedule_work(&adapter->reset_task);
wr32(E1000_EICS,
4444 (adapter->eims_enable_mask & ~adapter->eims_other));
4447 static void igb_reset_task(struct work_struct *work)
4449 struct igb_adapter *adapter;
4450 adapter = container_of(work, struct igb_adapter, reset_task);
4453 netdev_err(adapter->netdev, "Reset adapter\n");
4454 igb_reinit_locked(adapter);
4458 * igb_get_stats64 - Get System Network Statistics
4459 * @netdev: network interface device structure
4460 * @stats: rtnl_link_stats64 pointer
4463 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4464 struct rtnl_link_stats64 *stats)
4466 struct igb_adapter *adapter = netdev_priv(netdev);
4468 spin_lock(&adapter->stats64_lock);
4469 igb_update_stats(adapter, &adapter->stats64);
4470 memcpy(stats, &adapter->stats64, sizeof(*stats));
4471 spin_unlock(&adapter->stats64_lock);
4477 * igb_change_mtu - Change the Maximum Transfer Unit
4478 * @netdev: network interface device structure
4479 * @new_mtu: new value for maximum frame size
4481 * Returns 0 on success, negative on failure
4483 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4485 struct igb_adapter *adapter = netdev_priv(netdev);
4486 struct pci_dev *pdev = adapter->pdev;
4487 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4489 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4490 dev_err(&pdev->dev, "Invalid MTU setting\n");
4494 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4495 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4496 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4500 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4503 /* igb_down has a dependency on max_frame_size */
4504 adapter->max_frame_size = max_frame;
4506 if (netif_running(netdev))
4509 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4510 netdev->mtu, new_mtu);
4511 netdev->mtu = new_mtu;
4513 if (netif_running(netdev))
4518 clear_bit(__IGB_RESETTING, &adapter->state);
4524 * igb_update_stats - Update the board statistics counters
4525 * @adapter: board private structure
4528 void igb_update_stats(struct igb_adapter *adapter,
4529 struct rtnl_link_stats64 *net_stats)
4531 struct e1000_hw *hw = &adapter->hw;
4532 struct pci_dev *pdev = adapter->pdev;
4538 u64 _bytes, _packets;
4540 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4543 * Prevent stats update while adapter is being reset, or if the pci
4544 * connection is down.
4546 if (adapter->link_speed == 0)
4548 if (pci_channel_offline(pdev))
4553 for (i = 0; i < adapter->num_rx_queues; i++) {
4554 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4555 struct igb_ring *ring = adapter->rx_ring[i];
4557 ring->rx_stats.drops += rqdpc_tmp;
4558 net_stats->rx_fifo_errors += rqdpc_tmp;
4561 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4562 _bytes = ring->rx_stats.bytes;
4563 _packets = ring->rx_stats.packets;
4564 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4566 packets += _packets;
4569 net_stats->rx_bytes = bytes;
4570 net_stats->rx_packets = packets;
4574 for (i = 0; i < adapter->num_tx_queues; i++) {
4575 struct igb_ring *ring = adapter->tx_ring[i];
4577 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4578 _bytes = ring->tx_stats.bytes;
4579 _packets = ring->tx_stats.packets;
4580 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4582 packets += _packets;
4584 net_stats->tx_bytes = bytes;
4585 net_stats->tx_packets = packets;
4587 /* read stats registers */
4588 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4589 adapter->stats.gprc += rd32(E1000_GPRC);
4590 adapter->stats.gorc += rd32(E1000_GORCL);
4591 rd32(E1000_GORCH); /* clear GORCL */
4592 adapter->stats.bprc += rd32(E1000_BPRC);
4593 adapter->stats.mprc += rd32(E1000_MPRC);
4594 adapter->stats.roc += rd32(E1000_ROC);
4596 adapter->stats.prc64 += rd32(E1000_PRC64);
4597 adapter->stats.prc127 += rd32(E1000_PRC127);
4598 adapter->stats.prc255 += rd32(E1000_PRC255);
4599 adapter->stats.prc511 += rd32(E1000_PRC511);
4600 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4601 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4602 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4603 adapter->stats.sec += rd32(E1000_SEC);
4605 mpc = rd32(E1000_MPC);
4606 adapter->stats.mpc += mpc;
4607 net_stats->rx_fifo_errors += mpc;
4608 adapter->stats.scc += rd32(E1000_SCC);
4609 adapter->stats.ecol += rd32(E1000_ECOL);
4610 adapter->stats.mcc += rd32(E1000_MCC);
4611 adapter->stats.latecol += rd32(E1000_LATECOL);
4612 adapter->stats.dc += rd32(E1000_DC);
4613 adapter->stats.rlec += rd32(E1000_RLEC);
4614 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4615 adapter->stats.xontxc += rd32(E1000_XONTXC);
4616 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4617 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4618 adapter->stats.fcruc += rd32(E1000_FCRUC);
4619 adapter->stats.gptc += rd32(E1000_GPTC);
4620 adapter->stats.gotc += rd32(E1000_GOTCL);
4621 rd32(E1000_GOTCH); /* clear GOTCL */
4622 adapter->stats.rnbc += rd32(E1000_RNBC);
4623 adapter->stats.ruc += rd32(E1000_RUC);
4624 adapter->stats.rfc += rd32(E1000_RFC);
4625 adapter->stats.rjc += rd32(E1000_RJC);
4626 adapter->stats.tor += rd32(E1000_TORH);
4627 adapter->stats.tot += rd32(E1000_TOTH);
4628 adapter->stats.tpr += rd32(E1000_TPR);
4630 adapter->stats.ptc64 += rd32(E1000_PTC64);
4631 adapter->stats.ptc127 += rd32(E1000_PTC127);
4632 adapter->stats.ptc255 += rd32(E1000_PTC255);
4633 adapter->stats.ptc511 += rd32(E1000_PTC511);
4634 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4635 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4637 adapter->stats.mptc += rd32(E1000_MPTC);
4638 adapter->stats.bptc += rd32(E1000_BPTC);
4640 adapter->stats.tpt += rd32(E1000_TPT);
4641 adapter->stats.colc += rd32(E1000_COLC);
4643 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4644 /* read internal phy specific stats */
4645 reg = rd32(E1000_CTRL_EXT);
4646 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4647 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4648 adapter->stats.tncrs += rd32(E1000_TNCRS);
4651 adapter->stats.tsctc += rd32(E1000_TSCTC);
4652 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4654 adapter->stats.iac += rd32(E1000_IAC);
4655 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4656 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4657 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4658 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4659 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4660 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4661 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4662 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4664 /* Fill out the OS statistics structure */
4665 net_stats->multicast = adapter->stats.mprc;
4666 net_stats->collisions = adapter->stats.colc;
4670 /* RLEC on some newer hardware can be incorrect so build
4671 * our own version based on RUC and ROC */
4672 net_stats->rx_errors = adapter->stats.rxerrc +
4673 adapter->stats.crcerrs + adapter->stats.algnerrc +
4674 adapter->stats.ruc + adapter->stats.roc +
4675 adapter->stats.cexterr;
4676 net_stats->rx_length_errors = adapter->stats.ruc +
adapter->stats.roc;
4678 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4679 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4680 net_stats->rx_missed_errors = adapter->stats.mpc;
4683 net_stats->tx_errors = adapter->stats.ecol +
4684 adapter->stats.latecol;
4685 net_stats->tx_aborted_errors = adapter->stats.ecol;
4686 net_stats->tx_window_errors = adapter->stats.latecol;
4687 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4689 /* Tx Dropped needs to be maintained elsewhere */
4692 if (hw->phy.media_type == e1000_media_type_copper) {
4693 if ((adapter->link_speed == SPEED_1000) &&
4694 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4695 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4696 adapter->phy_stats.idle_errors += phy_tmp;
4700 /* Management Stats */
4701 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4702 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4703 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4706 reg = rd32(E1000_MANC);
4707 if (reg & E1000_MANC_EN_BMC2OS) {
4708 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4709 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4710 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4711 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4715 static irqreturn_t igb_msix_other(int irq, void *data)
4717 struct igb_adapter *adapter = data;
4718 struct e1000_hw *hw = &adapter->hw;
4719 u32 icr = rd32(E1000_ICR);
4720 /* reading ICR causes bit 31 of EICR to be cleared */
4722 if (icr & E1000_ICR_DRSTA)
4723 schedule_work(&adapter->reset_task);
4725 if (icr & E1000_ICR_DOUTSYNC) {
4726 /* HW is reporting DMA is out of sync */
4727 adapter->stats.doosync++;
4728 /* The DMA Out of Sync is also an indication of a spoof event
4729 * in IOV mode. Check the Wrong VM Behavior register to
4730 * see if it is really a spoof event. */
4731 igb_check_wvbr(adapter);
4734 /* Check for a mailbox event */
4735 if (icr & E1000_ICR_VMMB)
4736 igb_msg_task(adapter);
4738 if (icr & E1000_ICR_LSC) {
4739 hw->mac.get_link_status = 1;
4740 /* guard against interrupt when we're going down */
4741 if (!test_bit(__IGB_DOWN, &adapter->state))
4742 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4745 wr32(E1000_EIMS, adapter->eims_other);
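/**
 * igb_write_itr - commit a newly calculated interrupt throttle rate
 * @q_vector: vector whose EITR register is updated
 *
 * Masking with 0x7FFC keeps only the interval field of the EITR
 * layout. 82575 parts expect the interval replicated in the upper half
 * of the register, while 82576 and later instead take the counter
 * ignore bit so the write changes only the interval.
 **/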
4750 static void igb_write_itr(struct igb_q_vector *q_vector)
4752 struct igb_adapter *adapter = q_vector->adapter;
4753 u32 itr_val = q_vector->itr_val & 0x7FFC;
4755 if (!q_vector->set_itr)
4761 if (adapter->hw.mac.type == e1000_82575)
4762 itr_val |= itr_val << 16;
4764 itr_val |= E1000_EITR_CNT_IGNR;
4766 writel(itr_val, q_vector->itr_register);
4767 q_vector->set_itr = 0;
4770 static irqreturn_t igb_msix_ring(int irq, void *data)
4772 struct igb_q_vector *q_vector = data;
4774 /* Write the ITR value calculated from the previous interrupt. */
4775 igb_write_itr(q_vector);
4777 napi_schedule(&q_vector->napi);
4782 #ifdef CONFIG_IGB_DCA
4783 static void igb_update_dca(struct igb_q_vector *q_vector)
4785 struct igb_adapter *adapter = q_vector->adapter;
4786 struct e1000_hw *hw = &adapter->hw;
4787 int cpu = get_cpu();
4789 if (q_vector->cpu == cpu)
4792 if (q_vector->tx.ring) {
4793 int q = q_vector->tx.ring->reg_idx;
4794 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4795 if (hw->mac.type == e1000_82575) {
4796 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4797 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4799 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4800 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4801 E1000_DCA_TXCTRL_CPUID_SHIFT;
4803 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4804 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4806 if (q_vector->rx.ring) {
4807 int q = q_vector->rx.ring->reg_idx;
4808 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4809 if (hw->mac.type == e1000_82575) {
4810 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4811 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4813 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4814 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4815 E1000_DCA_RXCTRL_CPUID_SHIFT;
4817 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4818 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4819 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4820 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4822 q_vector->cpu = cpu;
4827 static void igb_setup_dca(struct igb_adapter *adapter)
4829 struct e1000_hw *hw = &adapter->hw;
4832 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4835 /* Always use CB2 mode, difference is masked in the CB driver. */
4836 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4838 for (i = 0; i < adapter->num_q_vectors; i++) {
4839 adapter->q_vector[i]->cpu = -1;
4840 igb_update_dca(adapter->q_vector[i]);
4844 static int __igb_notify_dca(struct device *dev, void *data)
4846 struct net_device *netdev = dev_get_drvdata(dev);
4847 struct igb_adapter *adapter = netdev_priv(netdev);
4848 struct pci_dev *pdev = adapter->pdev;
4849 struct e1000_hw *hw = &adapter->hw;
4850 unsigned long event = *(unsigned long *)data;
4853 case DCA_PROVIDER_ADD:
4854 /* if already enabled, don't do it again */
4855 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4857 if (dca_add_requester(dev) == 0) {
4858 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4859 dev_info(&pdev->dev, "DCA enabled\n");
4860 igb_setup_dca(adapter);
4863 /* Fall Through since DCA is disabled. */
4864 case DCA_PROVIDER_REMOVE:
4865 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4866 /* without this a class_device is left
4867 * hanging around in the sysfs model */
4868 dca_remove_requester(dev);
4869 dev_info(&pdev->dev, "DCA disabled\n");
4870 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4871 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4879 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4884 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4887 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4889 #endif /* CONFIG_IGB_DCA */
4891 #ifdef CONFIG_PCI_IOV
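/**
 * igb_vf_configure - assign a MAC address and locate the VF pci_dev
 * @adapter: board private structure
 * @vf: VF to configure
 *
 * Generates a random MAC for the VF and walks the PCI bus for the
 * matching VF device; the devfn stride is 2 for 82576 and 4 for I350.
 **/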
4892 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4894 unsigned char mac_addr[ETH_ALEN];
4895 struct pci_dev *pdev = adapter->pdev;
4896 struct e1000_hw *hw = &adapter->hw;
4897 struct pci_dev *pvfdev;
4898 unsigned int device_id;
4901 random_ether_addr(mac_addr);
4902 igb_set_vf_mac(adapter, vf, mac_addr);
4904 switch (adapter->hw.mac.type) {
4906 device_id = IGB_82576_VF_DEV_ID;
4907 /* VF Stride for 82576 is 2 */
4908 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4912 device_id = IGB_I350_VF_DEV_ID;
4913 /* VF Stride for I350 is 4 */
4914 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4923 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4925 if (pvfdev->devfn == thisvf_devfn)
4927 pvfdev = pci_get_device(hw->vendor_id,
4932 adapter->vf_data[vf].vfdev = pvfdev;
4935 "Couldn't find pci dev ptr for VF %4.4x\n",
4937 return pvfdev != NULL;
4940 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4942 struct e1000_hw *hw = &adapter->hw;
4943 struct pci_dev *pdev = adapter->pdev;
4944 struct pci_dev *pvfdev;
4947 unsigned int device_id;
4950 switch (adapter->hw.mac.type) {
4952 device_id = IGB_82576_VF_DEV_ID;
4953 /* VF Stride for 82576 is 2 */
4957 device_id = IGB_I350_VF_DEV_ID;
4958 /* VF Stride for I350 is 4 */
4967 vf_devfn = pdev->devfn + 0x80;
4968 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4970 if (pvfdev->devfn == vf_devfn)
4972 vf_devfn += vf_stride;
4973 pvfdev = pci_get_device(hw->vendor_id,
4980 static int igb_check_vf_assignment(struct igb_adapter *adapter)
4983 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4984 if (adapter->vf_data[i].vfdev) {
4985 if (adapter->vf_data[i].vfdev->dev_flags &
4986 PCI_DEV_FLAGS_ASSIGNED)
4994 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4996 struct e1000_hw *hw = &adapter->hw;
5000 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5001 ping = E1000_PF_CONTROL_MSG;
5002 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5003 ping |= E1000_VT_MSGTYPE_CTS;
5004 igb_write_mbx(hw, &ping, 1, i);
5008 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5010 struct e1000_hw *hw = &adapter->hw;
5011 u32 vmolr = rd32(E1000_VMOLR(vf));
5012 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5014 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5015 IGB_VF_FLAG_MULTI_PROMISC);
5016 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5018 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5019 vmolr |= E1000_VMOLR_MPME;
5020 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5021 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5024 /* if we have hashes and we are clearing a multicast promisc
5025 * flag we need to write the hashes to the MTA as this step
5026 * was previously skipped */
5028 if (vf_data->num_vf_mc_hashes > 30) {
5029 vmolr |= E1000_VMOLR_MPME;
5030 } else if (vf_data->num_vf_mc_hashes) {
5032 vmolr |= E1000_VMOLR_ROMPE;
5033 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5034 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5038 wr32(E1000_VMOLR(vf), vmolr);
5040 /* there are flags left unprocessed, likely not supported */
5041 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5048 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5049 u32 *msgbuf, u32 vf)
5051 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5052 u16 *hash_list = (u16 *)&msgbuf[1];
5053 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5056 /* salt away the number of multicast addresses assigned
5057 * to this VF for later use to restore when the PF multicast list changes */
5060 vf_data->num_vf_mc_hashes = n;
5062 /* only up to 30 hash values supported */
5066 /* store the hashes for later use */
5067 for (i = 0; i < n; i++)
5068 vf_data->vf_mc_hashes[i] = hash_list[i];
5070 /* Flush and reset the mta with the new values */
5071 igb_set_rx_mode(adapter->netdev);
5076 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5078 struct e1000_hw *hw = &adapter->hw;
5079 struct vf_data_storage *vf_data;
5082 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5083 u32 vmolr = rd32(E1000_VMOLR(i));
5084 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5086 vf_data = &adapter->vf_data[i];
5088 if ((vf_data->num_vf_mc_hashes > 30) ||
5089 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5090 vmolr |= E1000_VMOLR_MPME;
5091 } else if (vf_data->num_vf_mc_hashes) {
5092 vmolr |= E1000_VMOLR_ROMPE;
5093 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5094 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5096 wr32(E1000_VMOLR(i), vmolr);
5100 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5102 struct e1000_hw *hw = &adapter->hw;
5103 u32 pool_mask, reg, vid;
5106 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5108 /* Find the vlan filter for this id */
5109 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5110 reg = rd32(E1000_VLVF(i));
5112 /* remove the vf from the pool */
5115 /* if pool is empty then remove entry from vfta */
5116 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5117 (reg & E1000_VLVF_VLANID_ENABLE)) {
5119 vid = reg & E1000_VLVF_VLANID_MASK;
5120 igb_vfta_set(hw, vid, false);
5123 wr32(E1000_VLVF(i), reg);
5126 adapter->vf_data[vf].vlans_enabled = 0;
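/**
 * igb_vlvf_set - add or remove a pool from a VLVF filter entry
 * @adapter: board private structure
 * @vid: VLAN id the filter matches
 * @add: true to add the pool, false to remove it
 * @vf: pool/VF index to update
 *
 * Looks up (or, when adding, allocates) the VLVF entry for @vid and
 * updates its pool bitmap. The VFTA and the per-VF receive length in
 * VMOLR.RLPML, which needs headroom for the VLAN tag, are kept in sync
 * as entries are created and destroyed.
 **/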
5129 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5131 struct e1000_hw *hw = &adapter->hw;
5134 /* The vlvf table only exists on 82576 hardware and newer */
5135 if (hw->mac.type < e1000_82576)
5138 /* we only need to do this if VMDq is enabled */
5139 if (!adapter->vfs_allocated_count)
5142 /* Find the vlan filter for this id */
5143 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5144 reg = rd32(E1000_VLVF(i));
5145 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5146 vid == (reg & E1000_VLVF_VLANID_MASK))
5151 if (i == E1000_VLVF_ARRAY_SIZE) {
5152 /* Did not find a matching VLAN ID entry that was
5153 * enabled. Search for a free filter entry, i.e.
5154 * one without the enable bit set */
5156 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5157 reg = rd32(E1000_VLVF(i));
5158 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5162 if (i < E1000_VLVF_ARRAY_SIZE) {
5163 /* Found an enabled/available entry */
5164 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5166 /* if !enabled we need to set this up in vfta */
5167 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5168 /* add VID to filter table */
5169 igb_vfta_set(hw, vid, true);
5170 reg |= E1000_VLVF_VLANID_ENABLE;
5172 reg &= ~E1000_VLVF_VLANID_MASK;
5174 wr32(E1000_VLVF(i), reg);
5176 /* do not modify RLPML for PF devices */
5177 if (vf >= adapter->vfs_allocated_count)
5180 if (!adapter->vf_data[vf].vlans_enabled) {
5182 reg = rd32(E1000_VMOLR(vf));
5183 size = reg & E1000_VMOLR_RLPML_MASK;
5185 reg &= ~E1000_VMOLR_RLPML_MASK;
5187 wr32(E1000_VMOLR(vf), reg);
5190 adapter->vf_data[vf].vlans_enabled++;
5193 if (i < E1000_VLVF_ARRAY_SIZE) {
5194 /* remove vf from the pool */
5195 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5196 /* if pool is empty then remove entry from vfta */
5197 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5199 igb_vfta_set(hw, vid, false);
5201 wr32(E1000_VLVF(i), reg);
5203 /* do not modify RLPML for PF devices */
5204 if (vf >= adapter->vfs_allocated_count)
5207 adapter->vf_data[vf].vlans_enabled--;
5208 if (!adapter->vf_data[vf].vlans_enabled) {
5210 reg = rd32(E1000_VMOLR(vf));
5211 size = reg & E1000_VMOLR_RLPML_MASK;
5213 reg &= ~E1000_VMOLR_RLPML_MASK;
5215 wr32(E1000_VMOLR(vf), reg);
5222 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5224 struct e1000_hw *hw = &adapter->hw;
5227 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5229 wr32(E1000_VMVIR(vf), 0);
5232 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5233 int vf, u16 vlan, u8 qos)
5236 struct igb_adapter *adapter = netdev_priv(netdev);
5238 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5241 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5244 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5245 igb_set_vmolr(adapter, vf, !vlan);
5246 adapter->vf_data[vf].pf_vlan = vlan;
5247 adapter->vf_data[vf].pf_qos = qos;
5248 dev_info(&adapter->pdev->dev,
5249 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5250 if (test_bit(__IGB_DOWN, &adapter->state)) {
5251 dev_warn(&adapter->pdev->dev,
5252 "The VF VLAN has been set,"
5253 " but the PF device is not up.\n");
5254 dev_warn(&adapter->pdev->dev,
5255 "Bring the PF device up before"
5256 " attempting to use the VF device.\n");
5259 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5261 igb_set_vmvir(adapter, vlan, vf);
5262 igb_set_vmolr(adapter, vf, true);
5263 adapter->vf_data[vf].pf_vlan = 0;
5264 adapter->vf_data[vf].pf_qos = 0;
5270 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5272 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5273 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5275 return igb_vlvf_set(adapter, vid, add, vf);
5278 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5280 /* clear flags - except flag that indicates PF has set the MAC */
5281 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5282 adapter->vf_data[vf].last_nack = jiffies;
5284 /* reset offloads to defaults */
5285 igb_set_vmolr(adapter, vf, true);
5287 /* reset vlans for device */
5288 igb_clear_vf_vfta(adapter, vf);
5289 if (adapter->vf_data[vf].pf_vlan)
5290 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5291 adapter->vf_data[vf].pf_vlan,
5292 adapter->vf_data[vf].pf_qos);
5294 igb_clear_vf_vfta(adapter, vf);
5296 /* reset multicast table array for vf */
5297 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5299 /* Flush and reset the mta with the new values */
5300 igb_set_rx_mode(adapter->netdev);
5303 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5305 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5307 /* generate a new mac address as we were hotplug removed/added */
5308 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5309 random_ether_addr(vf_mac);
5311 /* process remaining reset events */
5312 igb_vf_reset(adapter, vf);
5315 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5317 struct e1000_hw *hw = &adapter->hw;
5318 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5319 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5321 u8 *addr = (u8 *)(&msgbuf[1]);
5323 /* process all the same items cleared in a function level reset */
5324 igb_vf_reset(adapter, vf);
5326 /* set vf mac address */
5327 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5329 /* enable transmit and receive for vf */
5330 reg = rd32(E1000_VFTE);
5331 wr32(E1000_VFTE, reg | (1 << vf));
5332 reg = rd32(E1000_VFRE);
5333 wr32(E1000_VFRE, reg | (1 << vf));
5335 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5337 /* reply to reset with ack and vf mac address */
5338 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5339 memcpy(addr, vf_mac, 6);
5340 igb_write_mbx(hw, msgbuf, 3, vf);
5343 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5346 /* The VF MAC Address is stored in a packed array of bytes
5347 * starting at the second 32 bit word of the msg array */
5349 unsigned char *addr = (unsigned char *)&msg[1];
5352 if (is_valid_ether_addr(addr))
5353 err = igb_set_vf_mac(adapter, vf, addr);
5358 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5360 struct e1000_hw *hw = &adapter->hw;
5361 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5362 u32 msg = E1000_VT_MSGTYPE_NACK;
5364 /* if device isn't clear to send it shouldn't be reading either */
5365 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5366 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5367 igb_write_mbx(hw, &msg, 1, vf);
5368 vf_data->last_nack = jiffies;
5372 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5374 struct pci_dev *pdev = adapter->pdev;
5375 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5376 struct e1000_hw *hw = &adapter->hw;
5377 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5380 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5383 /* if receive failed revoke VF CTS stats and restart init */
5384 dev_err(&pdev->dev, "Error receiving message from VF\n");
5385 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5386 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5391 /* this is a message we already processed, do nothing */
5392 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5396 /* until the vf completes a reset it should not be
5397 * allowed to start any configuration. */
5400 if (msgbuf[0] == E1000_VF_RESET) {
5401 igb_vf_reset_msg(adapter, vf);
5405 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5406 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5412 switch ((msgbuf[0] & 0xFFFF)) {
5413 case E1000_VF_SET_MAC_ADDR:
5415 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5416 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5418 dev_warn(&pdev->dev,
5419 "VF %d attempted to override administratively "
5420 "set MAC address\nReload the VF driver to "
5421 "resume operations\n", vf);
5423 case E1000_VF_SET_PROMISC:
5424 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5426 case E1000_VF_SET_MULTICAST:
5427 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5429 case E1000_VF_SET_LPE:
5430 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5432 case E1000_VF_SET_VLAN:
5434 if (vf_data->pf_vlan)
5435 dev_warn(&pdev->dev,
5436 "VF %d attempted to override administratively "
5437 "set VLAN tag\nReload the VF driver to "
5438 "resume operations\n", vf);
5440 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5443 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5448 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5450 /* notify the VF of the results of what it sent us */
5452 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5454 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5456 igb_write_mbx(hw, msgbuf, 1, vf);
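/**
 * igb_msg_task - service pending mailbox events from all VFs
 * @adapter: board private structure
 *
 * Polls every allocated VF for reset requests, pending messages and
 * acks, dispatching each to the matching handler.
 **/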
5459 static void igb_msg_task(struct igb_adapter *adapter)
5461 struct e1000_hw *hw = &adapter->hw;
5464 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5465 /* process any reset requests */
5466 if (!igb_check_for_rst(hw, vf))
5467 igb_vf_reset_event(adapter, vf);
5469 /* process any messages pending */
5470 if (!igb_check_for_msg(hw, vf))
5471 igb_rcv_msg_from_vf(adapter, vf);
5473 /* process any acks */
5474 if (!igb_check_for_ack(hw, vf))
5475 igb_rcv_ack_from_vf(adapter, vf);
5480 * igb_set_uta - Set unicast filter table address
5481 * @adapter: board private structure
5483 * The unicast table address is a register array of 32-bit registers.
5484 * The table is meant to be used in a way similar to how the MTA is used;
5485 * however, due to certain limitations in the hardware it is necessary to
5486 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5487 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5489 static void igb_set_uta(struct igb_adapter *adapter)
5491 struct e1000_hw *hw = &adapter->hw;
5494 /* The UTA table only exists on 82576 hardware and newer */
5495 if (hw->mac.type < e1000_82576)
5498 /* we only need to do this if VMDq is enabled */
5499 if (!adapter->vfs_allocated_count)
5502 for (i = 0; i < hw->mac.uta_reg_count; i++)
5503 array_wr32(E1000_UTA, i, ~0);
5507 * igb_intr_msi - Interrupt Handler
5508 * @irq: interrupt number
5509 * @data: pointer to a network interface device structure
5511 static irqreturn_t igb_intr_msi(int irq, void *data)
5513 struct igb_adapter *adapter = data;
5514 struct igb_q_vector *q_vector = adapter->q_vector[0];
5515 struct e1000_hw *hw = &adapter->hw;
5516 /* read ICR disables interrupts using IAM */
5517 u32 icr = rd32(E1000_ICR);
5519 igb_write_itr(q_vector);
5521 if (icr & E1000_ICR_DRSTA)
5522 schedule_work(&adapter->reset_task);
5524 if (icr & E1000_ICR_DOUTSYNC) {
5525 /* HW is reporting DMA is out of sync */
5526 adapter->stats.doosync++;
5529 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5530 hw->mac.get_link_status = 1;
5531 if (!test_bit(__IGB_DOWN, &adapter->state))
5532 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5535 napi_schedule(&q_vector->napi);
5541 * igb_intr - Legacy Interrupt Handler
5542 * @irq: interrupt number
5543 * @data: pointer to a network interface device structure
5545 static irqreturn_t igb_intr(int irq, void *data)
5547 struct igb_adapter *adapter = data;
5548 struct igb_q_vector *q_vector = adapter->q_vector[0];
5549 struct e1000_hw *hw = &adapter->hw;
5550 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5551 * need for the IMC write */
5552 u32 icr = rd32(E1000_ICR);
5554 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5555 * not set, then the adapter didn't send an interrupt */
5556 if (!(icr & E1000_ICR_INT_ASSERTED))
5559 igb_write_itr(q_vector);
5561 if (icr & E1000_ICR_DRSTA)
5562 schedule_work(&adapter->reset_task);
5564 if (icr & E1000_ICR_DOUTSYNC) {
5565 /* HW is reporting DMA is out of sync */
5566 adapter->stats.doosync++;
5569 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5570 hw->mac.get_link_status = 1;
5571 /* guard against interrupt when we're going down */
5572 if (!test_bit(__IGB_DOWN, &adapter->state))
5573 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5576 napi_schedule(&q_vector->napi);
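/**
 * igb_ring_irq_enable - update ITR and re-arm the vector's interrupt
 * @q_vector: vector to re-enable
 *
 * Re-runs the ITR calculation when dynamic moderation is configured,
 * then re-enables either the vector's EIMS bit (MSI-X) or the global
 * interrupt mask, provided the adapter is not going down.
 **/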
5581 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5583 struct igb_adapter *adapter = q_vector->adapter;
5584 struct e1000_hw *hw = &adapter->hw;
5586 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5587 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5588 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5589 igb_set_itr(q_vector);
5591 igb_update_ring_itr(q_vector);
5594 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5595 if (adapter->msix_entries)
5596 wr32(E1000_EIMS, q_vector->eims_value);
5598 igb_irq_enable(adapter);
5603 * igb_poll - NAPI Rx polling callback
5604 * @napi: napi polling structure
5605 * @budget: count of how many packets we should handle
5607 static int igb_poll(struct napi_struct *napi, int budget)
5609 struct igb_q_vector *q_vector = container_of(napi,
5610 struct igb_q_vector,
5612 bool clean_complete = true;
5614 #ifdef CONFIG_IGB_DCA
5615 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5616 igb_update_dca(q_vector);
5618 if (q_vector->tx.ring)
5619 clean_complete = igb_clean_tx_irq(q_vector);
5621 if (q_vector->rx.ring)
5622 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5624 /* If all work not completed, return budget and keep polling */
5625 if (!clean_complete)
return budget;
5628 /* If not enough Rx work done, exit the polling mode */
5629 napi_complete(napi);
5630 igb_ring_irq_enable(q_vector);
5636 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5637 * @adapter: board private structure
5638 * @shhwtstamps: timestamp structure to update
5639 * @regval: unsigned 64bit system time value.
5641 * We need to convert the system time value stored in the RX/TXSTMP registers
5642 * into a hwtstamp which can be used by the upper level timestamping functions
5644 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5645 struct skb_shared_hwtstamps *shhwtstamps,
5651 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5652 * 24 to match clock shift we setup earlier.
5654 if (adapter->hw.mac.type >= e1000_82580)
5655 regval <<= IGB_82580_TSYNC_SHIFT;
5657 ns = timecounter_cyc2time(&adapter->clock, regval);
5658 timecompare_update(&adapter->compare, ns);
5659 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5660 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5661 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5665 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5666 * @q_vector: pointer to q_vector containing needed info
5667 * @buffer: pointer to igb_tx_buffer structure
5669 * If we were asked to do hardware stamping and such a time stamp is
5670 * available, then it must have been for this skb here because we only
5671 * allow only one such packet into the queue.
5673 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5674 struct igb_tx_buffer *buffer_info)
5676 struct igb_adapter *adapter = q_vector->adapter;
5677 struct e1000_hw *hw = &adapter->hw;
5678 struct skb_shared_hwtstamps shhwtstamps;
5681 /* exit if the skb was not hardware time stamped or the TX stamp is not valid */
5682 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5683 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5686 regval = rd32(E1000_TXSTMPL);
5687 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5689 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5690 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5694 * igb_clean_tx_irq - Reclaim resources after transmit completes
5695 * @q_vector: pointer to q_vector containing needed info
5696 * returns true if ring is completely cleaned
5698 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5700 struct igb_adapter *adapter = q_vector->adapter;
5701 struct igb_ring *tx_ring = q_vector->tx.ring;
5702 struct igb_tx_buffer *tx_buffer;
5703 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5704 unsigned int total_bytes = 0, total_packets = 0;
5705 unsigned int budget = q_vector->tx.work_limit;
5706 unsigned int i = tx_ring->next_to_clean;
5708 if (test_bit(__IGB_DOWN, &adapter->state))
5711 tx_buffer = &tx_ring->tx_buffer_info[i];
5712 tx_desc = IGB_TX_DESC(tx_ring, i);
5713 i -= tx_ring->count;
5715 for (; budget; budget--) {
5716 eop_desc = tx_buffer->next_to_watch;
5718 /* prevent any other reads prior to eop_desc */
rmb();
5721 /* if next_to_watch is not set then there is no work pending */
if (!eop_desc)
break;
5725 /* if DD is not set pending work has not been completed */
5726 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5729 /* clear next_to_watch to prevent false hangs */
5730 tx_buffer->next_to_watch = NULL;
5732 /* update the statistics for this packet */
5733 total_bytes += tx_buffer->bytecount;
5734 total_packets += tx_buffer->gso_segs;
5736 /* retrieve hardware timestamp */
5737 igb_tx_hwtstamp(q_vector, tx_buffer);
5740 dev_kfree_skb_any(tx_buffer->skb);
5741 tx_buffer->skb = NULL;
5743 /* unmap skb header data */
5744 dma_unmap_single(tx_ring->dev,
5749 /* clear last DMA location and unmap remaining buffers */
5750 while (tx_desc != eop_desc) {
5757 i -= tx_ring->count;
5758 tx_buffer = tx_ring->tx_buffer_info;
5759 tx_desc = IGB_TX_DESC(tx_ring, 0);
5762 /* unmap any remaining paged data */
5763 if (tx_buffer->dma) {
5764 dma_unmap_page(tx_ring->dev,
5771 /* clear last DMA location */
5774 /* move us one more past the eop_desc for start of next pkt */
5779 i -= tx_ring->count;
5780 tx_buffer = tx_ring->tx_buffer_info;
5781 tx_desc = IGB_TX_DESC(tx_ring, 0);
5785 i += tx_ring->count;
5786 tx_ring->next_to_clean = i;
5787 u64_stats_update_begin(&tx_ring->tx_syncp);
5788 tx_ring->tx_stats.bytes += total_bytes;
5789 tx_ring->tx_stats.packets += total_packets;
5790 u64_stats_update_end(&tx_ring->tx_syncp);
5791 q_vector->tx.total_bytes += total_bytes;
5792 q_vector->tx.total_packets += total_packets;
5794 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5795 struct e1000_hw *hw = &adapter->hw;
5797 eop_desc = tx_buffer->next_to_watch;
5799 /* Detect a transmit hang in hardware; this serializes the
5800 * check with the clearing of time_stamp and movement of i */
5801 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5803 time_after(jiffies, tx_buffer->time_stamp +
5804 (adapter->tx_timeout_factor * HZ)) &&
5805 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5807 /* detected Tx unit hang */
5808 dev_err(tx_ring->dev,
5809 "Detected Tx Unit Hang\n"
5813 " next_to_use <%x>\n"
5814 " next_to_clean <%x>\n"
5815 "buffer_info[next_to_clean]\n"
5816 " time_stamp <%lx>\n"
5817 " next_to_watch <%p>\n"
5819 " desc.status <%x>\n",
5820 tx_ring->queue_index,
5821 rd32(E1000_TDH(tx_ring->reg_idx)),
5822 readl(tx_ring->tail),
5823 tx_ring->next_to_use,
5824 tx_ring->next_to_clean,
5825 tx_buffer->time_stamp,
5828 eop_desc->wb.status);
5829 netif_stop_subqueue(tx_ring->netdev,
5830 tx_ring->queue_index);
5832 /* we are about to reset, no point in enabling stuff */
5837 if (unlikely(total_packets &&
5838 netif_carrier_ok(tx_ring->netdev) &&
5839 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5840 /* Make sure that anybody stopping the queue after this
5841 * sees the new next_to_clean.
5844 if (__netif_subqueue_stopped(tx_ring->netdev,
5845 tx_ring->queue_index) &&
5846 !(test_bit(__IGB_DOWN, &adapter->state))) {
5847 netif_wake_subqueue(tx_ring->netdev,
5848 tx_ring->queue_index);
5850 u64_stats_update_begin(&tx_ring->tx_syncp);
5851 tx_ring->tx_stats.restart_queue++;
5852 u64_stats_update_end(&tx_ring->tx_syncp);
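/**
 * igb_rx_checksum - translate hardware checksum status onto the skb
 * @ring: ring the descriptor was received on
 * @rx_desc: descriptor carrying the status bits
 * @skb: packet being processed
 *
 * Leaves the skb as CHECKSUM_NONE when offload is disabled or the
 * ignore-checksum bit is set, counts real TCP/UDP checksum errors
 * (working around an SCTP errata on 60 byte frames) and marks
 * CHECKSUM_UNNECESSARY only when hardware validated TCP or UDP.
 **/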
5859 static inline void igb_rx_checksum(struct igb_ring *ring,
5860 union e1000_adv_rx_desc *rx_desc,
5861 struct sk_buff *skb)
5863 skb_checksum_none_assert(skb);
5865 /* Ignore Checksum bit is set */
5866 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5869 /* Rx checksum disabled via ethtool */
5870 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5873 /* TCP/UDP checksum error bit is set */
5874 if (igb_test_staterr(rx_desc,
5875 E1000_RXDEXT_STATERR_TCPE |
5876 E1000_RXDEXT_STATERR_IPE)) {
5878 /* work around errata with sctp packets where the TCPE aka
5879 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5880 * packets, (aka let the stack check the crc32c) */
5882 if (!((skb->len == 60) &&
5883 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5884 u64_stats_update_begin(&ring->rx_syncp);
5885 ring->rx_stats.csum_err++;
5886 u64_stats_update_end(&ring->rx_syncp);
5888 /* let the stack verify checksum errors */
5891 /* It must be a TCP or UDP packet with a valid checksum */
5892 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5893 E1000_RXD_STAT_UDPCS))
5894 skb->ip_summed = CHECKSUM_UNNECESSARY;
5896 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5897 le32_to_cpu(rx_desc->wb.upper.status_error));
5900 static inline void igb_rx_hash(struct igb_ring *ring,
5901 union e1000_adv_rx_desc *rx_desc,
5902 struct sk_buff *skb)
5904 if (ring->netdev->features & NETIF_F_RXHASH)
5905 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5908 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5909 union e1000_adv_rx_desc *rx_desc,
5910 struct sk_buff *skb)
5912 struct igb_adapter *adapter = q_vector->adapter;
5913 struct e1000_hw *hw = &adapter->hw;
5916 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5917 E1000_RXDADV_STAT_TS))
5921 /* If this bit is set, then the RX registers contain the time stamp. No
5922 * other packet will be time stamped until we read these registers, so
5923 * read the registers to make them available again. Because only one
5924 * packet can be time stamped at a time, we know that the register
5925 * values must belong to this one here and therefore we don't need to
5926 * compare any of the additional attributes stored for it.
5928 * If nothing went wrong, then it should have a shared tx_flags that we
5929 * can turn into a skb_shared_hwtstamps. */
5931 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5932 u32 *stamp = (u32 *)skb->data;
5933 regval = le32_to_cpu(*(stamp + 2));
5934 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5935 skb_pull(skb, IGB_TS_HDR_LEN);
5937 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5940 regval = rd32(E1000_RXSTMPL);
5941 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5944 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5947 static void igb_rx_vlan(struct igb_ring *ring,
5948 union e1000_adv_rx_desc *rx_desc,
5949 struct sk_buff *skb)
5951 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5953 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5954 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5955 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5957 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5959 __vlan_hwaccel_put_tag(skb, vid);
5963 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5965 /* HW will not DMA in data larger than the given buffer, even if it
5966 * parses the (NFS, of course) header to be larger. In that case, it
5967 * fills the header buffer and spills the rest into the page. */
5969 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5970 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5971 if (hlen > IGB_RX_HDR_LEN)
5972 hlen = IGB_RX_HDR_LEN;
return hlen;
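/**
 * igb_clean_rx_irq - clean completed descriptors and send packets up
 * @q_vector: vector owning the Rx ring
 * @budget: NAPI budget, the maximum amount of Rx work to do
 *
 * Walks descriptors with the DD bit set, reassembling header-split
 * packets from the half-page buffers, then applies timestamp, RSS
 * hash, checksum and VLAN handling before handing the skb to GRO.
 * Consumed buffers are replenished in IGB_RX_BUFFER_WRITE batches.
 **/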
5976 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5978 struct igb_ring *rx_ring = q_vector->rx.ring;
5979 union e1000_adv_rx_desc *rx_desc;
5980 const int current_node = numa_node_id();
5981 unsigned int total_bytes = 0, total_packets = 0;
5982 u16 cleaned_count = igb_desc_unused(rx_ring);
5983 u16 i = rx_ring->next_to_clean;
5985 rx_desc = IGB_RX_DESC(rx_ring, i);
5987 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5988 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5989 struct sk_buff *skb = buffer_info->skb;
5990 union e1000_adv_rx_desc *next_rxd;
5992 buffer_info->skb = NULL;
5993 prefetch(skb->data);
5996 if (i == rx_ring->count)
5999 next_rxd = IGB_RX_DESC(rx_ring, i);
6003 /* This memory barrier is needed to keep us from reading
6004 * any other fields out of the rx_desc until we know the
6005 * RXD_STAT_DD bit is set */
rmb();
6009 if (!skb_is_nonlinear(skb)) {
6010 __skb_put(skb, igb_get_hlen(rx_desc));
6011 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6014 buffer_info->dma = 0;
6017 if (rx_desc->wb.upper.length) {
6018 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6020 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6022 buffer_info->page_offset,
6026 skb->data_len += length;
6027 skb->truesize += PAGE_SIZE / 2;
6029 if ((page_count(buffer_info->page) != 1) ||
6030 (page_to_nid(buffer_info->page) != current_node))
6031 buffer_info->page = NULL;
6033 get_page(buffer_info->page);
6035 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6036 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6037 buffer_info->page_dma = 0;
6040 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6041 struct igb_rx_buffer *next_buffer;
6042 next_buffer = &rx_ring->rx_buffer_info[i];
6043 buffer_info->skb = next_buffer->skb;
6044 buffer_info->dma = next_buffer->dma;
6045 next_buffer->skb = skb;
6046 next_buffer->dma = 0;
6050 if (igb_test_staterr(rx_desc,
6051 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6052 dev_kfree_skb_any(skb);
6056 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6057 igb_rx_hash(rx_ring, rx_desc, skb);
6058 igb_rx_checksum(rx_ring, rx_desc, skb);
6059 igb_rx_vlan(rx_ring, rx_desc, skb);
6061 total_bytes += skb->len;
6064 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6066 napi_gro_receive(&q_vector->napi, skb);
6074 /* return some buffers to hardware, one at a time is too slow */
6075 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6076 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6080 /* use prefetched values */
6084 rx_ring->next_to_clean = i;
6085 u64_stats_update_begin(&rx_ring->rx_syncp);
6086 rx_ring->rx_stats.packets += total_packets;
6087 rx_ring->rx_stats.bytes += total_bytes;
6088 u64_stats_update_end(&rx_ring->rx_syncp);
6089 q_vector->rx.total_packets += total_packets;
6090 q_vector->rx.total_bytes += total_bytes;
6093 igb_alloc_rx_buffers(rx_ring, cleaned_count);
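/**
 * igb_alloc_mapped_skb - ensure an rx buffer has a mapped header skb
 * @rx_ring: ring the buffer belongs to
 * @bi: buffer info to populate
 *
 * Reuses any skb and mapping still present in the buffer, otherwise
 * allocates a new IGB_RX_HDR_LEN sized skb and DMA-maps it;
 * alloc_failed is counted on any failure.
 **/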
6098 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6099 struct igb_rx_buffer *bi)
6101 struct sk_buff *skb = bi->skb;
6102 dma_addr_t dma = bi->dma;
6108 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6112 rx_ring->rx_stats.alloc_failed++;
6116 /* initialize skb for ring */
6117 skb_record_rx_queue(skb, rx_ring->queue_index);
6120 dma = dma_map_single(rx_ring->dev, skb->data,
6121 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6123 if (dma_mapping_error(rx_ring->dev, dma)) {
6124 rx_ring->rx_stats.alloc_failed++;
6132 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6133 struct igb_rx_buffer *bi)
6135 struct page *page = bi->page;
6136 dma_addr_t page_dma = bi->page_dma;
6137 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6143 page = netdev_alloc_page(rx_ring->netdev);
6145 if (unlikely(!page)) {
6146 rx_ring->rx_stats.alloc_failed++;
6151 page_dma = dma_map_page(rx_ring->dev, page,
6152 page_offset, PAGE_SIZE / 2,
6155 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6156 rx_ring->rx_stats.alloc_failed++;
6160 bi->page_dma = page_dma;
6161 bi->page_offset = page_offset;
6166 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6167 * @adapter: address of board private structure
6169 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6171 union e1000_adv_rx_desc *rx_desc;
6172 struct igb_rx_buffer *bi;
6173 u16 i = rx_ring->next_to_use;
6175 rx_desc = IGB_RX_DESC(rx_ring, i);
6176 bi = &rx_ring->rx_buffer_info[i];
6177 i -= rx_ring->count;
6179 while (cleaned_count--) {
6180 if (!igb_alloc_mapped_skb(rx_ring, bi))
6183 /* Refresh the desc even if buffer_addrs didn't change
6184 * because each write-back erases this info. */
6185 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6187 if (!igb_alloc_mapped_page(rx_ring, bi))
6190 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6196 rx_desc = IGB_RX_DESC(rx_ring, 0);
6197 bi = rx_ring->rx_buffer_info;
6198 i -= rx_ring->count;
6201 /* clear the hdr_addr for the next_to_use descriptor */
6202 rx_desc->read.hdr_addr = 0;
6205 i += rx_ring->count;
6207 if (rx_ring->next_to_use != i) {
6208 rx_ring->next_to_use = i;
6210 /* Force memory writes to complete before letting h/w
6211 * know there are new descriptors to fetch. (Only
6212 * applicable for weak-ordered memory model archs,
6213 * such as IA-64). */
wmb();
6215 writel(i, rx_ring->tail);
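/**
 * igb_mii_ioctl - handle SIOCGMIIPHY/SIOCGMIIREG requests
 * @netdev: network interface device structure
 * @ifr: ioctl request block containing the mii_ioctl_data
 * @cmd: ioctl command
 *
 * Only copper PHYs are supported; reads return the PHY address or the
 * requested PHY register value.
 **/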
6225 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6227 struct igb_adapter *adapter = netdev_priv(netdev);
6228 struct mii_ioctl_data *data = if_mii(ifr);
6230 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6235 data->phy_id = adapter->hw.phy.addr;
6238 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6250 * igb_hwtstamp_ioctl - control hardware time stamping
6255 * Outgoing time stamping can be enabled and disabled. Play nice and
6256 * disable it when requested, although it shouldn't cause any overhead
6257 * when no packet needs it. At most one packet in the queue may be
6258 * marked for time stamping, otherwise it would be impossible to tell
6259 * for sure to which packet the hardware time stamp belongs.
6261 * Incoming time stamping has to be configured via the hardware
6262 * filters. Not all combinations are supported, in particular event
6263 * type has to be specified. Matching the kind of event packet is
6264 * not supported, with the exception of "all V2 events regardless of
6268 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6269 struct ifreq *ifr, int cmd)
6271 struct igb_adapter *adapter = netdev_priv(netdev);
6272 struct e1000_hw *hw = &adapter->hw;
6273 struct hwtstamp_config config;
6274 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6275 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6276 u32 tsync_rx_cfg = 0;
6281 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6284 /* reserved for future extensions */
6288 switch (config.tx_type) {
6289 case HWTSTAMP_TX_OFF:
6291 case HWTSTAMP_TX_ON:
6297 switch (config.rx_filter) {
6298 case HWTSTAMP_FILTER_NONE:
6301 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6302 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6303 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6304 case HWTSTAMP_FILTER_ALL:
6306 /* register TSYNCRXCFG must be set, therefore it is not
6307 * possible to time stamp both Sync and Delay_Req messages
6308 * => fall back to time stamping all packets */
6310 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6311 config.rx_filter = HWTSTAMP_FILTER_ALL;
6313 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6314 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6315 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6318 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6319 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6320 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6323 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6324 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6325 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6326 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6329 config.rx_filter = HWTSTAMP_FILTER_SOME;
6331 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6332 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6333 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6334 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6337 config.rx_filter = HWTSTAMP_FILTER_SOME;
6339 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6340 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6341 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6342 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6343 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6351 if (hw->mac.type == e1000_82575) {
6352 if (tsync_rx_ctl | tsync_tx_ctl)
6358 /* Per-packet timestamping only works if all packets are
6359 * timestamped, so enable timestamping in all packets as
6360 * long as one rx filter was configured. */
6362 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6363 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6364 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6367 /* enable/disable TX */
6368 regval = rd32(E1000_TSYNCTXCTL);
6369 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6370 regval |= tsync_tx_ctl;
6371 wr32(E1000_TSYNCTXCTL, regval);
6373 /* enable/disable RX */
6374 regval = rd32(E1000_TSYNCRXCTL);
6375 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6376 regval |= tsync_rx_ctl;
6377 wr32(E1000_TSYNCRXCTL, regval);
6379 /* define which PTP packets are time stamped */
6380 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6382 /* define ethertype filter for timestamped packets */
6385 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6386 E1000_ETQF_1588 | /* enable timestamping */
6387 ETH_P_1588)); /* 1588 eth protocol type */
6389 wr32(E1000_ETQF(3), 0);
6391 #define PTP_PORT 319
6392 /* L4 Queue Filter[3]: filter by destination port and protocol */
6394 u32 ftqf = (IPPROTO_UDP /* UDP */
6395 | E1000_FTQF_VF_BP /* VF not compared */
6396 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6397 | E1000_FTQF_MASK); /* mask all inputs */
6398 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6400 wr32(E1000_IMIR(3), htons(PTP_PORT));
6401 wr32(E1000_IMIREXT(3),
6402 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6403 if (hw->mac.type == e1000_82576) {
6404 /* enable source port check */
6405 wr32(E1000_SPQF(3), htons(PTP_PORT));
6406 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6408 wr32(E1000_FTQF(3), ftqf);
6410 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	wrfl();

	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}

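/*
 * Illustrative sketch, not part of the driver: how a userspace PTP
 * daemon would exercise the SIOCSHWTSTAMP path above. The socket fd
 * "sock" and the interface name "eth0" are assumptions for the example.
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/net_tstamp.h>
 *	#include <linux/sockios.h>
 *
 *	struct hwtstamp_config cfg;
 *	struct ifreq ifr;
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock, SIOCSHWTSTAMP, &ifr);
 *
 * On return the driver may have widened cfg.rx_filter (e.g. to
 * HWTSTAMP_FILTER_ALL on 82580 and newer), so callers should re-read
 * the config that is copied back.
 */
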
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}

s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}

s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}

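/*
 * Usage sketch (an assumption for illustration, not code from this
 * driver): the helpers above take an offset relative to the start of
 * the PCIe capability, so reading the negotiated link status from
 * shared code could look like:
 *
 *	u16 link_status;
 *
 *	if (igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &link_status))
 *		return;	// device has no PCIe capability
 *	// link_status now holds the PCI Express Link Status register
 */
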
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;
	bool enable = !!(features & NETIF_F_HW_VLAN_RX);

	if (enable) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);
}

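/*
 * The VME bit toggled above backs the NETIF_F_HW_VLAN_RX offload, so
 * this path is typically exercised from userspace with ethtool, e.g.
 * (interface name is a placeholder):
 *
 *	ethtool -K eth0 rxvlan off	# clears E1000_CTRL_VME
 *	ethtool -K eth0 rxvlan on	# sets E1000_CTRL_VME
 */
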
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);
}

static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);
}

static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	igb_vlan_mode(adapter->netdev, adapter->netdev->features);

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}

int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}

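/*
 * Why the (spd & 1) / (dplx & ~1) guard above is sufficient, worked
 * through: SPEED_10/100/1000 are the even values 10/100/1000 and
 * DUPLEX_HALF/DUPLEX_FULL are 0/1, so every valid spd + dplx sum is
 * unique:
 *
 *	10   + 0 = 10	(10 half)	100  + 1 = 101	(100 full)
 *	10   + 1 = 11	(10 full)	1000 + 1 = 1001	(1000 full)
 *	100  + 0 = 100	(100 half)
 *
 * An odd speed or a multi-bit duplex value could collide with another
 * case label, which is exactly what the guard rejects.
 */
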
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif /* CONFIG_PM */

static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		q_vector = adapter->q_vector[i];
		if (adapter->msix_entries)
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}

/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}

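/*
 * The three AER callbacks above are wired into the PCI core through a
 * struct pci_error_handlers; a minimal sketch of that glue (the actual
 * table lives alongside the driver's pci_driver definition):
 *
 *	static struct pci_error_handlers igb_err_handler = {
 *		.error_detected = igb_io_error_detected,
 *		.slot_reset = igb_io_slot_reset,
 *		.resume = igb_io_resume,
 *	};
 *
 * The PCI core then drives detected -> slot reset -> resume in order
 * during error recovery.
 */
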
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	/* 82575 selects the pool by multiplying; later parts encode it
	 * as a bit position, hence the shift */
	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}

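/*
 * Byte-order example for the packing above, using the made-up address
 * 00:1b:21:3a:4c:5d:
 *
 *	rar_low  = 0x3a211b00	(addr[3..0], little endian)
 *	rar_high = 0x00005d4c	(addr[5..4], before the AV/pool bits)
 *
 * i.e. the first transmitted byte of the address lands in the least
 * significant byte of RAL.
 */
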
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and move
	 * towards the first, as a result a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}

static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
				      " change effective.");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}

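/*
 * This ndo is reached through the rtnetlink IFLA_VF_MAC attribute; from
 * the shell the equivalent is (device name and address are
 * placeholders):
 *
 *	ip link set eth0 vf 0 mac 02:00:00:00:00:01
 *
 * after which the VF driver must be reloaded, per the dev_info above.
 */
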
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}

static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
			      E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}

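/*
 * Worked example of the rate-factor math above, assuming a 1000 Mbps
 * link, a 300 Mbps VF cap, and E1000_RTTBCNRC_RF_INT_SHIFT == 14:
 *
 *	rf_int = 1000 / 300        = 3
 *	rf_dec = 1000 - 3 * 300    = 100
 *	rf_dec = 100 * 16384 / 300 = 5461
 *
 * so the hardware sees 3 + 5461/16384 ~= 3.333, i.e. the ratio of link
 * speed to the requested rate, split into integer and fractional parts.
 */
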
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}

static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}

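/*
 * Userspace reaches this ndo via the rtnetlink IFLA_VF_TX_RATE
 * attribute, e.g. (interface name is a placeholder):
 *
 *	ip link set eth0 vf 0 rate 300
 *
 * tx_rate is in Mbps; 0 removes the limit, and values above the
 * current link speed are rejected by the check above.
 */
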
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}

static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 dmac_thr;
	u16 hwm;

	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/* force threshold to 0. */
			wr32(E1000_DMCTXTH, 0);

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the RX threshold. set hwm to PBA - 2 * max
			 * frame size
			 */
			hwm = pba - (2 * adapter->max_frame_size);
			reg = rd32(E1000_DMACR);
			reg &= ~E1000_DMACR_DMACTHR_MASK;
			dmac_thr = pba - 4;
			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = +-1000 usec in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/*
			 * no lower threshold to disable
			 * coalescing (smart fifo) - UTRESH=0
			 */
			wr32(E1000_DMCRTRH, 0);

			/* set hwm to PBA - 2 * max frame size */
			wr32(E1000_FCRTC, hwm);

			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
			wr32(E1000_DMCTLX, reg);

			/*
			 * free space in tx packet buffer to wake from
			 * DMA coal
			 */
			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

			/*
			 * make low power state decision controlled
			 * by DMA coal
			 */
			reg = rd32(E1000_PCIEMISC);
			reg &= ~E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* endif adapter->dmac is not disabled */
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
		wr32(E1000_DMACR, 0);
	}
}

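/*
 * Unit check on the watchdog value above (a sketch of the arithmetic,
 * not new behaviour): per the comment, the DMAC watchdog field counts
 * 32 usec intervals, so a ~1000 usec timeout is encoded as
 *
 *	1000 >> 5 = 31 intervals, and 31 * 32 usec = 992 usec ~= 1000 usec
 *
 * Similarly, the E1000_DMCTXTH write expresses the free Tx
 * packet-buffer space needed to wake from coalescing in 64-byte units,
 * hence the >> 6.
 */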