/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev->name;
}
/**
 * igb_get_time_str - format current NIC and system time as string
 **/
static char *igb_get_time_str(struct igb_adapter *adapter,
			      char buffer[160])
{
	cycle_t hw = adapter->cycles.read(&adapter->cycles);
	struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
	struct timespec sys;
	struct timespec delta;
	getnstimeofday(&sys);

	delta = timespec_sub(nic, sys);

	sprintf(buffer,
		"HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
		hw,
		(long)nic.tv_sec, nic.tv_nsec,
		(long)sys.tv_sec, sys.tv_nsec,
		(long)delta.tv_sec, delta.tv_nsec);

	return buffer;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
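/*
 * Q_IDX_82576 interleaves queue indices across the two halves of the
 * 82576 register space: even logical queues land in the low half and
 * odd ones in the high half, e.g. i = 0, 1, 2, 3 maps to register
 * indices 0, 8, 1, 9.  This matches the queue pairing used for VFs
 * below (VF 0 owns queues 0 and 8, VF 1 owns 1 and 9, and so on).
 */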
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
			for (; j < adapter->rss_queues; j++)
				adapter->tx_ring[j]->reg_idx = rbase_offset +
				                               Q_IDX_82576(j);
		}
	case e1000_82575:
	case e1000_82580:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->pdev = adapter->pdev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;
	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
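		/*
		 * Each 32-bit IVAR0 entry serves two queue pairs: byte 0
		 * holds the vector for Rx queue n, byte 1 for Tx queue n,
		 * byte 2 for Rx queue n+8 and byte 3 for Tx queue n+8,
		 * where n is the entry index (queue & 0x7).  The code below
		 * masks and rewrites exactly one of those bytes.
		 */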
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
		/* 82580 uses the same table-based approach as 82576 but has
		   fewer entries as a result we carry over for queues greater
		   than 4. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}
	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
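	/*
	 * For example, 4 Rx queues with separate Tx handlers cost
	 * 4 + 4 = 8 queue vectors plus the link-status vector added
	 * above, 9 MSI-X vectors total; with IGB_FLAG_QUEUE_PAIRS set
	 * the same queues fit in 4 + 1 = 5 vectors.
	 */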
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced Tx Queue count. */
	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}

	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
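/*
 * Note: small itr settings (1-3) act as mode selectors for dynamic
 * interrupt moderation rather than literal throttle values, so the
 * vector is seeded with IGB_START_ITR until the adaptive algorithm
 * takes over.  The same clamp is applied for Tx below.
 */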
static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	igb_set_interrupt_capability(adapter);

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	synchronize_irq(adapter->pdev->irq);
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}
/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	igb_update_stats(adapter);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}
	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
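		/* As an illustration: a 9018-byte jumbo frame needs
		 * 2 * (9018 + 16 - 4) = 18060 bytes of Tx space, which
		 * the min_tx_space math below rounds up to 18 KB. */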
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}
	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
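	/* e.g. a 34 KB Rx PBA with 1522-byte frames gives
	 * min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334 bytes,
	 * which is then masked to 16-byte granularity below. */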
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;

		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags = 0;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);
	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats		= igb_get_stats,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_register	= igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u32 part_num;
	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (!err) {
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			err = pci_set_consistent_dma_mask(pdev,
							  DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);
	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_ABS_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;
	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;
	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);
	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");
	netdev->features = NETIF_F_SG |
			   NETIF_F_IP_CSUM |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->features |= NETIF_F_IPV6_CSUM;
	netdev->features |= NETIF_F_TSO;
	netdev->features |= NETIF_F_TSO6;
	netdev->features |= NETIF_F_GRO;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	if (hw->mac.type >= e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;
	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (igb_validate_nvm_checksum(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}
	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);
	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type == e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;
	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}

#endif
	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
		                                            "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		   "unknown"),
		 netdev->dev_addr);

	igb_read_part_num(hw, &part_num);
	dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
		(part_num >> 8), (part_num & 0xff));

	dev_info(&pdev->dev,
		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		adapter->msix_entries ? "MSI-X" :
		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		adapter->num_rx_queues, adapter->num_tx_queues);

	return 0;
err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!igb_check_reset_block(hw))
		igb_reset_phy(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* flush_scheduled work may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled  */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef CONFIG_IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));

	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 **/
1777 #ifdef CONFIG_PCI_IOV
1778 struct pci_dev *pdev = adapter->pdev;
1780 if (adapter->vfs_allocated_count > 7)
1781 adapter->vfs_allocated_count = 7;
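	/* The cap of 7 reflects the 82576's eight virtualization pools:
	 * the PF keeps one pool for itself, leaving at most seven VFs. */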
	if (adapter->vfs_allocated_count) {
		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
		                           sizeof(struct vf_data_storage),
		                           GFP_KERNEL);
		/* if allocation failed then we do not support SR-IOV */
		if (!adapter->vf_data) {
			adapter->vfs_allocated_count = 0;
			dev_err(&pdev->dev, "Unable to allocate memory for VF "
			        "Data Storage\n");
		}
	}

	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
#endif /* CONFIG_PCI_IOV */
		adapter->vfs_allocated_count = 0;
#ifdef CONFIG_PCI_IOV
	} else {
		unsigned char mac_addr[ETH_ALEN];
		int i;

		dev_info(&pdev->dev, "%d vfs allocated\n",
		         adapter->vfs_allocated_count);
		for (i = 0; i < adapter->vfs_allocated_count; i++) {
			random_ether_addr(mac_addr);
			igb_set_vf_mac(adapter, i, mac_addr);
		}
	}
#endif /* CONFIG_PCI_IOV */
}
/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_82580:
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * The 82580 timesync updates the system timer every 8ns by 8ns
		 * and the value cannot be shifted.  Instead we need to shift
		 * the registers to generate a 64bit timer value.  As a result
		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
		 * 24 in order to generate a larger value for synchronization.
		 */
		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
		/* disable system timer temporarily by setting bit 31 */
		wr32(E1000_TSAUXC, 0x80000000);
		wrfl();

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIMR, 0x00000000);
		wr32(E1000_SYSTIML, 0x80000000);
		wr32(E1000_SYSTIMH, 0x000000FF);
		wrfl();

		/* enable system timer by clearing bit 31 */
		wr32(E1000_TSAUXC, 0x0);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/**
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * subtracted at each clock tick. The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		wr32(E1000_TIMINCA,
		     (1 << E1000_TIMINCA_16NS_SHIFT) |
		     (16 << IGB_82576_TSYNC_SHIFT));
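		/* SYSTIM therefore advances by 16 << 19 counter units every
		 * 16 ns, i.e. one nanosecond corresponds to 2^19 units,
		 * which is what the cyclecounter shift of 19 above undoes. */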
		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIML, 0x00000000);
		wr32(E1000_SYSTIMH, 0xFF800000);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
	default:
		break;
	}
}
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

#ifdef CONFIG_PCI_IOV
	if (hw->mac.type == e1000_82576)
		adapter->vfs_allocated_count = max_vfs;

#endif /* CONFIG_PCI_IOV */
	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());

	/*
	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
	 * then we should combine the queues into a queue pair in order to
	 * conserve interrupts due to limited supply
	 */
	if ((adapter->rss_queues > 4) ||
	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
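	/* e.g. 8 RSS queues would otherwise want 8 Rx + 8 Tx + 1 other
	 * = 17 MSI-X vectors; pairing Tx with Rx halves that to 9. */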
	/* This call may decrease the number of queues */
	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_init_hw_timer(adapter);
	igb_probe_vfs(adapter);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}
1977 * igb_open - Called when a network interface is made active
1978 * @netdev: network interface device structure
1980 * Returns 0 on success, negative value on failure
1982 * The open entry point is called when a network interface is made
1983 * active by the system (IFF_UP). At this point all resources needed
1984 * for transmit and receive operations are allocated, the interrupt
1985 * handler is registered with the OS, the watchdog timer is started,
1986 * and the stack is notified that the interface is ready.
1988 static int igb_open(struct net_device *netdev)
1990 struct igb_adapter *adapter = netdev_priv(netdev);
1991 struct e1000_hw *hw = &adapter->hw;
1995 /* disallow open during test */
1996 if (test_bit(__IGB_TESTING, &adapter->state))
1999 netif_carrier_off(netdev);
2001 /* allocate transmit descriptors */
2002 err = igb_setup_all_tx_resources(adapter);
2006 /* allocate receive descriptors */
2007 err = igb_setup_all_rx_resources(adapter);
2011 igb_power_up_link(adapter);
2013 /* before we allocate an interrupt, we must be ready to handle it.
2014 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2015 * as soon as we call pci_request_irq, so we have to set up our
2016 * clean_rx handler before we do so. */
2017 igb_configure(adapter);
2019 err = igb_request_irq(adapter);
2023 /* From here on the code is the same as igb_up() */
2024 clear_bit(__IGB_DOWN, &adapter->state);
2026 for (i = 0; i < adapter->num_q_vectors; i++) {
2027 struct igb_q_vector *q_vector = adapter->q_vector[i];
2028 napi_enable(&q_vector->napi);
2031 /* Clear any pending interrupts. */
2034 igb_irq_enable(adapter);
2036 /* notify VFs that reset has been completed */
2037 if (adapter->vfs_allocated_count) {
2038 u32 reg_data = rd32(E1000_CTRL_EXT);
2039 reg_data |= E1000_CTRL_EXT_PFRSTD;
2040 wr32(E1000_CTRL_EXT, reg_data);
2043 netif_tx_start_all_queues(netdev);
2045 /* start the watchdog. */
2046 hw->mac.get_link_status = 1;
2047 schedule_work(&adapter->watchdog_task);
2052 igb_release_hw_control(adapter);
2053 igb_power_down_link(adapter);
2054 igb_free_all_rx_resources(adapter);
2056 igb_free_all_tx_resources(adapter);
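/* For reference, a minimal sketch (not compiled into the driver) of
 * how these entry points plug into the stack; the real net_device_ops
 * table lives elsewhere in this file and carries many more callbacks:
 */
#if 0
static const struct net_device_ops igb_netdev_ops_sketch = {
	.ndo_open	= igb_open,
	.ndo_stop	= igb_close,
	.ndo_start_xmit	= igb_xmit_frame_adv,
};
#endif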
2064 * igb_close - Disables a network interface
2065 * @netdev: network interface device structure
2067 * Returns 0, this is not allowed to fail
2069 * The close entry point is called when an interface is de-activated
2070 * by the OS. The hardware is still under the driver's control, but
2071 * needs to be disabled. A global MAC reset is issued to stop the
2072 * hardware, and all transmit and receive resources are freed.
2074 static int igb_close(struct net_device *netdev)
2076 struct igb_adapter *adapter = netdev_priv(netdev);
2078 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2081 igb_free_irq(adapter);
2083 igb_free_all_tx_resources(adapter);
2084 igb_free_all_rx_resources(adapter);
2090 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2091 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2093 * Return 0 on success, negative on failure
2095 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2097 struct pci_dev *pdev = tx_ring->pdev;
2100 size = sizeof(struct igb_buffer) * tx_ring->count;
2101 tx_ring->buffer_info = vmalloc(size);
2102 if (!tx_ring->buffer_info)
2104 memset(tx_ring->buffer_info, 0, size);
2106 /* round up to nearest 4K */
2107 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2108 tx_ring->size = ALIGN(tx_ring->size, 4096);
2110 tx_ring->desc = pci_alloc_consistent(pdev,
2117 tx_ring->next_to_use = 0;
2118 tx_ring->next_to_clean = 0;
2122 vfree(tx_ring->buffer_info);
2124 "Unable to allocate memory for the transmit descriptor ring\n");
2129 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2130 * (Descriptors) for all queues
2131 * @adapter: board private structure
2133 * Return 0 on success, negative on failure
2135 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2137 struct pci_dev *pdev = adapter->pdev;
2140 for (i = 0; i < adapter->num_tx_queues; i++) {
2141 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2144 "Allocation for Tx Queue %u failed\n", i);
2145 for (i--; i >= 0; i--)
2146 igb_free_tx_resources(adapter->tx_ring[i]);
2151 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2152 int r_idx = i % adapter->num_tx_queues;
2153 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
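/* e.g. if IGB_ABS_MAX_TX_QUEUES is 8 and num_tx_queues is 4, the
 * table maps indices 0..7 onto rings 0,1,2,3,0,1,2,3, so any queue
 * index chosen by the stack resolves to a valid ring. */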
2159 * igb_setup_tctl - configure the transmit control registers
2160 * @adapter: Board private structure
2162 void igb_setup_tctl(struct igb_adapter *adapter)
2164 struct e1000_hw *hw = &adapter->hw;
2167 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2168 wr32(E1000_TXDCTL(0), 0);
2170 /* Program the Transmit Control Register */
2171 tctl = rd32(E1000_TCTL);
2172 tctl &= ~E1000_TCTL_CT;
2173 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2174 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2176 igb_config_collision_dist(hw);
2178 /* Enable transmits */
2179 tctl |= E1000_TCTL_EN;
2181 wr32(E1000_TCTL, tctl);
2185 * igb_configure_tx_ring - Configure transmit ring after Reset
2186 * @adapter: board private structure
2187 * @ring: tx ring to configure
2189 * Configure a transmit ring after a reset.
2191 void igb_configure_tx_ring(struct igb_adapter *adapter,
2192 struct igb_ring *ring)
2194 struct e1000_hw *hw = &adapter->hw;
2196 u64 tdba = ring->dma;
2197 int reg_idx = ring->reg_idx;
2199 /* disable the queue */
2200 txdctl = rd32(E1000_TXDCTL(reg_idx));
2201 wr32(E1000_TXDCTL(reg_idx),
2202 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2206 wr32(E1000_TDLEN(reg_idx),
2207 ring->count * sizeof(union e1000_adv_tx_desc));
2208 wr32(E1000_TDBAL(reg_idx),
2209 tdba & 0x00000000ffffffffULL);
2210 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2212 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2213 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2214 writel(0, ring->head);
2215 writel(0, ring->tail);
2217 txdctl |= IGB_TX_PTHRESH;
2218 txdctl |= IGB_TX_HTHRESH << 8;
2219 txdctl |= IGB_TX_WTHRESH << 16;
2221 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2222 wr32(E1000_TXDCTL(reg_idx), txdctl);
2226 * igb_configure_tx - Configure transmit Unit after Reset
2227 * @adapter: board private structure
2229 * Configure the Tx unit of the MAC after a reset.
2231 static void igb_configure_tx(struct igb_adapter *adapter)
2235 for (i = 0; i < adapter->num_tx_queues; i++)
2236 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2240 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2241 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2243 * Returns 0 on success, negative on failure
2245 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2247 struct pci_dev *pdev = rx_ring->pdev;
2250 size = sizeof(struct igb_buffer) * rx_ring->count;
2251 rx_ring->buffer_info = vmalloc(size);
2252 if (!rx_ring->buffer_info)
2254 memset(rx_ring->buffer_info, 0, size);
2256 desc_len = sizeof(union e1000_adv_rx_desc);
2258 /* Round up to nearest 4K */
2259 rx_ring->size = rx_ring->count * desc_len;
2260 rx_ring->size = ALIGN(rx_ring->size, 4096);
2262 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2268 rx_ring->next_to_clean = 0;
2269 rx_ring->next_to_use = 0;
2274 vfree(rx_ring->buffer_info);
2275 rx_ring->buffer_info = NULL;
2276 dev_err(&pdev->dev, "Unable to allocate memory for "
2277 "the receive descriptor ring\n");
2282 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2283 * (Descriptors) for all queues
2284 * @adapter: board private structure
2286 * Return 0 on success, negative on failure
2288 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2290 struct pci_dev *pdev = adapter->pdev;
2293 for (i = 0; i < adapter->num_rx_queues; i++) {
2294 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2297 "Allocation for Rx Queue %u failed\n", i);
2298 for (i--; i >= 0; i--)
2299 igb_free_rx_resources(adapter->rx_ring[i]);
2308 * igb_setup_mrqc - configure the multiple receive queue control registers
2309 * @adapter: Board private structure
2311 static void igb_setup_mrqc(struct igb_adapter *adapter)
2313 struct e1000_hw *hw = &adapter->hw;
2315 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2320 static const u8 rsshash[40] = {
2321 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2322 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2323 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2324 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2326 /* Fill out hash function seeds */
2327 for (j = 0; j < 10; j++) {
2328 u32 rsskey = rsshash[(j * 4)];
2329 rsskey |= rsshash[(j * 4) + 1] << 8;
2330 rsskey |= rsshash[(j * 4) + 2] << 16;
2331 rsskey |= rsshash[(j * 4) + 3] << 24;
2332 array_wr32(E1000_RSSRK(0), j, rsskey);
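/* Worked example for j = 0: 0x6d | 0x5a << 8 | 0x56 << 16 |
 * 0xda << 24 = 0xda565a6d, i.e. the first four key bytes packed
 * little-endian (equivalent to get_unaligned_le32(&rsshash[0])). */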
2335 num_rx_queues = adapter->rss_queues;
2337 if (adapter->vfs_allocated_count) {
2338 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2339 switch (hw->mac.type) {
2355 if (hw->mac.type == e1000_82575)
2359 for (j = 0; j < (32 * 4); j++) {
2360 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2362 reta.bytes[j & 3] |= num_rx_queues << shift2;
2364 wr32(E1000_RETA(j >> 2), reta.dword);
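/* Worked example: the 128 redirection entries are packed four per
 * 32-bit RETA register. With num_rx_queues = 4 and shift = 0 the
 * bytes cycle 0,1,2,3, so on a little-endian host the first
 * reta.dword written is 0x03020100. */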
2368 * Disable raw packet checksumming so that RSS hash is placed in
2369 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2370 * offloads as they are enabled by default
2372 rxcsum = rd32(E1000_RXCSUM);
2373 rxcsum |= E1000_RXCSUM_PCSD;
2375 if (adapter->hw.mac.type >= e1000_82576)
2376 /* Enable Receive Checksum Offload for SCTP */
2377 rxcsum |= E1000_RXCSUM_CRCOFL;
2379 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2380 wr32(E1000_RXCSUM, rxcsum);
2382 /* If VMDq is enabled then we set the appropriate mode for that, else
2383 * we default to RSS so that an RSS hash is calculated per packet even
2384 * if we are only using one queue */
2385 if (adapter->vfs_allocated_count) {
2386 if (hw->mac.type > e1000_82575) {
2387 /* Set the default pool for the PF's first queue */
2388 u32 vtctl = rd32(E1000_VT_CTL);
2389 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2390 E1000_VT_CTL_DISABLE_DEF_POOL);
2391 vtctl |= adapter->vfs_allocated_count <<
2392 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2393 wr32(E1000_VT_CTL, vtctl);
2395 if (adapter->rss_queues > 1)
2396 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2398 mrqc = E1000_MRQC_ENABLE_VMDQ;
2400 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2402 igb_vmm_control(adapter);
2404 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2405 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2406 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2407 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2408 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2409 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2410 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2411 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2413 wr32(E1000_MRQC, mrqc);
2417 * igb_setup_rctl - configure the receive control registers
2418 * @adapter: Board private structure
2420 void igb_setup_rctl(struct igb_adapter *adapter)
2422 struct e1000_hw *hw = &adapter->hw;
2425 rctl = rd32(E1000_RCTL);
2427 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2428 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2430 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2431 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2434 * enable stripping of CRC. It's unlikely this will break BMC
2435 * redirection as it did with e1000. Newer features require
2436 * that the HW strips the CRC.
2438 rctl |= E1000_RCTL_SECRC;
2440 /* disable store bad packets and clear size bits. */
2441 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2443 /* enable LPE to prevent packets larger than max_frame_size */
2444 rctl |= E1000_RCTL_LPE;
2446 /* disable queue 0 to prevent tail write w/o re-config */
2447 wr32(E1000_RXDCTL(0), 0);
2449 /* Attention!!! For SR-IOV PF driver operations you must enable
2450 * queue drop for all VF and PF queues to prevent head of line blocking
2451 * if an untrusted VF does not provide descriptors to hardware.
2453 if (adapter->vfs_allocated_count) {
2454 /* set all queue drop enable bits */
2455 wr32(E1000_QDE, ALL_QUEUES);
2458 wr32(E1000_RCTL, rctl);
2461 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2464 struct e1000_hw *hw = &adapter->hw;
2467 /* if it isn't the PF, check whether the VF has VLANs enabled
2468 * and, if so, increase the size to make room for a VLAN tag */
2469 if (vfn < adapter->vfs_allocated_count &&
2470 adapter->vf_data[vfn].vlans_enabled)
2471 size += VLAN_TAG_SIZE;
2473 vmolr = rd32(E1000_VMOLR(vfn));
2474 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2475 vmolr |= size | E1000_VMOLR_LPE;
2476 wr32(E1000_VMOLR(vfn), vmolr);
2482 * igb_rlpml_set - set maximum receive packet size
2483 * @adapter: board private structure
2485 * Configure maximum receivable packet size.
2487 static void igb_rlpml_set(struct igb_adapter *adapter)
2489 u32 max_frame_size = adapter->max_frame_size;
2490 struct e1000_hw *hw = &adapter->hw;
2491 u16 pf_id = adapter->vfs_allocated_count;
2494 max_frame_size += VLAN_TAG_SIZE;
2496 /* if vfs are enabled we set RLPML to the largest possible request
2497 * size and set the VMOLR RLPML to the size we need */
2499 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2500 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2503 wr32(E1000_RLPML, max_frame_size);
2506 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2509 struct e1000_hw *hw = &adapter->hw;
2513 * This register exists only on 82576 and newer, so on older
2514 * hardware simply return without doing anything
2516 if (hw->mac.type < e1000_82576)
2519 vmolr = rd32(E1000_VMOLR(vfn));
2520 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2522 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2524 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2526 /* clear all bits that might not be set */
2527 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2529 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2530 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2532 * for VMDq only allow the VFs and pool 0 to accept broadcast and multicast packets
2535 if (vfn <= adapter->vfs_allocated_count)
2536 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2538 wr32(E1000_VMOLR(vfn), vmolr);
2542 * igb_configure_rx_ring - Configure a receive ring after Reset
2543 * @adapter: board private structure
2544 * @ring: receive ring to be configured
2546 * Configure the Rx unit of the MAC after a reset.
2548 void igb_configure_rx_ring(struct igb_adapter *adapter,
2549 struct igb_ring *ring)
2551 struct e1000_hw *hw = &adapter->hw;
2552 u64 rdba = ring->dma;
2553 int reg_idx = ring->reg_idx;
2556 /* disable the queue */
2557 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2558 wr32(E1000_RXDCTL(reg_idx),
2559 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2561 /* Set DMA base address registers */
2562 wr32(E1000_RDBAL(reg_idx),
2563 rdba & 0x00000000ffffffffULL);
2564 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2565 wr32(E1000_RDLEN(reg_idx),
2566 ring->count * sizeof(union e1000_adv_rx_desc));
2568 /* initialize head and tail */
2569 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2570 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2571 writel(0, ring->head);
2572 writel(0, ring->tail);
2574 /* set descriptor configuration */
2575 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2576 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2577 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2578 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2579 srrctl |= IGB_RXBUFFER_16384 >>
2580 E1000_SRRCTL_BSIZEPKT_SHIFT;
2582 srrctl |= (PAGE_SIZE / 2) >>
2583 E1000_SRRCTL_BSIZEPKT_SHIFT;
2585 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2587 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2588 E1000_SRRCTL_BSIZEPKT_SHIFT;
2589 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2591 /* Only set Drop Enable if we are supporting multiple queues */
2592 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2593 srrctl |= E1000_SRRCTL_DROP_EN;
2595 wr32(E1000_SRRCTL(reg_idx), srrctl);
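/* Worked example of the one-buffer path: rx_buffer_len = 2048 gives
 * ALIGN(2048, 1024) >> 10 = 2, i.e. a 2 KB packet buffer expressed
 * in SRRCTL's 1 KB BSIZEPKT units. */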
2597 /* set filtering for VMDQ pools */
2598 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2600 /* enable receive descriptor fetching */
2601 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2602 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2603 rxdctl &= 0xFFF00000;
2604 rxdctl |= IGB_RX_PTHRESH;
2605 rxdctl |= IGB_RX_HTHRESH << 8;
2606 rxdctl |= IGB_RX_WTHRESH << 16;
2607 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2611 * igb_configure_rx - Configure receive Unit after Reset
2612 * @adapter: board private structure
2614 * Configure the Rx unit of the MAC after a reset.
2616 static void igb_configure_rx(struct igb_adapter *adapter)
2620 /* set UTA to appropriate mode */
2621 igb_set_uta(adapter);
2623 /* set the correct pool for the PF default MAC address in entry 0 */
2624 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2625 adapter->vfs_allocated_count);
2627 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2628 * the Base and Length of the Rx Descriptor Ring */
2629 for (i = 0; i < adapter->num_rx_queues; i++)
2630 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2634 * igb_free_tx_resources - Free Tx Resources per Queue
2635 * @tx_ring: Tx descriptor ring for a specific queue
2637 * Free all transmit software resources
2639 void igb_free_tx_resources(struct igb_ring *tx_ring)
2641 igb_clean_tx_ring(tx_ring);
2643 vfree(tx_ring->buffer_info);
2644 tx_ring->buffer_info = NULL;
2646 /* if not set, then don't free */
2650 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2651 tx_ring->desc, tx_ring->dma);
2653 tx_ring->desc = NULL;
2657 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2658 * @adapter: board private structure
2660 * Free all transmit software resources
2662 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2666 for (i = 0; i < adapter->num_tx_queues; i++)
2667 igb_free_tx_resources(adapter->tx_ring[i]);
2670 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2671 struct igb_buffer *buffer_info)
2673 if (buffer_info->dma) {
2674 if (buffer_info->mapped_as_page)
2675 pci_unmap_page(tx_ring->pdev,
2677 buffer_info->length,
2680 pci_unmap_single(tx_ring->pdev,
2682 buffer_info->length,
2684 buffer_info->dma = 0;
2686 if (buffer_info->skb) {
2687 dev_kfree_skb_any(buffer_info->skb);
2688 buffer_info->skb = NULL;
2690 buffer_info->time_stamp = 0;
2691 buffer_info->length = 0;
2692 buffer_info->next_to_watch = 0;
2693 buffer_info->mapped_as_page = false;
2697 * igb_clean_tx_ring - Free Tx Buffers
2698 * @tx_ring: ring to be cleaned
2700 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2702 struct igb_buffer *buffer_info;
2706 if (!tx_ring->buffer_info)
2708 /* Free all the Tx ring sk_buffs */
2710 for (i = 0; i < tx_ring->count; i++) {
2711 buffer_info = &tx_ring->buffer_info[i];
2712 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2715 size = sizeof(struct igb_buffer) * tx_ring->count;
2716 memset(tx_ring->buffer_info, 0, size);
2718 /* Zero out the descriptor ring */
2719 memset(tx_ring->desc, 0, tx_ring->size);
2721 tx_ring->next_to_use = 0;
2722 tx_ring->next_to_clean = 0;
2726 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2727 * @adapter: board private structure
2729 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2733 for (i = 0; i < adapter->num_tx_queues; i++)
2734 igb_clean_tx_ring(adapter->tx_ring[i]);
2738 * igb_free_rx_resources - Free Rx Resources
2739 * @rx_ring: ring to clean the resources from
2741 * Free all receive software resources
2743 void igb_free_rx_resources(struct igb_ring *rx_ring)
2745 igb_clean_rx_ring(rx_ring);
2747 vfree(rx_ring->buffer_info);
2748 rx_ring->buffer_info = NULL;
2750 /* if not set, then don't free */
2754 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2755 rx_ring->desc, rx_ring->dma);
2757 rx_ring->desc = NULL;
2761 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2762 * @adapter: board private structure
2764 * Free all receive software resources
2766 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2770 for (i = 0; i < adapter->num_rx_queues; i++)
2771 igb_free_rx_resources(adapter->rx_ring[i]);
2775 * igb_clean_rx_ring - Free Rx Buffers per Queue
2776 * @rx_ring: ring to free buffers from
2778 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2780 struct igb_buffer *buffer_info;
2784 if (!rx_ring->buffer_info)
2787 /* Free all the Rx ring sk_buffs */
2788 for (i = 0; i < rx_ring->count; i++) {
2789 buffer_info = &rx_ring->buffer_info[i];
2790 if (buffer_info->dma) {
2791 pci_unmap_single(rx_ring->pdev,
2793 rx_ring->rx_buffer_len,
2794 PCI_DMA_FROMDEVICE);
2795 buffer_info->dma = 0;
2798 if (buffer_info->skb) {
2799 dev_kfree_skb(buffer_info->skb);
2800 buffer_info->skb = NULL;
2802 if (buffer_info->page_dma) {
2803 pci_unmap_page(rx_ring->pdev,
2804 buffer_info->page_dma,
2806 PCI_DMA_FROMDEVICE);
2807 buffer_info->page_dma = 0;
2809 if (buffer_info->page) {
2810 put_page(buffer_info->page);
2811 buffer_info->page = NULL;
2812 buffer_info->page_offset = 0;
2816 size = sizeof(struct igb_buffer) * rx_ring->count;
2817 memset(rx_ring->buffer_info, 0, size);
2819 /* Zero out the descriptor ring */
2820 memset(rx_ring->desc, 0, rx_ring->size);
2822 rx_ring->next_to_clean = 0;
2823 rx_ring->next_to_use = 0;
2827 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2828 * @adapter: board private structure
2830 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2834 for (i = 0; i < adapter->num_rx_queues; i++)
2835 igb_clean_rx_ring(adapter->rx_ring[i]);
2839 * igb_set_mac - Change the Ethernet Address of the NIC
2840 * @netdev: network interface device structure
2841 * @p: pointer to an address structure
2843 * Returns 0 on success, negative on failure
2845 static int igb_set_mac(struct net_device *netdev, void *p)
2847 struct igb_adapter *adapter = netdev_priv(netdev);
2848 struct e1000_hw *hw = &adapter->hw;
2849 struct sockaddr *addr = p;
2851 if (!is_valid_ether_addr(addr->sa_data))
2852 return -EADDRNOTAVAIL;
2854 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2855 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2857 /* set the correct pool for the new PF MAC address in entry 0 */
2858 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2859 adapter->vfs_allocated_count);
2865 * igb_write_mc_addr_list - write multicast addresses to MTA
2866 * @netdev: network interface device structure
2868 * Writes multicast address list to the MTA hash table.
2869 * Returns: -ENOMEM on failure
2870 * 0 on no addresses written
2871 * X on writing X addresses to MTA
2873 static int igb_write_mc_addr_list(struct net_device *netdev)
2875 struct igb_adapter *adapter = netdev_priv(netdev);
2876 struct e1000_hw *hw = &adapter->hw;
2877 struct dev_mc_list *mc_ptr;
2881 if (netdev_mc_empty(netdev)) {
2882 /* nothing to program, so clear mc list */
2883 igb_update_mc_addr_list(hw, NULL, 0);
2884 igb_restore_vf_multicasts(adapter);
2888 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2892 /* The shared function expects a packed array of only addresses. */
2894 netdev_for_each_mc_addr(mc_ptr, netdev)
2895 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2897 igb_update_mc_addr_list(hw, mta_list, i);
2900 return netdev_mc_count(netdev);
2904 * igb_write_uc_addr_list - write unicast addresses to RAR table
2905 * @netdev: network interface device structure
2907 * Writes unicast address list to the RAR table.
2908 * Returns: -ENOMEM on failure/insufficient address space
2909 * 0 on no addresses written
2910 * X on writing X addresses to the RAR table
2912 static int igb_write_uc_addr_list(struct net_device *netdev)
2914 struct igb_adapter *adapter = netdev_priv(netdev);
2915 struct e1000_hw *hw = &adapter->hw;
2916 unsigned int vfn = adapter->vfs_allocated_count;
2917 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2920 /* return ENOMEM indicating insufficient memory for addresses */
2921 if (netdev_uc_count(netdev) > rar_entries)
2924 if (!netdev_uc_empty(netdev) && rar_entries) {
2925 struct netdev_hw_addr *ha;
2927 netdev_for_each_uc_addr(ha, netdev) {
2930 igb_rar_set_qsel(adapter, ha->addr,
2936 /* write the addresses in reverse order to avoid write combining */
2937 for (; rar_entries > 0 ; rar_entries--) {
2938 wr32(E1000_RAH(rar_entries), 0);
2939 wr32(E1000_RAL(rar_entries), 0);
2947 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2948 * @netdev: network interface device structure
2950 * The set_rx_mode entry point is called whenever the unicast or multicast
2951 * address lists or the network interface flags are updated. This routine is
2952 * responsible for configuring the hardware for proper unicast, multicast,
2953 * promiscuous mode, and all-multi behavior.
2955 static void igb_set_rx_mode(struct net_device *netdev)
2957 struct igb_adapter *adapter = netdev_priv(netdev);
2958 struct e1000_hw *hw = &adapter->hw;
2959 unsigned int vfn = adapter->vfs_allocated_count;
2960 u32 rctl, vmolr = 0;
2963 /* Check for Promiscuous and All Multicast modes */
2964 rctl = rd32(E1000_RCTL);
2966 /* clear the affected bits */
2967 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2969 if (netdev->flags & IFF_PROMISC) {
2970 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2971 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2973 if (netdev->flags & IFF_ALLMULTI) {
2974 rctl |= E1000_RCTL_MPE;
2975 vmolr |= E1000_VMOLR_MPME;
2978 * Write addresses to the MTA; if the attempt fails
2979 * then we should just turn on promiscuous mode so
2980 * that we can at least receive multicast traffic
2982 count = igb_write_mc_addr_list(netdev);
2984 rctl |= E1000_RCTL_MPE;
2985 vmolr |= E1000_VMOLR_MPME;
2987 vmolr |= E1000_VMOLR_ROMPE;
2991 * Write addresses to the available RAR registers; if there is not
2992 * sufficient space to store all of them, then enable
2993 * unicast promiscuous mode
2995 count = igb_write_uc_addr_list(netdev);
2997 rctl |= E1000_RCTL_UPE;
2998 vmolr |= E1000_VMOLR_ROPE;
3000 rctl |= E1000_RCTL_VFE;
3002 wr32(E1000_RCTL, rctl);
3005 * In order to support SR-IOV and eventually VMDq it is necessary to set
3006 * the VMOLR to enable the appropriate modes. Without this workaround
3007 * we will have issues with VLAN tag stripping not being done for frames
3008 * that are only arriving because we are the default pool
3010 if (hw->mac.type < e1000_82576)
3013 vmolr |= rd32(E1000_VMOLR(vfn)) &
3014 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3015 wr32(E1000_VMOLR(vfn), vmolr);
3016 igb_restore_vf_multicasts(adapter);
3019 /* Need to wait a few seconds after link up to get diagnostic information from the phy */
3021 static void igb_update_phy_info(unsigned long data)
3023 struct igb_adapter *adapter = (struct igb_adapter *) data;
3024 igb_get_phy_info(&adapter->hw);
3028 * igb_has_link - check shared code for link and determine up/down
3029 * @adapter: pointer to driver private info
3031 bool igb_has_link(struct igb_adapter *adapter)
3033 struct e1000_hw *hw = &adapter->hw;
3034 bool link_active = false;
3037 /* get_link_status is set on LSC (link status) interrupt or
3038 * rx sequence error interrupt. get_link_status will stay
3039 * false until the e1000_check_for_link establishes link
3040 * for copper adapters ONLY
3042 switch (hw->phy.media_type) {
3043 case e1000_media_type_copper:
3044 if (hw->mac.get_link_status) {
3045 ret_val = hw->mac.ops.check_for_link(hw);
3046 link_active = !hw->mac.get_link_status;
3051 case e1000_media_type_internal_serdes:
3052 ret_val = hw->mac.ops.check_for_link(hw);
3053 link_active = hw->mac.serdes_has_link;
3056 case e1000_media_type_unknown:
3064 * igb_watchdog - Timer Call-back
3065 * @data: pointer to adapter cast into an unsigned long
3067 static void igb_watchdog(unsigned long data)
3069 struct igb_adapter *adapter = (struct igb_adapter *)data;
3070 /* Do the rest outside of interrupt context */
3071 schedule_work(&adapter->watchdog_task);
3074 static void igb_watchdog_task(struct work_struct *work)
3076 struct igb_adapter *adapter = container_of(work,
3079 struct e1000_hw *hw = &adapter->hw;
3080 struct net_device *netdev = adapter->netdev;
3084 link = igb_has_link(adapter);
3086 if (!netif_carrier_ok(netdev)) {
3088 hw->mac.ops.get_speed_and_duplex(hw,
3089 &adapter->link_speed,
3090 &adapter->link_duplex);
3092 ctrl = rd32(E1000_CTRL);
3093 /* Link status message must follow this format */
3094 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3095 "Flow Control: %s\n",
3097 adapter->link_speed,
3098 adapter->link_duplex == FULL_DUPLEX ?
3099 "Full Duplex" : "Half Duplex",
3100 ((ctrl & E1000_CTRL_TFCE) &&
3101 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3102 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3103 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3105 /* adjust timeout factor according to speed/duplex */
3106 adapter->tx_timeout_factor = 1;
3107 switch (adapter->link_speed) {
3109 adapter->tx_timeout_factor = 14;
3112 /* maybe add some timeout factor ? */
3116 netif_carrier_on(netdev);
3118 igb_ping_all_vfs(adapter);
3120 /* link state has changed, schedule phy info update */
3121 if (!test_bit(__IGB_DOWN, &adapter->state))
3122 mod_timer(&adapter->phy_info_timer,
3123 round_jiffies(jiffies + 2 * HZ));
3126 if (netif_carrier_ok(netdev)) {
3127 adapter->link_speed = 0;
3128 adapter->link_duplex = 0;
3129 /* Link status message must follow this format */
3130 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3132 netif_carrier_off(netdev);
3134 igb_ping_all_vfs(adapter);
3136 /* link state has changed, schedule phy info update */
3137 if (!test_bit(__IGB_DOWN, &adapter->state))
3138 mod_timer(&adapter->phy_info_timer,
3139 round_jiffies(jiffies + 2 * HZ));
3143 igb_update_stats(adapter);
3145 for (i = 0; i < adapter->num_tx_queues; i++) {
3146 struct igb_ring *tx_ring = adapter->tx_ring[i];
3147 if (!netif_carrier_ok(netdev)) {
3148 /* We've lost link, so the controller stops DMA,
3149 * but we've got queued Tx work that's never going
3150 * to get done, so reset controller to flush Tx.
3151 * (Do the reset outside of interrupt context). */
3152 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3153 adapter->tx_timeout_count++;
3154 schedule_work(&adapter->reset_task);
3155 /* return immediately since reset is imminent */
3160 /* Force detection of hung controller every watchdog period */
3161 tx_ring->detect_tx_hung = true;
3164 /* Cause software interrupt to ensure rx ring is cleaned */
3165 if (adapter->msix_entries) {
3167 for (i = 0; i < adapter->num_q_vectors; i++) {
3168 struct igb_q_vector *q_vector = adapter->q_vector[i];
3169 eics |= q_vector->eims_value;
3171 wr32(E1000_EICS, eics);
3173 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3176 /* Reset the timer */
3177 if (!test_bit(__IGB_DOWN, &adapter->state))
3178 mod_timer(&adapter->watchdog_timer,
3179 round_jiffies(jiffies + 2 * HZ));
3182 enum latency_range {
3186 latency_invalid = 255
3190 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3192 * Stores a new ITR value based strictly on packet size. This
3193 * algorithm is less sophisticated than that used in igb_update_itr,
3194 * due to the difficulty of synchronizing statistics across multiple
3195 * receive rings. The divisors and thresholds used by this function
3196 * were determined based on theoretical maximum wire speed and testing
3197 * data, in order to minimize response time while increasing bulk throughput.
3199 * This functionality is controlled by the InterruptThrottleRate module
3200 * parameter (see igb_param.c)
3201 * NOTE: This function is called only when operating in a multiqueue
3202 * receive environment.
3203 * @q_vector: pointer to q_vector
3205 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3207 int new_val = q_vector->itr_val;
3208 int avg_wire_size = 0;
3209 struct igb_adapter *adapter = q_vector->adapter;
3211 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3212 * ints/sec - ITR timer value of 120 ticks.
3214 if (adapter->link_speed != SPEED_1000) {
3219 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3220 struct igb_ring *ring = q_vector->rx_ring;
3221 avg_wire_size = ring->total_bytes / ring->total_packets;
3224 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3225 struct igb_ring *ring = q_vector->tx_ring;
3226 avg_wire_size = max_t(u32, avg_wire_size,
3227 (ring->total_bytes /
3228 ring->total_packets));
3231 /* if avg_wire_size isn't set no work was done */
3235 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3236 avg_wire_size += 24;
3238 /* Don't starve jumbo frames */
3239 avg_wire_size = min(avg_wire_size, 3000);
3241 /* Give a little boost to mid-size frames */
3242 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3243 new_val = avg_wire_size / 3;
3245 new_val = avg_wire_size / 2;
3247 /* when in itr mode 3 do not exceed 20K ints/sec */
3248 if (adapter->rx_itr_setting == 3 && new_val < 196)
3252 if (new_val != q_vector->itr_val) {
3253 q_vector->itr_val = new_val;
3254 q_vector->set_itr = 1;
3257 if (q_vector->rx_ring) {
3258 q_vector->rx_ring->total_bytes = 0;
3259 q_vector->rx_ring->total_packets = 0;
3261 if (q_vector->tx_ring) {
3262 q_vector->tx_ring->total_bytes = 0;
3263 q_vector->tx_ring->total_packets = 0;
3268 * igb_update_itr - update the dynamic ITR value based on statistics
3269 * Stores a new ITR value based on packets and byte
3270 * counts during the last interrupt. The advantage of per interrupt
3271 * computation is faster updates and more accurate ITR for the current
3272 * traffic pattern. Constants in this function were computed
3273 * based on theoretical maximum wire speed and thresholds were set based
3274 * on testing data as well as attempting to minimize response time
3275 * while increasing bulk throughput.
3276 * this functionality is controlled by the InterruptThrottleRate module
3277 * parameter (see igb_param.c)
3278 * NOTE: These calculations are only valid when operating in a single-
3279 * queue environment.
3280 * @adapter: pointer to adapter
3281 * @itr_setting: current q_vector->itr_val
3282 * @packets: the number of packets during this measurement interval
3283 * @bytes: the number of bytes during this measurement interval
3285 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3286 int packets, int bytes)
3288 unsigned int retval = itr_setting;
3291 goto update_itr_done;
3293 switch (itr_setting) {
3294 case lowest_latency:
3295 /* handle TSO and jumbo frames */
3296 if (bytes/packets > 8000)
3297 retval = bulk_latency;
3298 else if ((packets < 5) && (bytes > 512))
3299 retval = low_latency;
3301 case low_latency: /* 50 usec aka 20000 ints/s */
3302 if (bytes > 10000) {
3303 /* this if handles the TSO accounting */
3304 if (bytes/packets > 8000) {
3305 retval = bulk_latency;
3306 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3307 retval = bulk_latency;
3308 } else if ((packets > 35)) {
3309 retval = lowest_latency;
3311 } else if (bytes/packets > 2000) {
3312 retval = bulk_latency;
3313 } else if (packets <= 2 && bytes < 512) {
3314 retval = lowest_latency;
3317 case bulk_latency: /* 250 usec aka 4000 ints/s */
3318 if (bytes > 25000) {
3320 retval = low_latency;
3321 } else if (bytes < 1500) {
3322 retval = low_latency;
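/* Worked examples for the low_latency state: 20 packets totalling
 * 30000 bytes gives bytes/packets = 1500 (> 1200), demoting the
 * vector to bulk_latency; 40 packets totalling 12000 bytes gives
 * 300 bytes/packet with packets > 35, promoting it to
 * lowest_latency. */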
3331 static void igb_set_itr(struct igb_adapter *adapter)
3333 struct igb_q_vector *q_vector = adapter->q_vector[0];
3335 u32 new_itr = q_vector->itr_val;
3337 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3338 if (adapter->link_speed != SPEED_1000) {
3344 adapter->rx_itr = igb_update_itr(adapter,
3346 q_vector->rx_ring->total_packets,
3347 q_vector->rx_ring->total_bytes);
3349 adapter->tx_itr = igb_update_itr(adapter,
3351 q_vector->tx_ring->total_packets,
3352 q_vector->tx_ring->total_bytes);
3353 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3355 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3356 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3357 current_itr = low_latency;
3359 switch (current_itr) {
3360 /* counts and packets in update_itr are dependent on these numbers */
3361 case lowest_latency:
3362 new_itr = 56; /* aka 70,000 ints/sec */
3365 new_itr = 196; /* aka 20,000 ints/sec */
3368 new_itr = 980; /* aka 4,000 ints/sec */
3375 q_vector->rx_ring->total_bytes = 0;
3376 q_vector->rx_ring->total_packets = 0;
3377 q_vector->tx_ring->total_bytes = 0;
3378 q_vector->tx_ring->total_packets = 0;
3380 if (new_itr != q_vector->itr_val) {
3381 /* this attempts to bias the interrupt rate towards Bulk
3382 * by adding intermediate steps when the interrupt rate is increasing */
3384 new_itr = new_itr > q_vector->itr_val ?
3385 max((new_itr * q_vector->itr_val) /
3386 (new_itr + (q_vector->itr_val >> 2)),
3389 /* Don't write the value here; it resets the adapter's
3390 * internal timer, and causes us to delay far longer than
3391 * we should between interrupts. Instead, we write the ITR
3392 * value at the beginning of the next interrupt so the timing
3393 * ends up being correct.
3395 q_vector->itr_val = new_itr;
3396 q_vector->set_itr = 1;
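/* The constants above follow from the ~0.25us granularity of the
 * ITR interval, i.e. interrupts/sec ~= 4,000,000 / itr_val:
 * 56 -> ~71k (quoted as 70,000), 196 -> ~20.4k, 980 -> ~4.1k. */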
3402 #define IGB_TX_FLAGS_CSUM 0x00000001
3403 #define IGB_TX_FLAGS_VLAN 0x00000002
3404 #define IGB_TX_FLAGS_TSO 0x00000004
3405 #define IGB_TX_FLAGS_IPV4 0x00000008
3406 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3407 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3408 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3410 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3411 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3413 struct e1000_adv_tx_context_desc *context_desc;
3416 struct igb_buffer *buffer_info;
3417 u32 info = 0, tu_cmd = 0;
3421 if (skb_header_cloned(skb)) {
3422 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3427 l4len = tcp_hdrlen(skb);
3430 if (skb->protocol == htons(ETH_P_IP)) {
3431 struct iphdr *iph = ip_hdr(skb);
3434 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3438 } else if (skb_is_gso_v6(skb)) {
3439 ipv6_hdr(skb)->payload_len = 0;
3440 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3441 &ipv6_hdr(skb)->daddr,
3445 i = tx_ring->next_to_use;
3447 buffer_info = &tx_ring->buffer_info[i];
3448 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3449 /* VLAN MACLEN IPLEN */
3450 if (tx_flags & IGB_TX_FLAGS_VLAN)
3451 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3452 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3453 *hdr_len += skb_network_offset(skb);
3454 info |= skb_network_header_len(skb);
3455 *hdr_len += skb_network_header_len(skb);
3456 context_desc->vlan_macip_lens = cpu_to_le32(info);
3458 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3459 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3461 if (skb->protocol == htons(ETH_P_IP))
3462 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3463 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3465 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3468 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3469 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3471 /* For 82575, context index must be unique per ring. */
3472 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3473 mss_l4len_idx |= tx_ring->reg_idx << 4;
3475 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3476 context_desc->seqnum_seed = 0;
3478 buffer_info->time_stamp = jiffies;
3479 buffer_info->next_to_watch = i;
3480 buffer_info->dma = 0;
3482 if (i == tx_ring->count)
3485 tx_ring->next_to_use = i;
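/* The pseudo-header seeding earlier in this function follows the
 * usual TSO contract: the TCP checksum field is primed with a
 * pseudo-header sum computed over a zero length (the payload_len /
 * tot_len contribution is omitted), so the hardware can insert
 * per-segment lengths and finalize the checksum for each segment
 * it emits. */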
3490 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3491 struct sk_buff *skb, u32 tx_flags)
3493 struct e1000_adv_tx_context_desc *context_desc;
3494 struct pci_dev *pdev = tx_ring->pdev;
3495 struct igb_buffer *buffer_info;
3496 u32 info = 0, tu_cmd = 0;
3499 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3500 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3501 i = tx_ring->next_to_use;
3502 buffer_info = &tx_ring->buffer_info[i];
3503 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3505 if (tx_flags & IGB_TX_FLAGS_VLAN)
3506 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3508 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3509 if (skb->ip_summed == CHECKSUM_PARTIAL)
3510 info |= skb_network_header_len(skb);
3512 context_desc->vlan_macip_lens = cpu_to_le32(info);
3514 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3516 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3519 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3520 const struct vlan_ethhdr *vhdr =
3521 (const struct vlan_ethhdr*)skb->data;
3523 protocol = vhdr->h_vlan_encapsulated_proto;
3525 protocol = skb->protocol;
3529 case cpu_to_be16(ETH_P_IP):
3530 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3531 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3532 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3533 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3534 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3536 case cpu_to_be16(ETH_P_IPV6):
3537 /* XXX what about other V6 headers?? */
3538 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3539 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3540 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3541 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3544 if (unlikely(net_ratelimit()))
3545 dev_warn(&pdev->dev,
3546 "partial checksum but proto=%x!\n",
3552 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3553 context_desc->seqnum_seed = 0;
3554 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3555 context_desc->mss_l4len_idx =
3556 cpu_to_le32(tx_ring->reg_idx << 4);
3558 buffer_info->time_stamp = jiffies;
3559 buffer_info->next_to_watch = i;
3560 buffer_info->dma = 0;
3563 if (i == tx_ring->count)
3565 tx_ring->next_to_use = i;
3572 #define IGB_MAX_TXD_PWR 16
3573 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
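/* i.e. a single data descriptor carries at most 1 << 16 = 65536
 * bytes; skb_headlen() and every fragment must stay below this,
 * which the BUG_ON checks in igb_tx_map_adv() enforce. */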
3575 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3578 struct igb_buffer *buffer_info;
3579 struct pci_dev *pdev = tx_ring->pdev;
3580 unsigned int len = skb_headlen(skb);
3581 unsigned int count = 0, i;
3584 i = tx_ring->next_to_use;
3586 buffer_info = &tx_ring->buffer_info[i];
3587 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3588 buffer_info->length = len;
3589 /* set time_stamp *before* dma to help avoid a possible race */
3590 buffer_info->time_stamp = jiffies;
3591 buffer_info->next_to_watch = i;
3592 buffer_info->dma = pci_map_single(pdev, skb->data, len,
3594 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3597 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3598 struct skb_frag_struct *frag;
3602 if (i == tx_ring->count)
3605 frag = &skb_shinfo(skb)->frags[f];
3608 buffer_info = &tx_ring->buffer_info[i];
3609 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3610 buffer_info->length = len;
3611 buffer_info->time_stamp = jiffies;
3612 buffer_info->next_to_watch = i;
3613 buffer_info->mapped_as_page = true;
3614 buffer_info->dma = pci_map_page(pdev,
3619 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3624 tx_ring->buffer_info[i].skb = skb;
3625 tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3626 tx_ring->buffer_info[first].next_to_watch = i;
3631 dev_err(&pdev->dev, "TX DMA map failed\n");
3633 /* clear timestamp and dma mappings for failed buffer_info mapping */
3634 buffer_info->dma = 0;
3635 buffer_info->time_stamp = 0;
3636 buffer_info->length = 0;
3637 buffer_info->next_to_watch = 0;
3638 buffer_info->mapped_as_page = false;
3640 /* clear timestamp and dma mappings for remaining portion of packet */
3645 buffer_info = &tx_ring->buffer_info[i];
3646 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3652 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3653 u32 tx_flags, int count, u32 paylen,
3656 union e1000_adv_tx_desc *tx_desc;
3657 struct igb_buffer *buffer_info;
3658 u32 olinfo_status = 0, cmd_type_len;
3659 unsigned int i = tx_ring->next_to_use;
3661 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3662 E1000_ADVTXD_DCMD_DEXT);
3664 if (tx_flags & IGB_TX_FLAGS_VLAN)
3665 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3667 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3668 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3670 if (tx_flags & IGB_TX_FLAGS_TSO) {
3671 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3673 /* insert tcp checksum */
3674 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3676 /* insert ip checksum */
3677 if (tx_flags & IGB_TX_FLAGS_IPV4)
3678 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3680 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3681 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3684 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3685 (tx_flags & (IGB_TX_FLAGS_CSUM |
3687 IGB_TX_FLAGS_VLAN)))
3688 olinfo_status |= tx_ring->reg_idx << 4;
3690 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3693 buffer_info = &tx_ring->buffer_info[i];
3694 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3695 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3696 tx_desc->read.cmd_type_len =
3697 cpu_to_le32(cmd_type_len | buffer_info->length);
3698 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3701 if (i == tx_ring->count)
3703 } while (count > 0);
3705 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3706 /* Force memory writes to complete before letting h/w
3707 * know there are new descriptors to fetch. (Only
3708 * applicable for weak-ordered memory model archs,
3709 * such as IA-64). */
3712 tx_ring->next_to_use = i;
3713 writel(i, tx_ring->tail);
3714 /* we need this if more than one processor can write to our tail
3715 * at a time; it synchronizes IO on IA64/Altix systems */
3719 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3721 struct net_device *netdev = tx_ring->netdev;
3723 netif_stop_subqueue(netdev, tx_ring->queue_index);
3725 /* Herbert's original patch had:
3726 * smp_mb__after_netif_stop_queue();
3727 * but since that doesn't exist yet, just open code it. */
3730 /* We need to check again in a case another CPU has just
3731 * made room available. */
3732 if (igb_desc_unused(tx_ring) < size)
3736 netif_wake_subqueue(netdev, tx_ring->queue_index);
3737 tx_ring->tx_stats.restart_queue++;
3741 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3743 if (igb_desc_unused(tx_ring) >= size)
3745 return __igb_maybe_stop_tx(tx_ring, size);
3748 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3749 struct igb_ring *tx_ring)
3751 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3756 union skb_shared_tx *shtx = skb_tx(skb);
3758 /* need: 1 descriptor per page,
3759 * + 2 desc gap to keep tail from touching head,
3760 * + 1 desc for skb->data,
3761 * + 1 desc for context descriptor,
3762 * otherwise try next time */
3763 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3764 /* this is a hard error */
3765 return NETDEV_TX_BUSY;
3768 if (unlikely(shtx->hardware)) {
3769 shtx->in_progress = 1;
3770 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3773 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3774 tx_flags |= IGB_TX_FLAGS_VLAN;
3775 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3778 if (skb->protocol == htons(ETH_P_IP))
3779 tx_flags |= IGB_TX_FLAGS_IPV4;
3781 first = tx_ring->next_to_use;
3782 if (skb_is_gso(skb)) {
3783 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3786 dev_kfree_skb_any(skb);
3787 return NETDEV_TX_OK;
3792 tx_flags |= IGB_TX_FLAGS_TSO;
3793 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3794 (skb->ip_summed == CHECKSUM_PARTIAL))
3795 tx_flags |= IGB_TX_FLAGS_CSUM;
3798 * count reflects descriptors mapped; if 0 or less then a mapping
3799 * error has occurred and we need to rewind the descriptor queue
3801 count = igb_tx_map_adv(tx_ring, skb, first);
3803 dev_kfree_skb_any(skb);
3804 tx_ring->buffer_info[first].time_stamp = 0;
3805 tx_ring->next_to_use = first;
3806 return NETDEV_TX_OK;
3809 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3811 /* Make sure there is space in the ring for the next send. */
3812 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3814 return NETDEV_TX_OK;
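/* Descriptor-budget example for the igb_maybe_stop_tx() check above:
 * an skb with 3 page fragments needs at most 3 + 1 (skb->data)
 * + 1 (context descriptor) + 2 (gap between tail and head) = 7
 * descriptors, hence nr_frags + 4. */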
3817 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3818 struct net_device *netdev)
3820 struct igb_adapter *adapter = netdev_priv(netdev);
3821 struct igb_ring *tx_ring;
3824 if (test_bit(__IGB_DOWN, &adapter->state)) {
3825 dev_kfree_skb_any(skb);
3826 return NETDEV_TX_OK;
3829 if (skb->len <= 0) {
3830 dev_kfree_skb_any(skb);
3831 return NETDEV_TX_OK;
3834 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3835 tx_ring = adapter->multi_tx_table[r_idx];
3837 /* This goes back to the question of how to logically map a tx queue
3838 * to a flow. Right now, performance is impacted slightly negatively
3839 * if using multiple tx queues. If the stack breaks away from a
3840 * single qdisc implementation, we can look at this again. */
3841 return igb_xmit_frame_ring_adv(skb, tx_ring);
3845 * igb_tx_timeout - Respond to a Tx Hang
3846 * @netdev: network interface device structure
3848 static void igb_tx_timeout(struct net_device *netdev)
3850 struct igb_adapter *adapter = netdev_priv(netdev);
3851 struct e1000_hw *hw = &adapter->hw;
3853 /* Do the reset outside of interrupt context */
3854 adapter->tx_timeout_count++;
3856 if (hw->mac.type == e1000_82580)
3857 hw->dev_spec._82575.global_device_reset = true;
3859 schedule_work(&adapter->reset_task);
3861 (adapter->eims_enable_mask & ~adapter->eims_other));
3864 static void igb_reset_task(struct work_struct *work)
3866 struct igb_adapter *adapter;
3867 adapter = container_of(work, struct igb_adapter, reset_task);
3869 igb_reinit_locked(adapter);
3873 * igb_get_stats - Get System Network Statistics
3874 * @netdev: network interface device structure
3876 * Returns the address of the device statistics structure.
3877 * The statistics are actually updated from the timer callback.
3879 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3881 /* only return the current stats */
3882 return &netdev->stats;
3886 * igb_change_mtu - Change the Maximum Transfer Unit
3887 * @netdev: network interface device structure
3888 * @new_mtu: new value for maximum frame size
3890 * Returns 0 on success, negative on failure
3892 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3894 struct igb_adapter *adapter = netdev_priv(netdev);
3895 struct pci_dev *pdev = adapter->pdev;
3896 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3897 u32 rx_buffer_len, i;
3899 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3900 dev_err(&pdev->dev, "Invalid MTU setting\n");
3904 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3905 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3909 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3912 /* igb_down has a dependency on max_frame_size */
3913 adapter->max_frame_size = max_frame;
3915 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3916 * means we reserve 2 more; this pushes us to allocate from the next larger slab size,
3918 * i.e. RXBUFFER_2048 --> size-4096 slab
3921 if (max_frame <= IGB_RXBUFFER_1024)
3922 rx_buffer_len = IGB_RXBUFFER_1024;
3923 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3924 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3926 rx_buffer_len = IGB_RXBUFFER_128;
3928 if (netif_running(netdev))
3931 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3932 netdev->mtu, new_mtu);
3933 netdev->mtu = new_mtu;
3935 for (i = 0; i < adapter->num_rx_queues; i++)
3936 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3938 if (netif_running(netdev))
3943 clear_bit(__IGB_RESETTING, &adapter->state);
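/* Buffer sizing example: a 1500-byte MTU yields max_frame = 1518,
 * which fits MAXIMUM_ETHERNET_VLAN_SIZE, so rx_buffer_len becomes
 * 1522; a 9000-byte MTU falls through to IGB_RXBUFFER_128, and
 * igb_configure_rx_ring() then selects the header-split, page-backed
 * receive path for the payload. */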
3949 * igb_update_stats - Update the board statistics counters
3950 * @adapter: board private structure
3953 void igb_update_stats(struct igb_adapter *adapter)
3955 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3956 struct e1000_hw *hw = &adapter->hw;
3957 struct pci_dev *pdev = adapter->pdev;
3963 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3966 * Prevent stats update while adapter is being reset, or if the pci
3967 * connection is down.
3969 if (adapter->link_speed == 0)
3971 if (pci_channel_offline(pdev))
3976 for (i = 0; i < adapter->num_rx_queues; i++) {
3977 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3978 struct igb_ring *ring = adapter->rx_ring[i];
3979 ring->rx_stats.drops += rqdpc_tmp;
3980 net_stats->rx_fifo_errors += rqdpc_tmp;
3981 bytes += ring->rx_stats.bytes;
3982 packets += ring->rx_stats.packets;
3985 net_stats->rx_bytes = bytes;
3986 net_stats->rx_packets = packets;
3990 for (i = 0; i < adapter->num_tx_queues; i++) {
3991 struct igb_ring *ring = adapter->tx_ring[i];
3992 bytes += ring->tx_stats.bytes;
3993 packets += ring->tx_stats.packets;
3995 net_stats->tx_bytes = bytes;
3996 net_stats->tx_packets = packets;
3998 /* read stats registers */
3999 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4000 adapter->stats.gprc += rd32(E1000_GPRC);
4001 adapter->stats.gorc += rd32(E1000_GORCL);
4002 rd32(E1000_GORCH); /* clear GORCL */
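/* GORCL/GORCH (and GOTCL/GOTCH below) form a read-to-clear 64-bit
 * pair; only the low half is accumulated here and the high half is
 * read purely for its clear side effect, relying on frequent
 * watchdog polling to avoid 32-bit wrap. */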
4003 adapter->stats.bprc += rd32(E1000_BPRC);
4004 adapter->stats.mprc += rd32(E1000_MPRC);
4005 adapter->stats.roc += rd32(E1000_ROC);
4007 adapter->stats.prc64 += rd32(E1000_PRC64);
4008 adapter->stats.prc127 += rd32(E1000_PRC127);
4009 adapter->stats.prc255 += rd32(E1000_PRC255);
4010 adapter->stats.prc511 += rd32(E1000_PRC511);
4011 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4012 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4013 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4014 adapter->stats.sec += rd32(E1000_SEC);
4016 mpc = rd32(E1000_MPC);
4017 adapter->stats.mpc += mpc;
4018 net_stats->rx_fifo_errors += mpc;
4019 adapter->stats.scc += rd32(E1000_SCC);
4020 adapter->stats.ecol += rd32(E1000_ECOL);
4021 adapter->stats.mcc += rd32(E1000_MCC);
4022 adapter->stats.latecol += rd32(E1000_LATECOL);
4023 adapter->stats.dc += rd32(E1000_DC);
4024 adapter->stats.rlec += rd32(E1000_RLEC);
4025 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4026 adapter->stats.xontxc += rd32(E1000_XONTXC);
4027 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4028 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4029 adapter->stats.fcruc += rd32(E1000_FCRUC);
4030 adapter->stats.gptc += rd32(E1000_GPTC);
4031 adapter->stats.gotc += rd32(E1000_GOTCL);
4032 rd32(E1000_GOTCH); /* clear GOTCL */
4033 adapter->stats.rnbc += rd32(E1000_RNBC);
4034 adapter->stats.ruc += rd32(E1000_RUC);
4035 adapter->stats.rfc += rd32(E1000_RFC);
4036 adapter->stats.rjc += rd32(E1000_RJC);
4037 adapter->stats.tor += rd32(E1000_TORH);
4038 adapter->stats.tot += rd32(E1000_TOTH);
4039 adapter->stats.tpr += rd32(E1000_TPR);
4041 adapter->stats.ptc64 += rd32(E1000_PTC64);
4042 adapter->stats.ptc127 += rd32(E1000_PTC127);
4043 adapter->stats.ptc255 += rd32(E1000_PTC255);
4044 adapter->stats.ptc511 += rd32(E1000_PTC511);
4045 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4046 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4048 adapter->stats.mptc += rd32(E1000_MPTC);
4049 adapter->stats.bptc += rd32(E1000_BPTC);
4051 adapter->stats.tpt += rd32(E1000_TPT);
4052 adapter->stats.colc += rd32(E1000_COLC);
4054 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4055 /* read internal phy specific stats */
4056 reg = rd32(E1000_CTRL_EXT);
4057 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4058 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4059 adapter->stats.tncrs += rd32(E1000_TNCRS);
4062 adapter->stats.tsctc += rd32(E1000_TSCTC);
4063 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4065 adapter->stats.iac += rd32(E1000_IAC);
4066 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4067 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4068 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4069 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4070 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4071 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4072 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4073 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4075 /* Fill out the OS statistics structure */
4076 net_stats->multicast = adapter->stats.mprc;
4077 net_stats->collisions = adapter->stats.colc;
4081 /* RLEC on some newer hardware can be incorrect so build
4082 * our own version based on RUC and ROC */
4083 net_stats->rx_errors = adapter->stats.rxerrc +
4084 adapter->stats.crcerrs + adapter->stats.algnerrc +
4085 adapter->stats.ruc + adapter->stats.roc +
4086 adapter->stats.cexterr;
4087 net_stats->rx_length_errors = adapter->stats.ruc +
4089 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4090 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4091 net_stats->rx_missed_errors = adapter->stats.mpc;
4094 net_stats->tx_errors = adapter->stats.ecol +
4095 adapter->stats.latecol;
4096 net_stats->tx_aborted_errors = adapter->stats.ecol;
4097 net_stats->tx_window_errors = adapter->stats.latecol;
4098 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4100 /* Tx Dropped needs to be maintained elsewhere */
4103 if (hw->phy.media_type == e1000_media_type_copper) {
4104 if ((adapter->link_speed == SPEED_1000) &&
4105 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4106 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4107 adapter->phy_stats.idle_errors += phy_tmp;
4111 /* Management Stats */
4112 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4113 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4114 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4117 static irqreturn_t igb_msix_other(int irq, void *data)
4119 struct igb_adapter *adapter = data;
4120 struct e1000_hw *hw = &adapter->hw;
4121 u32 icr = rd32(E1000_ICR);
4122 /* reading ICR causes bit 31 of EICR to be cleared */
4124 if (icr & E1000_ICR_DRSTA)
4125 schedule_work(&adapter->reset_task);
4127 if (icr & E1000_ICR_DOUTSYNC) {
4128 /* HW is reporting DMA is out of sync */
4129 adapter->stats.doosync++;
4132 /* Check for a mailbox event */
4133 if (icr & E1000_ICR_VMMB)
4134 igb_msg_task(adapter);
4136 if (icr & E1000_ICR_LSC) {
4137 hw->mac.get_link_status = 1;
4138 /* guard against interrupt when we're going down */
4139 if (!test_bit(__IGB_DOWN, &adapter->state))
4140 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4143 if (adapter->vfs_allocated_count)
4144 wr32(E1000_IMS, E1000_IMS_LSC |
4145 E1000_IMS_VMMB |
4146 E1000_IMS_DOUTSYNC);
4147 else
4148 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4149 wr32(E1000_EIMS, adapter->eims_other);
4151 return IRQ_HANDLED;
4154 static void igb_write_itr(struct igb_q_vector *q_vector)
4156 struct igb_adapter *adapter = q_vector->adapter;
4157 u32 itr_val = q_vector->itr_val & 0x7FFC;
4159 if (!q_vector->set_itr)
4160 return;
4162 if (!itr_val)
4163 itr_val = 0x4;
4165 if (adapter->hw.mac.type == e1000_82575)
4166 itr_val |= itr_val << 16;
4167 else
4168 itr_val |= 0x8000000;
4170 writel(itr_val, q_vector->itr_register);
4171 q_vector->set_itr = 0;
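/*
 * Editorial worked example (not from the original source): with
 * itr_val = 0x1F4 after masking, the function above writes 0x01F401F4
 * on 82575 (the interval duplicated into both halves of the register)
 * and 0x080001F4 on later parts (the interval plus the 0x8000000
 * update bit set in the else branch).
 */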
4174 static irqreturn_t igb_msix_ring(int irq, void *data)
4176 struct igb_q_vector *q_vector = data;
4178 /* Write the ITR value calculated from the previous interrupt. */
4179 igb_write_itr(q_vector);
4181 napi_schedule(&q_vector->napi);
4183 return IRQ_HANDLED;
4186 #ifdef CONFIG_IGB_DCA
4187 static void igb_update_dca(struct igb_q_vector *q_vector)
4189 struct igb_adapter *adapter = q_vector->adapter;
4190 struct e1000_hw *hw = &adapter->hw;
4191 int cpu = get_cpu();
4193 if (q_vector->cpu == cpu)
4194 goto out_no_update;
4196 if (q_vector->tx_ring) {
4197 int q = q_vector->tx_ring->reg_idx;
4198 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4199 if (hw->mac.type == e1000_82575) {
4200 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4201 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4202 } else {
4203 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4204 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4205 E1000_DCA_TXCTRL_CPUID_SHIFT;
4207 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4208 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4210 if (q_vector->rx_ring) {
4211 int q = q_vector->rx_ring->reg_idx;
4212 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4213 if (hw->mac.type == e1000_82575) {
4214 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4215 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4216 } else {
4217 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4218 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4219 E1000_DCA_RXCTRL_CPUID_SHIFT;
4221 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4222 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4223 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4224 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4226 q_vector->cpu = cpu;
4227 out_no_update:
4228 put_cpu();
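/*
 * Hedged sketch (editorial): both directions above follow the same DCA
 * pattern; in outline, where cpuid_mask and s stand in for the per-MAC
 * mask/shift constants used in the function:
 *
 *	u32 ctrl = rd32(E1000_DCA_RXCTRL(q));
 *	ctrl &= ~cpuid_mask;                         // drop stale CPU tag
 *	ctrl |= dca3_get_tag(&pdev->dev, cpu) << s;  // tag for current CPU
 *	ctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;        // steer descriptor writes
 *	wr32(E1000_DCA_RXCTRL(q), ctrl);
 *
 * The shift s is 0 on 82575 and E1000_DCA_RXCTRL_CPUID_SHIFT on 82576
 * and later, as the if/else branches above show.
 */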
4231 static void igb_setup_dca(struct igb_adapter *adapter)
4233 struct e1000_hw *hw = &adapter->hw;
4236 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4237 return;
4239 /* Always use CB2 mode, difference is masked in the CB driver. */
4240 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4242 for (i = 0; i < adapter->num_q_vectors; i++) {
4243 adapter->q_vector[i]->cpu = -1;
4244 igb_update_dca(adapter->q_vector[i]);
4248 static int __igb_notify_dca(struct device *dev, void *data)
4250 struct net_device *netdev = dev_get_drvdata(dev);
4251 struct igb_adapter *adapter = netdev_priv(netdev);
4252 struct pci_dev *pdev = adapter->pdev;
4253 struct e1000_hw *hw = &adapter->hw;
4254 unsigned long event = *(unsigned long *)data;
4256 switch (event) {
4257 case DCA_PROVIDER_ADD:
4258 /* if already enabled, don't do it again */
4259 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4260 break;
4261 if (dca_add_requester(dev) == 0) {
4262 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4263 dev_info(&pdev->dev, "DCA enabled\n");
4264 igb_setup_dca(adapter);
4265 break;
4266 }
4267 /* Fall Through since DCA is disabled. */
4268 case DCA_PROVIDER_REMOVE:
4269 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4270 /* without this a class_device is left
4271 * hanging around in the sysfs model */
4272 dca_remove_requester(dev);
4273 dev_info(&pdev->dev, "DCA disabled\n");
4274 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4275 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4276 }
4277 break;
4278 }
4280 return 0;
4283 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4284 void *p)
4286 int ret_val;
4288 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4289 __igb_notify_dca);
4291 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4293 #endif /* CONFIG_IGB_DCA */
4295 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4297 struct e1000_hw *hw = &adapter->hw;
4298 u32 ping;
4299 int i;
4301 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4302 ping = E1000_PF_CONTROL_MSG;
4303 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4304 ping |= E1000_VT_MSGTYPE_CTS;
4305 igb_write_mbx(hw, &ping, 1, i);
4309 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4311 struct e1000_hw *hw = &adapter->hw;
4312 u32 vmolr = rd32(E1000_VMOLR(vf));
4313 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4315 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4316 IGB_VF_FLAG_MULTI_PROMISC);
4317 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4319 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4320 vmolr |= E1000_VMOLR_MPME;
4321 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4322 } else {
4323 /*
4324 * if we have hashes and we are clearing a multicast promisc
4325 * flag we need to write the hashes to the MTA as this step
4326 * was previously skipped
4327 */
4328 if (vf_data->num_vf_mc_hashes > 30) {
4329 vmolr |= E1000_VMOLR_MPME;
4330 } else if (vf_data->num_vf_mc_hashes) {
4331 int j;
4332 vmolr |= E1000_VMOLR_ROMPE;
4333 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4334 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4338 wr32(E1000_VMOLR(vf), vmolr);
4340 /* there are flags left unprocessed, likely not supported */
4341 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4342 return -EINVAL;
4344 return 0;
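/*
 * Editorial example: a VF that registered three multicast hashes gets
 * E1000_VMOLR_ROMPE plus three igb_mta_set() replays when it leaves
 * promiscuous mode, while a VF that registered more than 30 falls back
 * to E1000_VMOLR_MPME (accept all multicast), matching the two branches
 * above.
 */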
4348 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4349 u32 *msgbuf, u32 vf)
4351 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4352 u16 *hash_list = (u16 *)&msgbuf[1];
4353 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4354 int i;
4356 /* salt away the number of multicast addresses assigned
4357 * to this VF for later use to restore when the PF multicast
4358 * list changes
4359 */
4360 vf_data->num_vf_mc_hashes = n;
4362 /* only up to 30 hash values supported */
4363 if (n > 30)
4364 n = 30;
4366 /* store the hashes for later use */
4367 for (i = 0; i < n; i++)
4368 vf_data->vf_mc_hashes[i] = hash_list[i];
4370 /* Flush and reset the mta with the new values */
4371 igb_set_rx_mode(adapter->netdev);
4373 return 0;
4376 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4378 struct e1000_hw *hw = &adapter->hw;
4379 struct vf_data_storage *vf_data;
4380 int i, j;
4382 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4383 u32 vmolr = rd32(E1000_VMOLR(i));
4384 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4386 vf_data = &adapter->vf_data[i];
4388 if ((vf_data->num_vf_mc_hashes > 30) ||
4389 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4390 vmolr |= E1000_VMOLR_MPME;
4391 } else if (vf_data->num_vf_mc_hashes) {
4392 vmolr |= E1000_VMOLR_ROMPE;
4393 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4394 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4396 wr32(E1000_VMOLR(i), vmolr);
4400 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4402 struct e1000_hw *hw = &adapter->hw;
4403 u32 pool_mask, reg, vid;
4404 int i;
4406 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4408 /* Find the vlan filter for this id */
4409 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4410 reg = rd32(E1000_VLVF(i));
4412 /* remove the vf from the pool */
4413 reg &= ~pool_mask;
4415 /* if pool is empty then remove entry from vfta */
4416 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4417 (reg & E1000_VLVF_VLANID_ENABLE)) {
4418 reg = 0;
4419 vid = reg & E1000_VLVF_VLANID_MASK;
4420 igb_vfta_set(hw, vid, false);
4423 wr32(E1000_VLVF(i), reg);
4426 adapter->vf_data[vf].vlans_enabled = 0;
4429 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4431 struct e1000_hw *hw = &adapter->hw;
4432 u32 reg, i;
4434 /* The vlvf table only exists on 82576 hardware and newer */
4435 if (hw->mac.type < e1000_82576)
4436 return -1;
4438 /* we only need to do this if VMDq is enabled */
4439 if (!adapter->vfs_allocated_count)
4440 return -1;
4442 /* Find the vlan filter for this id */
4443 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4444 reg = rd32(E1000_VLVF(i));
4445 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4446 vid == (reg & E1000_VLVF_VLANID_MASK))
4447 break;
4450 if (add) {
4451 if (i == E1000_VLVF_ARRAY_SIZE) {
4452 /* Did not find a matching VLAN ID entry that was
4453 * enabled. Search for a free filter entry, i.e.
4454 * one without the enable bit set
4456 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4457 reg = rd32(E1000_VLVF(i));
4458 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4459 break;
4462 if (i < E1000_VLVF_ARRAY_SIZE) {
4463 /* Found an enabled/available entry */
4464 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4466 /* if !enabled we need to set this up in vfta */
4467 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4468 /* add VID to filter table */
4469 igb_vfta_set(hw, vid, true);
4470 reg |= E1000_VLVF_VLANID_ENABLE;
4472 reg &= ~E1000_VLVF_VLANID_MASK;
4473 reg |= vid;
4474 wr32(E1000_VLVF(i), reg);
4476 /* do not modify RLPML for PF devices */
4477 if (vf >= adapter->vfs_allocated_count)
4478 return 0;
4480 if (!adapter->vf_data[vf].vlans_enabled) {
4481 u32 size;
4482 reg = rd32(E1000_VMOLR(vf));
4483 size = reg & E1000_VMOLR_RLPML_MASK;
4484 size += 4;
4485 reg &= ~E1000_VMOLR_RLPML_MASK;
4486 reg |= size;
4487 wr32(E1000_VMOLR(vf), reg);
4488 }
4490 adapter->vf_data[vf].vlans_enabled++;
4491 }
4493 } else {
4494 if (i < E1000_VLVF_ARRAY_SIZE) {
4495 /* remove vf from the pool */
4496 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4497 /* if pool is empty then remove entry from vfta */
4498 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4499 reg = 0;
4500 igb_vfta_set(hw, vid, false);
4502 wr32(E1000_VLVF(i), reg);
4504 /* do not modify RLPML for PF devices */
4505 if (vf >= adapter->vfs_allocated_count)
4506 return 0;
4508 adapter->vf_data[vf].vlans_enabled--;
4509 if (!adapter->vf_data[vf].vlans_enabled) {
4510 u32 size;
4511 reg = rd32(E1000_VMOLR(vf));
4512 size = reg & E1000_VMOLR_RLPML_MASK;
4513 size -= 4;
4514 reg &= ~E1000_VMOLR_RLPML_MASK;
4515 reg |= size;
4516 wr32(E1000_VMOLR(vf), reg);
4517 }
4518 }
4521 return 0;
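/*
 * Editorial worked example: a pool whose VMOLR.RLPML is 1522 is raised
 * to 1526 when its first VLAN filter is added (room for the 4-byte
 * 802.1Q tag) and lowered back to 1522 when its last VLAN is removed;
 * that is all the size +/- 4 adjustments above do.
 */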
4523 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4525 struct e1000_hw *hw = &adapter->hw;
4527 if (vid)
4528 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4529 else
4530 wr32(E1000_VMVIR(vf), 0);
4533 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4534 int vf, u16 vlan, u8 qos)
4536 int err = 0;
4537 struct igb_adapter *adapter = netdev_priv(netdev);
4539 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4540 return -EINVAL;
4541 if (vlan || qos) {
4542 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4543 if (err)
4544 goto out;
4545 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4546 igb_set_vmolr(adapter, vf, !vlan);
4547 adapter->vf_data[vf].pf_vlan = vlan;
4548 adapter->vf_data[vf].pf_qos = qos;
4549 dev_info(&adapter->pdev->dev,
4550 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4551 if (test_bit(__IGB_DOWN, &adapter->state)) {
4552 dev_warn(&adapter->pdev->dev,
4553 "The VF VLAN has been set,"
4554 " but the PF device is not up.\n");
4555 dev_warn(&adapter->pdev->dev,
4556 "Bring the PF device up before"
4557 " attempting to use the VF device.\n");
4558 }
4559 } else {
4560 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4561 false, vf);
4562 igb_set_vmvir(adapter, vlan, vf);
4563 igb_set_vmolr(adapter, vf, true);
4564 adapter->vf_data[vf].pf_vlan = 0;
4565 adapter->vf_data[vf].pf_qos = 0;
4566 }
4567 out:
4568 return err;
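/*
 * Usage sketch (editorial; assumes an iproute2 build with VF support):
 * this ndo is typically reached from user space as
 *
 *	ip link set eth0 vf 0 vlan 100 qos 3
 *
 * and "vlan 0" clears the port VLAN again through the else branch above.
 */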
4571 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4573 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4574 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4576 return igb_vlvf_set(adapter, vid, add, vf);
4579 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4582 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4583 adapter->vf_data[vf].last_nack = jiffies;
4585 /* reset offloads to defaults */
4586 igb_set_vmolr(adapter, vf, true);
4588 /* reset vlans for device */
4589 igb_clear_vf_vfta(adapter, vf);
4590 if (adapter->vf_data[vf].pf_vlan)
4591 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4592 adapter->vf_data[vf].pf_vlan,
4593 adapter->vf_data[vf].pf_qos);
4594 else
4595 igb_clear_vf_vfta(adapter, vf);
4597 /* reset multicast table array for vf */
4598 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4600 /* Flush and reset the mta with the new values */
4601 igb_set_rx_mode(adapter->netdev);
4604 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4606 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4608 /* generate a new mac address as we were hotplug removed/added */
4609 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4610 random_ether_addr(vf_mac);
4612 /* process remaining reset events */
4613 igb_vf_reset(adapter, vf);
4616 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4618 struct e1000_hw *hw = &adapter->hw;
4619 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4620 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4621 u32 reg, msgbuf[3];
4622 u8 *addr = (u8 *)(&msgbuf[1]);
4624 /* process all the same items cleared in a function level reset */
4625 igb_vf_reset(adapter, vf);
4627 /* set vf mac address */
4628 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4630 /* enable transmit and receive for vf */
4631 reg = rd32(E1000_VFTE);
4632 wr32(E1000_VFTE, reg | (1 << vf));
4633 reg = rd32(E1000_VFRE);
4634 wr32(E1000_VFRE, reg | (1 << vf));
4636 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4638 /* reply to reset with ack and vf mac address */
4639 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4640 memcpy(addr, vf_mac, 6);
4641 igb_write_mbx(hw, msgbuf, 3, vf);
4644 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4646 unsigned char *addr = (unsigned char *)&msg[1];
4647 int err = -1;
4649 if (is_valid_ether_addr(addr))
4650 err = igb_set_vf_mac(adapter, vf, addr);
4652 return err;
4655 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4657 struct e1000_hw *hw = &adapter->hw;
4658 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4659 u32 msg = E1000_VT_MSGTYPE_NACK;
4661 /* if device isn't clear to send it shouldn't be reading either */
4662 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4663 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4664 igb_write_mbx(hw, &msg, 1, vf);
4665 vf_data->last_nack = jiffies;
4669 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4671 struct pci_dev *pdev = adapter->pdev;
4672 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4673 struct e1000_hw *hw = &adapter->hw;
4674 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4675 s32 retval;
4677 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4679 if (retval) {
4680 /* if receive failed revoke VF CTS stats and restart init */
4681 dev_err(&pdev->dev, "Error receiving message from VF\n");
4682 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4683 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4684 return;
4688 /* this is a message we already processed, do nothing */
4689 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4690 return;
4692 /*
4693 * until the vf completes a reset it should not be
4694 * allowed to start any configuration.
4695 */
4697 if (msgbuf[0] == E1000_VF_RESET) {
4698 igb_vf_reset_msg(adapter, vf);
4699 return;
4700 }
4702 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4703 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4704 return;
4705 retval = -1;
4706 goto out;
4707 }
4709 switch ((msgbuf[0] & 0xFFFF)) {
4710 case E1000_VF_SET_MAC_ADDR:
4711 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4712 break;
4713 case E1000_VF_SET_PROMISC:
4714 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4715 break;
4716 case E1000_VF_SET_MULTICAST:
4717 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4718 break;
4719 case E1000_VF_SET_LPE:
4720 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4721 break;
4722 case E1000_VF_SET_VLAN:
4723 if (adapter->vf_data[vf].pf_vlan)
4724 retval = -1;
4725 else
4726 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4727 break;
4728 default:
4729 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4730 retval = -1;
4731 break;
4732 }
4734 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4735 out:
4736 /* notify the VF of the results of what it sent us */
4737 if (retval)
4738 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4739 else
4740 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4742 igb_write_mbx(hw, msgbuf, 1, vf);
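/*
 * Editorial summary of the mailbox words handled above: msgbuf[0]
 * carries the command in its low 16 bits (E1000_VF_SET_MAC_ADDR, ...),
 * per-command data in the E1000_VT_MSGINFO bits, and the
 * E1000_VT_MSGTYPE_ACK/NACK/CTS status flags in the upper bits, while
 * msgbuf[1..] holds the payload, e.g. the six MAC bytes for
 * E1000_VF_SET_MAC_ADDR or the hash list for E1000_VF_SET_MULTICAST.
 */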
4745 static void igb_msg_task(struct igb_adapter *adapter)
4747 struct e1000_hw *hw = &adapter->hw;
4750 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4751 /* process any reset requests */
4752 if (!igb_check_for_rst(hw, vf))
4753 igb_vf_reset_event(adapter, vf);
4755 /* process any messages pending */
4756 if (!igb_check_for_msg(hw, vf))
4757 igb_rcv_msg_from_vf(adapter, vf);
4759 /* process any acks */
4760 if (!igb_check_for_ack(hw, vf))
4761 igb_rcv_ack_from_vf(adapter, vf);
4765 /**
4766 * igb_set_uta - Set unicast filter table address
4767 * @adapter: board private structure
4769 * The unicast table address is a register array of 32-bit registers.
4770 * The table is meant to be used in a way similar to how the MTA is used
4771 * however due to certain limitations in the hardware it is necessary to
4772 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4773 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
4774 */
4775 static void igb_set_uta(struct igb_adapter *adapter)
4777 struct e1000_hw *hw = &adapter->hw;
4778 int i;
4780 /* The UTA table only exists on 82576 hardware and newer */
4781 if (hw->mac.type < e1000_82576)
4782 return;
4784 /* we only need to do this if VMDq is enabled */
4785 if (!adapter->vfs_allocated_count)
4786 return;
4788 for (i = 0; i < hw->mac.uta_reg_count; i++)
4789 array_wr32(E1000_UTA, i, ~0);
4792 /**
4793 * igb_intr_msi - Interrupt Handler
4794 * @irq: interrupt number
4795 * @data: pointer to a network interface device structure
4796 */
4797 static irqreturn_t igb_intr_msi(int irq, void *data)
4799 struct igb_adapter *adapter = data;
4800 struct igb_q_vector *q_vector = adapter->q_vector[0];
4801 struct e1000_hw *hw = &adapter->hw;
4802 /* read ICR disables interrupts using IAM */
4803 u32 icr = rd32(E1000_ICR);
4805 igb_write_itr(q_vector);
4807 if (icr & E1000_ICR_DRSTA)
4808 schedule_work(&adapter->reset_task);
4810 if (icr & E1000_ICR_DOUTSYNC) {
4811 /* HW is reporting DMA is out of sync */
4812 adapter->stats.doosync++;
4815 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4816 hw->mac.get_link_status = 1;
4817 if (!test_bit(__IGB_DOWN, &adapter->state))
4818 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4821 napi_schedule(&q_vector->napi);
4823 return IRQ_HANDLED;
4826 /**
4827 * igb_intr - Legacy Interrupt Handler
4828 * @irq: interrupt number
4829 * @data: pointer to a network interface device structure
4830 */
4831 static irqreturn_t igb_intr(int irq, void *data)
4833 struct igb_adapter *adapter = data;
4834 struct igb_q_vector *q_vector = adapter->q_vector[0];
4835 struct e1000_hw *hw = &adapter->hw;
4836 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4837 * need for the IMC write */
4838 u32 icr = rd32(E1000_ICR);
4839 if (!icr)
4840 return IRQ_NONE; /* Not our interrupt */
4842 igb_write_itr(q_vector);
4844 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4845 * not set, then the adapter didn't send an interrupt */
4846 if (!(icr & E1000_ICR_INT_ASSERTED))
4847 return IRQ_NONE;
4849 if (icr & E1000_ICR_DRSTA)
4850 schedule_work(&adapter->reset_task);
4852 if (icr & E1000_ICR_DOUTSYNC) {
4853 /* HW is reporting DMA is out of sync */
4854 adapter->stats.doosync++;
4857 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4858 hw->mac.get_link_status = 1;
4859 /* guard against interrupt when we're going down */
4860 if (!test_bit(__IGB_DOWN, &adapter->state))
4861 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4864 napi_schedule(&q_vector->napi);
4866 return IRQ_HANDLED;
4869 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4871 struct igb_adapter *adapter = q_vector->adapter;
4872 struct e1000_hw *hw = &adapter->hw;
4874 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4875 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4876 if (!adapter->msix_entries)
4877 igb_set_itr(adapter);
4878 else
4879 igb_update_ring_itr(q_vector);
4882 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4883 if (adapter->msix_entries)
4884 wr32(E1000_EIMS, q_vector->eims_value);
4885 else
4886 igb_irq_enable(adapter);
4890 /**
4891 * igb_poll - NAPI Rx polling callback
4892 * @napi: napi polling structure
4893 * @budget: count of how many packets we should handle
4894 */
4895 static int igb_poll(struct napi_struct *napi, int budget)
4897 struct igb_q_vector *q_vector = container_of(napi,
4898 struct igb_q_vector,
4899 napi);
4900 int tx_clean_complete = 1, work_done = 0;
4902 #ifdef CONFIG_IGB_DCA
4903 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4904 igb_update_dca(q_vector);
4905 #endif
4906 if (q_vector->tx_ring)
4907 tx_clean_complete = igb_clean_tx_irq(q_vector);
4909 if (q_vector->rx_ring)
4910 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4912 if (!tx_clean_complete)
4913 work_done = budget;
4915 /* If not enough Rx work done, exit the polling mode */
4916 if (work_done < budget) {
4917 napi_complete(napi);
4918 igb_ring_irq_enable(q_vector);
4919 }
4921 return work_done;
4924 /**
4925 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4926 * @adapter: board private structure
4927 * @shhwtstamps: timestamp structure to update
4928 * @regval: unsigned 64bit system time value.
4930 * We need to convert the system time value stored in the RX/TXSTMP registers
4931 * into a hwtstamp which can be used by the upper level timestamping functions
4932 */
4933 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4934 struct skb_shared_hwtstamps *shhwtstamps,
4935 u64 regval)
4937 u64 ns;
4939 /*
4940 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4941 * 24 to match clock shift we setup earlier.
4942 */
4943 if (adapter->hw.mac.type == e1000_82580)
4944 regval <<= IGB_82580_TSYNC_SHIFT;
4946 ns = timecounter_cyc2time(&adapter->clock, regval);
4947 timecompare_update(&adapter->compare, ns);
4948 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4949 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4950 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
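/*
 * Editorial note: timecounter_cyc2time() applies the usual cyclecounter
 * conversion, roughly ns = (cycles * cc->mult) >> cc->shift plus wrap
 * handling, with mult/shift configured at init time (outside this
 * section); the regval <<= 24 above only realigns the 82580's
 * 1ns-per-bit registers with that pre-arranged shift.
 */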
4953 /**
4954 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4955 * @q_vector: pointer to q_vector containing needed info
4956 * @skb: packet that was just sent
4958 * If we were asked to do hardware stamping and such a time stamp is
4959 * available, then it must have been for this skb here because we only
4960 * allow one such packet into the queue.
4961 */
4962 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4964 struct igb_adapter *adapter = q_vector->adapter;
4965 union skb_shared_tx *shtx = skb_tx(skb);
4966 struct e1000_hw *hw = &adapter->hw;
4967 struct skb_shared_hwtstamps shhwtstamps;
4968 u64 regval;
4970 /* if skb does not support hw timestamp or TX stamp not valid exit */
4971 if (likely(!shtx->hardware) ||
4972 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4973 return;
4975 regval = rd32(E1000_TXSTMPL);
4976 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4978 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4979 skb_tstamp_tx(skb, &shhwtstamps);
4982 /**
4983 * igb_clean_tx_irq - Reclaim resources after transmit completes
4984 * @q_vector: pointer to q_vector containing needed info
4985 * returns true if ring is completely cleaned
4986 */
4987 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4989 struct igb_adapter *adapter = q_vector->adapter;
4990 struct igb_ring *tx_ring = q_vector->tx_ring;
4991 struct net_device *netdev = tx_ring->netdev;
4992 struct e1000_hw *hw = &adapter->hw;
4993 struct igb_buffer *buffer_info;
4994 struct sk_buff *skb;
4995 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4996 unsigned int total_bytes = 0, total_packets = 0;
4997 unsigned int i, eop, count = 0;
4998 bool cleaned = false;
5000 i = tx_ring->next_to_clean;
5001 eop = tx_ring->buffer_info[i].next_to_watch;
5002 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5004 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5005 (count < tx_ring->count)) {
5006 for (cleaned = false; !cleaned; count++) {
5007 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5008 buffer_info = &tx_ring->buffer_info[i];
5009 cleaned = (i == eop);
5010 skb = buffer_info->skb;
5012 if (skb) {
5013 unsigned int segs, bytecount;
5014 /* gso_segs is currently only valid for tcp */
5015 segs = buffer_info->gso_segs;
5016 /* multiply data chunks by size of headers */
5017 bytecount = ((segs - 1) * skb_headlen(skb)) +
5018 skb->len;
5019 total_packets += segs;
5020 total_bytes += bytecount;
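/* Editorial worked example: a TSO skb with gso_segs = 3, a
 * 66-byte header (skb_headlen) and skb->len = 4434 counts as
 * (3 - 1) * 66 + 4434 = 4566 bytes here, adding back the
 * headers the hardware replicated per segment.
 */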
5022 igb_tx_hwtstamp(q_vector, skb);
5025 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5026 tx_desc->wb.status = 0;
5028 i++;
5029 if (i == tx_ring->count)
5030 i = 0;
5031 }
5032 eop = tx_ring->buffer_info[i].next_to_watch;
5033 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5036 tx_ring->next_to_clean = i;
5038 if (unlikely(count &&
5039 netif_carrier_ok(netdev) &&
5040 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5041 /* Make sure that anybody stopping the queue after this
5042 * sees the new next_to_clean.
5043 */
5044 smp_mb();
5045 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5046 !(test_bit(__IGB_DOWN, &adapter->state))) {
5047 netif_wake_subqueue(netdev, tx_ring->queue_index);
5048 tx_ring->tx_stats.restart_queue++;
5052 if (tx_ring->detect_tx_hung) {
5053 /* Detect a transmit hang in hardware, this serializes the
5054 * check with the clearing of time_stamp and movement of i */
5055 tx_ring->detect_tx_hung = false;
5056 if (tx_ring->buffer_info[i].time_stamp &&
5057 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5058 (adapter->tx_timeout_factor * HZ)) &&
5059 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5061 /* detected Tx unit hang */
5062 dev_err(&tx_ring->pdev->dev,
5063 "Detected Tx Unit Hang\n"
5064 " Tx Queue <%d>\n"
5065 " TDH <%x>\n"
5066 " TDT <%x>\n"
5067 " next_to_use <%x>\n"
5068 " next_to_clean <%x>\n"
5069 "buffer_info[next_to_clean]\n"
5070 " time_stamp <%lx>\n"
5071 " next_to_watch <%x>\n"
5072 " jiffies <%lx>\n"
5073 " desc.status <%x>\n",
5074 tx_ring->queue_index,
5075 readl(tx_ring->head),
5076 readl(tx_ring->tail),
5077 tx_ring->next_to_use,
5078 tx_ring->next_to_clean,
5079 tx_ring->buffer_info[eop].time_stamp,
5080 eop,
5081 jiffies,
5082 eop_desc->wb.status);
5083 netif_stop_subqueue(netdev, tx_ring->queue_index);
5086 tx_ring->total_bytes += total_bytes;
5087 tx_ring->total_packets += total_packets;
5088 tx_ring->tx_stats.bytes += total_bytes;
5089 tx_ring->tx_stats.packets += total_packets;
5090 return (count < tx_ring->count);
5093 /**
5094 * igb_receive_skb - helper function to handle rx indications
5095 * @q_vector: structure containing interrupt and ring information
5096 * @skb: packet to send up
5097 * @vlan_tag: vlan tag for packet
5098 */
5099 static void igb_receive_skb(struct igb_q_vector *q_vector,
5100 struct sk_buff *skb,
5103 struct igb_adapter *adapter = q_vector->adapter;
5105 if (vlan_tag && adapter->vlgrp)
5106 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5107 vlan_tag, skb);
5108 else
5109 napi_gro_receive(&q_vector->napi, skb);
5112 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5113 u32 status_err, struct sk_buff *skb)
5115 skb->ip_summed = CHECKSUM_NONE;
5117 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5118 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5119 (status_err & E1000_RXD_STAT_IXSM))
5120 return;
5122 /* TCP/UDP checksum error bit is set */
5123 if (status_err &
5124 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5125 /*
5126 * work around errata with sctp packets where the TCPE aka
5127 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5128 * packets, (aka let the stack check the crc32c)
5129 */
5130 if ((skb->len == 60) &&
5131 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5132 ring->rx_stats.csum_err++;
5134 /* let the stack verify checksum errors */
5135 return;
5136 }
5137 /* It must be a TCP or UDP packet with a valid checksum */
5138 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5139 skb->ip_summed = CHECKSUM_UNNECESSARY;
5141 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5144 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5145 struct sk_buff *skb)
5147 struct igb_adapter *adapter = q_vector->adapter;
5148 struct e1000_hw *hw = &adapter->hw;
5149 u64 regval;
5151 /*
5152 * If this bit is set, then the RX registers contain the time stamp. No
5153 * other packet will be time stamped until we read these registers, so
5154 * read the registers to make them available again. Because only one
5155 * packet can be time stamped at a time, we know that the register
5156 * values must belong to this one here and therefore we don't need to
5157 * compare any of the additional attributes stored for it.
5159 * If nothing went wrong, then it should have a skb_shared_tx that we
5160 * can turn into a skb_shared_hwtstamps.
5161 */
5162 if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5163 return;
5164 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5165 return;
5167 regval = rd32(E1000_RXSTMPL);
5168 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5170 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5172 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5173 union e1000_adv_rx_desc *rx_desc)
5175 /* HW will not DMA in data larger than the given buffer, even if it
5176 * parses the (NFS, of course) header to be larger. In that case, it
5177 * fills the header buffer and spills the rest into the page.
5178 */
5179 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5180 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5181 if (hlen > rx_ring->rx_buffer_len)
5182 hlen = rx_ring->rx_buffer_len;
5184 return hlen;
5186 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5187 int *work_done, int budget)
5189 struct igb_ring *rx_ring = q_vector->rx_ring;
5190 struct net_device *netdev = rx_ring->netdev;
5191 struct pci_dev *pdev = rx_ring->pdev;
5192 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5193 struct igb_buffer *buffer_info , *next_buffer;
5194 struct sk_buff *skb;
5195 bool cleaned = false;
5196 int cleaned_count = 0;
5197 int current_node = numa_node_id();
5198 unsigned int total_bytes = 0, total_packets = 0;
5199 unsigned int i;
5200 u32 staterr;
5201 u16 length;
5202 u16 vlan_tag;
5204 i = rx_ring->next_to_clean;
5205 buffer_info = &rx_ring->buffer_info[i];
5206 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5207 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5209 while (staterr & E1000_RXD_STAT_DD) {
5210 if (*work_done >= budget)
5211 break;
5213 (*work_done)++;
5214 skb = buffer_info->skb;
5215 prefetch(skb->data - NET_IP_ALIGN);
5216 buffer_info->skb = NULL;
5218 i++;
5219 if (i == rx_ring->count)
5220 i = 0;
5222 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5223 prefetch(next_rxd);
5224 next_buffer = &rx_ring->buffer_info[i];
5226 length = le16_to_cpu(rx_desc->wb.upper.length);
5227 cleaned = true;
5228 cleaned_count++;
5230 if (buffer_info->dma) {
5231 pci_unmap_single(pdev, buffer_info->dma,
5232 rx_ring->rx_buffer_len,
5233 PCI_DMA_FROMDEVICE);
5234 buffer_info->dma = 0;
5235 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5236 skb_put(skb, length);
5237 goto send_up;
5238 }
5239 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5240 }
5242 if (length) {
5243 pci_unmap_page(pdev, buffer_info->page_dma,
5244 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5245 buffer_info->page_dma = 0;
5247 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5248 buffer_info->page,
5249 buffer_info->page_offset,
5250 length);
5252 if ((page_count(buffer_info->page) != 1) ||
5253 (page_to_nid(buffer_info->page) != current_node))
5254 buffer_info->page = NULL;
5255 else
5256 get_page(buffer_info->page);
5258 skb->len += length;
5259 skb->data_len += length;
5260 skb->truesize += length;
5263 if (!(staterr & E1000_RXD_STAT_EOP)) {
5264 buffer_info->skb = next_buffer->skb;
5265 buffer_info->dma = next_buffer->dma;
5266 next_buffer->skb = skb;
5267 next_buffer->dma = 0;
5268 goto next_desc;
5269 }
5270 send_up:
5271 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5272 dev_kfree_skb_irq(skb);
5273 goto next_desc;
5274 }
5276 igb_rx_hwtstamp(q_vector, staterr, skb);
5277 total_bytes += skb->len;
5278 total_packets++;
5280 igb_rx_checksum_adv(rx_ring, staterr, skb);
5282 skb->protocol = eth_type_trans(skb, netdev);
5283 skb_record_rx_queue(skb, rx_ring->queue_index);
5285 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5286 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5288 igb_receive_skb(q_vector, skb, vlan_tag);
5290 next_desc:
5291 rx_desc->wb.upper.status_error = 0;
5293 /* return some buffers to hardware, one at a time is too slow */
5294 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5295 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5296 cleaned_count = 0;
5297 }
5299 /* use prefetched values */
5300 rx_desc = next_rxd;
5301 buffer_info = next_buffer;
5302 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5305 rx_ring->next_to_clean = i;
5306 cleaned_count = igb_desc_unused(rx_ring);
5308 if (cleaned_count)
5309 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5311 rx_ring->total_packets += total_packets;
5312 rx_ring->total_bytes += total_bytes;
5313 rx_ring->rx_stats.packets += total_packets;
5314 rx_ring->rx_stats.bytes += total_bytes;
5316 return cleaned;
5318 /**
5319 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5320 * @adapter: address of board private structure
5321 */
5322 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5324 struct net_device *netdev = rx_ring->netdev;
5325 union e1000_adv_rx_desc *rx_desc;
5326 struct igb_buffer *buffer_info;
5327 struct sk_buff *skb;
5328 unsigned int i;
5329 int bufsz;
5331 i = rx_ring->next_to_use;
5332 buffer_info = &rx_ring->buffer_info[i];
5334 bufsz = rx_ring->rx_buffer_len;
5336 while (cleaned_count--) {
5337 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5339 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5340 if (!buffer_info->page) {
5341 buffer_info->page = netdev_alloc_page(netdev);
5342 if (!buffer_info->page) {
5343 rx_ring->rx_stats.alloc_failed++;
5344 goto no_buffers;
5345 }
5346 buffer_info->page_offset = 0;
5347 } else {
5348 buffer_info->page_offset ^= PAGE_SIZE / 2;
5349 }
5350 buffer_info->page_dma =
5351 pci_map_page(rx_ring->pdev, buffer_info->page,
5352 buffer_info->page_offset,
5353 PAGE_SIZE / 2,
5354 PCI_DMA_FROMDEVICE);
5355 if (pci_dma_mapping_error(rx_ring->pdev,
5356 buffer_info->page_dma)) {
5357 buffer_info->page_dma = 0;
5358 rx_ring->rx_stats.alloc_failed++;
5359 goto no_buffers;
5360 }
5361 }
5363 skb = buffer_info->skb;
5364 if (!skb) {
5365 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5366 if (!skb) {
5367 rx_ring->rx_stats.alloc_failed++;
5368 goto no_buffers;
5369 }
5371 buffer_info->skb = skb;
5372 }
5373 if (!buffer_info->dma) {
5374 buffer_info->dma = pci_map_single(rx_ring->pdev,
5375 skb->data,
5376 bufsz,
5377 PCI_DMA_FROMDEVICE);
5378 if (pci_dma_mapping_error(rx_ring->pdev,
5379 buffer_info->dma)) {
5380 buffer_info->dma = 0;
5381 rx_ring->rx_stats.alloc_failed++;
5382 goto no_buffers;
5383 }
5384 }
5385 /* Refresh the desc even if buffer_addrs didn't change because
5386 * each write-back erases this info. */
5387 if (bufsz < IGB_RXBUFFER_1024) {
5388 rx_desc->read.pkt_addr =
5389 cpu_to_le64(buffer_info->page_dma);
5390 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5392 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5393 rx_desc->read.hdr_addr = 0;
5396 i++;
5397 if (i == rx_ring->count)
5398 i = 0;
5399 buffer_info = &rx_ring->buffer_info[i];
5400 }
5402 no_buffers:
5403 if (rx_ring->next_to_use != i) {
5404 rx_ring->next_to_use = i;
5405 if (i == 0)
5406 i = (rx_ring->count - 1);
5407 else
5408 i--;
5410 /* Force memory writes to complete before letting h/w
5411 * know there are new descriptors to fetch. (Only
5412 * applicable for weak-ordered memory model archs,
5413 * such as IA-64). */
5414 wmb();
5415 writel(i, rx_ring->tail);
5425 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5427 struct igb_adapter *adapter = netdev_priv(netdev);
5428 struct mii_ioctl_data *data = if_mii(ifr);
5430 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5431 return -EOPNOTSUPP;
5433 switch (cmd) {
5434 case SIOCGMIIPHY:
5435 data->phy_id = adapter->hw.phy.addr;
5436 break;
5437 case SIOCGMIIREG:
5438 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5439 &data->val_out))
5440 return -EIO;
5441 break;
5442 case SIOCSMIIREG:
5443 default:
5444 return -EOPNOTSUPP;
5445 }
5446 return 0;
5449 /**
5450 * igb_hwtstamp_ioctl - control hardware time stamping
5455 * Outgoing time stamping can be enabled and disabled. Play nice and
5456 * disable it when requested, although it shouldn't cause any overhead
5457 * when no packet needs it. At most one packet in the queue may be
5458 * marked for time stamping, otherwise it would be impossible to tell
5459 * for sure to which packet the hardware time stamp belongs.
5461 * Incoming time stamping has to be configured via the hardware
5462 * filters. Not all combinations are supported, in particular event
5463 * type has to be specified. Matching the kind of event packet is
5464 * not supported, with the exception of "all V2 events regardless of
5465 * level 2 or 4".
5466 */
5468 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5469 struct ifreq *ifr, int cmd)
5471 struct igb_adapter *adapter = netdev_priv(netdev);
5472 struct e1000_hw *hw = &adapter->hw;
5473 struct hwtstamp_config config;
5474 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5475 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5476 u32 tsync_rx_cfg = 0;
5477 bool is_l4 = false;
5478 bool is_l2 = false;
5479 u32 regval;
5481 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5482 return -EFAULT;
5484 /* reserved for future extensions */
5485 if (config.flags)
5486 return -EINVAL;
5488 switch (config.tx_type) {
5489 case HWTSTAMP_TX_OFF:
5490 tsync_tx_ctl = 0;
5491 case HWTSTAMP_TX_ON:
5492 break;
5493 default:
5494 return -ERANGE;
5495 }
5497 switch (config.rx_filter) {
5498 case HWTSTAMP_FILTER_NONE:
5499 tsync_rx_ctl = 0;
5500 break;
5501 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5502 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5503 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5504 case HWTSTAMP_FILTER_ALL:
5505 /*
5506 * register TSYNCRXCFG must be set, therefore it is not
5507 * possible to time stamp both Sync and Delay_Req messages
5508 * => fall back to time stamping all packets
5509 */
5510 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5511 config.rx_filter = HWTSTAMP_FILTER_ALL;
5512 break;
5513 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5514 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5515 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5516 is_l4 = true;
5517 break;
5518 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5519 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5520 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5521 is_l4 = true;
5522 break;
5523 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5524 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5525 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5526 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5527 is_l2 = true;
5528 is_l4 = true;
5529 config.rx_filter = HWTSTAMP_FILTER_SOME;
5530 break;
5531 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5532 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5533 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5534 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5535 is_l2 = true;
5536 is_l4 = true;
5537 config.rx_filter = HWTSTAMP_FILTER_SOME;
5538 break;
5539 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5540 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5541 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5542 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5543 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5544 is_l2 = true;
5545 is_l4 = true;
5546 break;
5547 default:
5548 return -ERANGE;
5549 }
5550 if (hw->mac.type == e1000_82575) {
5551 if (tsync_rx_ctl | tsync_tx_ctl)
5552 return -EINVAL;
5553 return 0;
5554 }
5556 /* enable/disable TX */
5557 regval = rd32(E1000_TSYNCTXCTL);
5558 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5559 regval |= tsync_tx_ctl;
5560 wr32(E1000_TSYNCTXCTL, regval);
5562 /* enable/disable RX */
5563 regval = rd32(E1000_TSYNCRXCTL);
5564 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5565 regval |= tsync_rx_ctl;
5566 wr32(E1000_TSYNCRXCTL, regval);
5568 /* define which PTP packets are time stamped */
5569 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5571 /* define ethertype filter for timestamped packets */
5572 if (is_l2)
5573 wr32(E1000_ETQF(3),
5574 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5575 E1000_ETQF_1588 | /* enable timestamping */
5576 ETH_P_1588)); /* 1588 eth protocol type */
5577 else
5578 wr32(E1000_ETQF(3), 0);
5580 #define PTP_PORT 319
5581 /* L4 Queue Filter[3]: filter by destination port and protocol */
5582 if (is_l4) {
5583 u32 ftqf = (IPPROTO_UDP /* UDP */
5584 | E1000_FTQF_VF_BP /* VF not compared */
5585 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5586 | E1000_FTQF_MASK); /* mask all inputs */
5587 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5589 wr32(E1000_IMIR(3), htons(PTP_PORT));
5590 wr32(E1000_IMIREXT(3),
5591 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5592 if (hw->mac.type == e1000_82576) {
5593 /* enable source port check */
5594 wr32(E1000_SPQF(3), htons(PTP_PORT));
5595 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5597 wr32(E1000_FTQF(3), ftqf);
5598 } else {
5599 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5600 }
5603 adapter->hwtstamp_config = config;
5605 /* clear TX/RX time stamp registers, just to be sure */
5606 regval = rd32(E1000_TXSTMPH);
5607 regval = rd32(E1000_RXSTMPH);
5609 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5610 -EFAULT : 0;
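/*
 * User-space sketch (editorial; relies only on the standard UAPI in
 * linux/net_tstamp.h and linux/sockios.h):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr = { .ifr_name = "eth0" };
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);	// fd: any socket bound to the device
 *
 * On return cfg.rx_filter reports what was actually enabled, e.g. the
 * fall back to HWTSTAMP_FILTER_ALL implemented above.
 */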
5619 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5621 switch (cmd) {
5622 case SIOCGMIIPHY:
5623 case SIOCGMIIREG:
5624 case SIOCSMIIREG:
5625 return igb_mii_ioctl(netdev, ifr, cmd);
5626 case SIOCSHWTSTAMP:
5627 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5628 default:
5629 return -EOPNOTSUPP;
5630 }
5633 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5635 struct igb_adapter *adapter = hw->back;
5636 u16 cap_offset;
5638 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5639 if (!cap_offset)
5640 return -E1000_ERR_CONFIG;
5642 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5644 return 0;
5647 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5649 struct igb_adapter *adapter = hw->back;
5650 u16 cap_offset;
5652 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5653 if (!cap_offset)
5654 return -E1000_ERR_CONFIG;
5656 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5658 return 0;
5661 static void igb_vlan_rx_register(struct net_device *netdev,
5662 struct vlan_group *grp)
5664 struct igb_adapter *adapter = netdev_priv(netdev);
5665 struct e1000_hw *hw = &adapter->hw;
5666 u32 ctrl, rctl;
5668 igb_irq_disable(adapter);
5669 adapter->vlgrp = grp;
5671 if (grp) {
5672 /* enable VLAN tag insert/strip */
5673 ctrl = rd32(E1000_CTRL);
5674 ctrl |= E1000_CTRL_VME;
5675 wr32(E1000_CTRL, ctrl);
5677 /* Disable CFI check */
5678 rctl = rd32(E1000_RCTL);
5679 rctl &= ~E1000_RCTL_CFIEN;
5680 wr32(E1000_RCTL, rctl);
5681 } else {
5682 /* disable VLAN tag insert/strip */
5683 ctrl = rd32(E1000_CTRL);
5684 ctrl &= ~E1000_CTRL_VME;
5685 wr32(E1000_CTRL, ctrl);
5688 igb_rlpml_set(adapter);
5690 if (!test_bit(__IGB_DOWN, &adapter->state))
5691 igb_irq_enable(adapter);
5694 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5696 struct igb_adapter *adapter = netdev_priv(netdev);
5697 struct e1000_hw *hw = &adapter->hw;
5698 int pf_id = adapter->vfs_allocated_count;
5700 /* attempt to add filter to vlvf array */
5701 igb_vlvf_set(adapter, vid, true, pf_id);
5703 /* add the filter since PF can receive vlans w/o entry in vlvf */
5704 igb_vfta_set(hw, vid, true);
5707 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5709 struct igb_adapter *adapter = netdev_priv(netdev);
5710 struct e1000_hw *hw = &adapter->hw;
5711 int pf_id = adapter->vfs_allocated_count;
5712 s32 err;
5714 igb_irq_disable(adapter);
5715 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5717 if (!test_bit(__IGB_DOWN, &adapter->state))
5718 igb_irq_enable(adapter);
5720 /* remove vlan from VLVF table array */
5721 err = igb_vlvf_set(adapter, vid, false, pf_id);
5723 /* if vid was not present in VLVF just remove it from table */
5725 igb_vfta_set(hw, vid, false);
5728 static void igb_restore_vlan(struct igb_adapter *adapter)
5730 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5732 if (adapter->vlgrp) {
5733 u16 vid;
5734 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5735 if (!vlan_group_get_device(adapter->vlgrp, vid))
5736 continue;
5737 igb_vlan_rx_add_vid(adapter->netdev, vid);
5742 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5744 struct pci_dev *pdev = adapter->pdev;
5745 struct e1000_mac_info *mac = &adapter->hw.mac;
5747 mac->autoneg = 0;
5749 switch (spddplx) {
5750 case SPEED_10 + DUPLEX_HALF:
5751 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5752 break;
5753 case SPEED_10 + DUPLEX_FULL:
5754 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5755 break;
5756 case SPEED_100 + DUPLEX_HALF:
5757 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5758 break;
5759 case SPEED_100 + DUPLEX_FULL:
5760 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5761 break;
5762 case SPEED_1000 + DUPLEX_FULL:
5763 mac->autoneg = 1;
5764 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5765 break;
5766 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5767 default:
5768 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5769 return -EINVAL;
5770 }
5771 return 0;
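/*
 * Editorial note: spddplx is simply SPEED_* + DUPLEX_* (DUPLEX_HALF is
 * 0, DUPLEX_FULL is 1), so a forced request such as
 *
 *	ethtool -s eth0 speed 100 duplex full autoneg off
 *
 * arrives here as 100 + 1 and selects ADVERTISE_100_FULL above.
 */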
5774 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5776 struct net_device *netdev = pci_get_drvdata(pdev);
5777 struct igb_adapter *adapter = netdev_priv(netdev);
5778 struct e1000_hw *hw = &adapter->hw;
5779 u32 ctrl, rctl, status;
5780 u32 wufc = adapter->wol;
5781 #ifdef CONFIG_PM
5782 int retval = 0;
5783 #endif
5785 netif_device_detach(netdev);
5787 if (netif_running(netdev))
5788 igb_close(netdev);
5790 igb_clear_interrupt_scheme(adapter);
5792 #ifdef CONFIG_PM
5793 retval = pci_save_state(pdev);
5794 if (retval)
5795 return retval;
5796 #endif
5798 status = rd32(E1000_STATUS);
5799 if (status & E1000_STATUS_LU)
5800 wufc &= ~E1000_WUFC_LNKC;
5802 if (wufc) {
5803 igb_setup_rctl(adapter);
5804 igb_set_rx_mode(netdev);
5806 /* turn on all-multi mode if wake on multicast is enabled */
5807 if (wufc & E1000_WUFC_MC) {
5808 rctl = rd32(E1000_RCTL);
5809 rctl |= E1000_RCTL_MPE;
5810 wr32(E1000_RCTL, rctl);
5813 ctrl = rd32(E1000_CTRL);
5814 /* advertise wake from D3Cold */
5815 #define E1000_CTRL_ADVD3WUC 0x00100000
5816 /* phy power management enable */
5817 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5818 ctrl |= E1000_CTRL_ADVD3WUC;
5819 wr32(E1000_CTRL, ctrl);
5821 /* Allow time for pending master requests to run */
5822 igb_disable_pcie_master(hw);
5824 wr32(E1000_WUC, E1000_WUC_PME_EN);
5825 wr32(E1000_WUFC, wufc);
5826 } else {
5827 wr32(E1000_WUC, 0);
5828 wr32(E1000_WUFC, 0);
5829 }
5831 *enable_wake = wufc || adapter->en_mng_pt;
5832 if (!*enable_wake)
5833 igb_power_down_link(adapter);
5834 else
5835 igb_power_up_link(adapter);
5837 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5838 * would have already happened in close and is redundant. */
5839 igb_release_hw_control(adapter);
5841 pci_disable_device(pdev);
5843 return 0;
5846 #ifdef CONFIG_PM
5847 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5849 int retval;
5850 bool wake;
5852 retval = __igb_shutdown(pdev, &wake);
5853 if (retval)
5854 return retval;
5856 if (wake) {
5857 pci_prepare_to_sleep(pdev);
5858 } else {
5859 pci_wake_from_d3(pdev, false);
5860 pci_set_power_state(pdev, PCI_D3hot);
5861 }
5863 return 0;
5866 static int igb_resume(struct pci_dev *pdev)
5868 struct net_device *netdev = pci_get_drvdata(pdev);
5869 struct igb_adapter *adapter = netdev_priv(netdev);
5870 struct e1000_hw *hw = &adapter->hw;
5871 u32 err;
5873 pci_set_power_state(pdev, PCI_D0);
5874 pci_restore_state(pdev);
5875 pci_save_state(pdev);
5877 err = pci_enable_device_mem(pdev);
5880 "igb: Cannot enable PCI device from suspend\n");
5883 pci_set_master(pdev);
5885 pci_enable_wake(pdev, PCI_D3hot, 0);
5886 pci_enable_wake(pdev, PCI_D3cold, 0);
5888 if (igb_init_interrupt_scheme(adapter)) {
5889 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5895 /* let the f/w know that the h/w is now under the control of the
5897 igb_get_hw_control(adapter);
5899 wr32(E1000_WUS, ~0);
5901 if (netif_running(netdev)) {
5902 err = igb_open(netdev);
5903 if (err)
5904 return err;
5905 }
5907 netif_device_attach(netdev);
5909 return 0;
5911 #endif
5913 static void igb_shutdown(struct pci_dev *pdev)
5915 bool wake;
5917 __igb_shutdown(pdev, &wake);
5919 if (system_state == SYSTEM_POWER_OFF) {
5920 pci_wake_from_d3(pdev, wake);
5921 pci_set_power_state(pdev, PCI_D3hot);
5925 #ifdef CONFIG_NET_POLL_CONTROLLER
5926 /*
5927 * Polling 'interrupt' - used by things like netconsole to send skbs
5928 * without having to re-enable interrupts. It's not called while
5929 * the interrupt routine is executing.
5930 */
5931 static void igb_netpoll(struct net_device *netdev)
5933 struct igb_adapter *adapter = netdev_priv(netdev);
5934 struct e1000_hw *hw = &adapter->hw;
5935 int i;
5937 if (!adapter->msix_entries) {
5938 struct igb_q_vector *q_vector = adapter->q_vector[0];
5939 igb_irq_disable(adapter);
5940 napi_schedule(&q_vector->napi);
5941 return;
5942 }
5944 for (i = 0; i < adapter->num_q_vectors; i++) {
5945 struct igb_q_vector *q_vector = adapter->q_vector[i];
5946 wr32(E1000_EIMC, q_vector->eims_value);
5947 napi_schedule(&q_vector->napi);
5950 #endif /* CONFIG_NET_POLL_CONTROLLER */
5952 /**
5953 * igb_io_error_detected - called when PCI error is detected
5954 * @pdev: Pointer to PCI device
5955 * @state: The current pci connection state
5957 * This function is called after a PCI bus error affecting
5958 * this device has been detected.
5960 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5961 pci_channel_state_t state)
5963 struct net_device *netdev = pci_get_drvdata(pdev);
5964 struct igb_adapter *adapter = netdev_priv(netdev);
5966 netif_device_detach(netdev);
5968 if (state == pci_channel_io_perm_failure)
5969 return PCI_ERS_RESULT_DISCONNECT;
5971 if (netif_running(netdev))
5972 igb_down(adapter);
5973 pci_disable_device(pdev);
5975 /* Request a slot reset. */
5976 return PCI_ERS_RESULT_NEED_RESET;
5979 /**
5980 * igb_io_slot_reset - called after the pci bus has been reset.
5981 * @pdev: Pointer to PCI device
5983 * Restart the card from scratch, as if from a cold-boot. Implementation
5984 * resembles the first-half of the igb_resume routine.
5986 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5988 struct net_device *netdev = pci_get_drvdata(pdev);
5989 struct igb_adapter *adapter = netdev_priv(netdev);
5990 struct e1000_hw *hw = &adapter->hw;
5991 pci_ers_result_t result;
5992 int err;
5994 if (pci_enable_device_mem(pdev)) {
5996 "Cannot re-enable PCI device after reset.\n");
5997 result = PCI_ERS_RESULT_DISCONNECT;
5998 } else {
5999 pci_set_master(pdev);
6000 pci_restore_state(pdev);
6001 pci_save_state(pdev);
6003 pci_enable_wake(pdev, PCI_D3hot, 0);
6004 pci_enable_wake(pdev, PCI_D3cold, 0);
6006 igb_reset(adapter);
6007 wr32(E1000_WUS, ~0);
6008 result = PCI_ERS_RESULT_RECOVERED;
6011 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6013 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6014 "failed 0x%0x\n", err);
6015 /* non-fatal, continue */
6021 /**
6022 * igb_io_resume - called when traffic can start flowing again.
6023 * @pdev: Pointer to PCI device
6025 * This callback is called when the error recovery driver tells us that
6026 * its OK to resume normal operation. Implementation resembles the
6027 * second-half of the igb_resume routine.
6029 static void igb_io_resume(struct pci_dev *pdev)
6031 struct net_device *netdev = pci_get_drvdata(pdev);
6032 struct igb_adapter *adapter = netdev_priv(netdev);
6034 if (netif_running(netdev)) {
6035 if (igb_up(adapter)) {
6036 dev_err(&pdev->dev, "igb_up failed after reset\n");
6041 netif_device_attach(netdev);
6043 /* let the f/w know that the h/w is now under the control of the
6044 * driver. */
6045 igb_get_hw_control(adapter);
6048 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6051 u32 rar_low, rar_high;
6052 struct e1000_hw *hw = &adapter->hw;
6054 /* HW expects these in little endian so we reverse the byte order
6055 * from network order (big endian) to little endian
6057 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6058 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6059 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6061 /* Indicate to hardware the Address is Valid. */
6062 rar_high |= E1000_RAH_AV;
6064 if (hw->mac.type == e1000_82575)
6065 rar_high |= E1000_RAH_POOL_1 * qsel;
6067 rar_high |= E1000_RAH_POOL_1 << qsel;
6069 wr32(E1000_RAL(index), rar_low);
6070 wrfl();
6071 wr32(E1000_RAH(index), rar_high);
6072 wrfl();
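/*
 * Editorial worked example: for addr = 00:11:22:33:44:55 the code above
 * writes rar_low = 0x33221100 and rar_high = 0x00005544 | E1000_RAH_AV
 * (plus the pool-select bits), i.e. the address is stored little endian
 * across the RAL/RAH pair.
 */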
6075 static int igb_set_vf_mac(struct igb_adapter *adapter,
6076 int vf, unsigned char *mac_addr)
6078 struct e1000_hw *hw = &adapter->hw;
6079 /* VF MAC addresses start at the end of the receive addresses and move
6080 * towards the first, so a collision should not be possible */
6081 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6083 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6085 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6087 return 0;
6090 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6092 struct igb_adapter *adapter = netdev_priv(netdev);
6093 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6094 return -EINVAL;
6095 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6096 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6097 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6098 " change effective.");
6099 if (test_bit(__IGB_DOWN, &adapter->state)) {
6100 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6101 " but the PF device is not up.\n");
6102 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6103 " attempting to use the VF device.\n");
6105 return igb_set_vf_mac(adapter, vf, mac);
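/*
 * Usage sketch (editorial; assumes an iproute2 build with VF support):
 * the PF administrator reaches this ndo with e.g.
 *
 *	ip link set eth0 vf 0 mac 02:01:02:03:04:05
 *
 * after which IGB_VF_FLAG_PF_SET_MAC keeps igb_vf_reset_event() from
 * randomizing the address on the next VF reset.
 */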
6108 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6110 return -EOPNOTSUPP;
6113 static int igb_ndo_get_vf_config(struct net_device *netdev,
6114 int vf, struct ifla_vf_info *ivi)
6116 struct igb_adapter *adapter = netdev_priv(netdev);
6117 if (vf >= adapter->vfs_allocated_count)
6118 return -EINVAL;
6119 ivi->vf = vf;
6120 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6121 ivi->tx_rate = 0;
6122 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6123 ivi->qos = adapter->vf_data[vf].pf_qos;
6124 return 0;
6127 static void igb_vmm_control(struct igb_adapter *adapter)
6129 struct e1000_hw *hw = &adapter->hw;
6132 /* replication is not supported for 82575 */
6133 if (hw->mac.type == e1000_82575)
6134 return;
6136 /* enable replication vlan tag stripping */
6137 reg = rd32(E1000_RPLOLR);
6138 reg |= E1000_RPLOLR_STRVLAN;
6139 wr32(E1000_RPLOLR, reg);
6141 /* notify HW that the MAC is adding vlan tags */
6142 reg = rd32(E1000_DTXCTL);
6143 reg |= E1000_DTXCTL_VLAN_ADDED;
6144 wr32(E1000_DTXCTL, reg);
6146 if (adapter->vfs_allocated_count) {
6147 igb_vmdq_set_loopback_pf(hw, true);
6148 igb_vmdq_set_replication_pf(hw, true);
6150 igb_vmdq_set_loopback_pf(hw, false);
6151 igb_vmdq_set_replication_pf(hw, false);