1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * IBM Power Virtual Ethernet Device Driver
5 * Copyright (C) IBM Corporation, 2003, 2010
7 * Authors: Dave Larson <larson1@us.ibm.com>
8 * Santiago Leon <santil@linux.vnet.ibm.com>
9 * Brian King <brking@linux.vnet.ibm.com>
10 * Robert Jennings <rcj@linux.vnet.ibm.com>
11 * Anton Blanchard <anton@au.ibm.com>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kernel.h>
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/skbuff.h>
22 #include <linux/init.h>
23 #include <linux/interrupt.h>
26 #include <linux/ethtool.h>
29 #include <linux/ipv6.h>
30 #include <linux/slab.h>
31 #include <asm/hvcall.h>
32 #include <linux/atomic.h>
34 #include <asm/iommu.h>
35 #include <asm/firmware.h>
37 #include <net/ip6_checksum.h>
// Forward declarations of functions defined later in the file.
41 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
42 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
// kobj_type object declared here without an initializer; judging by its
// name it describes the buffer pool kobjects (TODO confirm at the definition).
44 static struct kobj_type ktype_veth_pool;
// Driver identification strings and version reported by the module macros
// below and by netdev_get_drvinfo.
47 static const char ibmveth_driver_name[] = "ibmveth";
48 static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
49 #define ibmveth_driver_version "1.06"
51 MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
52 MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
53 MODULE_LICENSE("GPL");
54 MODULE_VERSION(ibmveth_driver_version);
// Module parameters. tx_copybreak, rx_copybreak and rx_flush are registered
// with mode 0644; old_large_send is registered with mode 0444.
// tx_copybreak and rx_copybreak default to 128, rx_flush defaults to 0.
56 static unsigned int tx_copybreak __read_mostly = 128;
57 module_param(tx_copybreak, uint, 0644);
58 MODULE_PARM_DESC(tx_copybreak,
59 "Maximum size of packet that is copied to a new buffer on transmit");
61 static unsigned int rx_copybreak __read_mostly = 128;
62 module_param(rx_copybreak, uint, 0644);
63 MODULE_PARM_DESC(rx_copybreak,
64 "Maximum size of packet that is copied to a new buffer on receive");
66 static unsigned int rx_flush __read_mostly = 0;
67 module_param(rx_flush, uint, 0644);
68 MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
// No explicit initializer, so old_large_send starts out false.
70 static bool old_large_send __read_mostly;
71 module_param(old_large_send, bool, 0444);
72 MODULE_PARM_DESC(old_large_send,
73 "Use old large send method on firmware that supports the new method");
// Label of one statistic, ETH_GSTRING_LEN bytes wide. The ibmveth_stats
// table entries are accessed through .name and .offset elsewhere in the file;
// the lines that open and close the enclosing struct are not visible here.
76 char name[ETH_GSTRING_LEN];
/* Byte offset of the counter @stat inside struct ibmveth_adapter. */
#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
/*
 * Read the u64 counter located @off bytes past the adapter pointer @a.
 * @off and the whole expansion are parenthesized so that an expression
 * passed as @off (or an operator applied to the result) cannot regroup
 * the pointer arithmetic.
 */
#define IBMVETH_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off))))
83 static struct ibmveth_stat ibmveth_stats[] = {
84 { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
85 { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
86 { "replenish_add_buff_failure",
87 IBMVETH_STAT_OFF(replenish_add_buff_failure) },
88 { "replenish_add_buff_success",
89 IBMVETH_STAT_OFF(replenish_add_buff_success) },
90 { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
91 { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
92 { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
93 { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
94 { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
95 { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
96 { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
97 { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
98 { "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
101 /* simple methods of getting data from the current rxq entry */
102 static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
104 return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
107 static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
109 return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
110 IBMVETH_RXQ_TOGGLE_SHIFT;
113 static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
115 return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
118 static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
120 return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
123 static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
125 return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
128 static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
130 return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
133 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
135 return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
138 static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
140 return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
143 static unsigned int ibmveth_real_max_tx_queues(void)
145 unsigned int n_cpu = num_online_cpus();
147 return min(n_cpu, IBMVETH_MAX_QUEUES);
150 /* setup the initial settings for a buffer pool */
151 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
152 u32 pool_index, u32 pool_size,
153 u32 buff_size, u32 pool_active)
155 pool->size = pool_size;
156 pool->index = pool_index;
157 pool->buff_size = buff_size;
158 pool->threshold = pool_size * 7 / 8;
159 pool->active = pool_active;
162 /* allocate and set up a buffer pool - called during open */
// Allocates the three per-pool arrays (free_map, dma_addr, skbuff), each
// with pool->size elements, fills free_map with the identity mapping and
// resets the available count and both ring indices to zero.
// NOTE(review): several failure checks and the return statements are not
// visible in this section; the visible cleanup frees and NULLs the arrays
// that were allocated earlier.
163 static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
167 pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
172 pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
// dma_addr allocation failed: release free_map before bailing out.
173 if (!pool->dma_addr) {
174 kfree(pool->free_map);
175 pool->free_map = NULL;
179 pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
// Cleanup that releases both earlier arrays (presumably the skbuff failure
// path; the guarding condition is not visible here).
182 kfree(pool->dma_addr);
183 pool->dma_addr = NULL;
185 kfree(pool->free_map);
186 pool->free_map = NULL;
// Every slot initially maps to its own index.
190 for (i = 0; i < pool->size; ++i)
191 pool->free_map[i] = i;
193 atomic_set(&pool->available, 0);
194 pool->producer_index = 0;
195 pool->consumer_index = 0;
200 static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
202 unsigned long offset;
204 for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
205 asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
208 /* replenish the buffers for a pool. note that we don't need to
209 * skb_reserve these since they are used for incoming...
// Hands receive buffers of one pool to the hypervisor through
// h_add_logical_lan_buffer, walking free_map from consumer_index.
// NOTE(review): several control-flow lines (labels, gotos, conditions) are
// not visible in this section.
211 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
212 struct ibmveth_buff_pool *pool)
// Upper bound for this pass: pool size minus buffers currently available.
215 u32 count = pool->size - atomic_read(&pool->available);
216 u32 buffers_added = 0;
218 unsigned int free_index, index;
220 unsigned long lpar_rc;
225 for (i = 0; i < count; ++i) {
226 union ibmveth_buf_desc desc;
228 free_index = pool->consumer_index;
229 index = pool->free_map[free_index];
// An invalid free_map entry triggers a warning, schedules adapter->work and
// leaves the loop through the bad_index_failure label.
232 if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
233 schedule_work(&adapter->work);
234 goto bad_index_failure;
237 /* are we allocating a new buffer or recycling an old one */
238 if (pool->skbuff[index])
241 skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
244 netdev_dbg(adapter->netdev,
245 "replenish: unable to allocate skb\n");
246 adapter->replenish_no_mem++;
// Map the whole buffer for device-to-memory DMA.
250 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
251 pool->buff_size, DMA_FROM_DEVICE);
253 if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
// Remember mapping and skb so the slot can be found by index later.
256 pool->dma_addr[index] = dma_addr;
257 pool->skbuff[index] = skb;
// Flush length is capped at the pool buffer size.
260 unsigned int len = min(pool->buff_size,
261 adapter->netdev->mtu +
263 ibmveth_flush_buffer(skb->data, len);
// Build the buffer descriptor from the stored mapping.
266 dma_addr = pool->dma_addr[index];
267 desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
268 desc.fields.address = dma_addr;
// The correlator (pool index in the upper 32 bits, buffer index in the lower
// 32 bits) is written into the first 8 bytes of the buffer data.
270 correlator = ((u64)pool->index << 32) | index;
271 *(u64 *)pool->skbuff[index]->data = correlator;
273 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
276 if (lpar_rc != H_SUCCESS) {
277 netdev_warn(adapter->netdev,
278 "%sadd_logical_lan failed %lu\n",
279 skb ? "" : "When recycling: ", lpar_rc);
// Success: mark the free_map slot as consumed and advance consumer_index,
// wrapping at pool->size.
283 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
284 pool->consumer_index++;
285 if (pool->consumer_index >= pool->size)
286 pool->consumer_index = 0;
289 adapter->replenish_add_buff_success++;
293 atomic_add(buffers_added, &(pool->available));
// Failure handling: undo the mapping if one exists, free the skb, clear the
// slot and count the failure. Buffers added before the failure are still
// credited to pool->available.
298 if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
299 dma_unmap_single(&adapter->vdev->dev,
300 pool->dma_addr[index], pool->buff_size,
302 dev_kfree_skb_any(pool->skbuff[index]);
303 pool->skbuff[index] = NULL;
305 adapter->replenish_add_buff_failure++;
308 atomic_add(buffers_added, &(pool->available));
312 * The final 8 bytes of the buffer list is a counter of frames dropped
313 * because there was not a buffer in the buffer list capable of holding
316 static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
318 __be64 *p = adapter->buffer_list_addr + 4096 - 8;
320 adapter->rx_no_buffer = be64_to_cpup(p);
323 /* replenish routine */
// Counts one replenish cycle, walks the buffer pools from the highest index
// down to 0 and replenishes pools whose available count is below their
// threshold, then refreshes the rx_no_buffer counter.
// NOTE(review): the first half of the per-pool condition is not visible in
// this section.
324 static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
328 adapter->replenish_task_cycles++;
330 for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
331 struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];
334 (atomic_read(&pool->available) < pool->threshold))
335 ibmveth_replenish_buffer_pool(adapter, pool);
338 ibmveth_update_rx_no_buffer(adapter);
341 /* empty and free a buffer pool - also used to do cleanup in error paths */
// Releases free_map, then unmaps and frees every skb still held by the pool
// (only when both the skbuff and dma_addr arrays exist), then releases the
// dma_addr array. Freed pointers are reset to NULL.
// NOTE(review): the per-skb guard, the remaining dma_unmap_single arguments
// and any release of the skbuff array itself are not visible in this section.
342 static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
343 struct ibmveth_buff_pool *pool)
347 kfree(pool->free_map);
348 pool->free_map = NULL;
350 if (pool->skbuff && pool->dma_addr) {
351 for (i = 0; i < pool->size; ++i) {
352 struct sk_buff *skb = pool->skbuff[i];
354 dma_unmap_single(&adapter->vdev->dev,
358 dev_kfree_skb_any(skb);
359 pool->skbuff[i] = NULL;
364 if (pool->dma_addr) {
365 kfree(pool->dma_addr);
366 pool->dma_addr = NULL;
376 * ibmveth_remove_buffer_from_pool - remove a buffer from a pool
377 * @adapter: adapter instance
378 * @correlator: identifies pool and index
379 * @reuse: whether to reuse buffer
383 * * %-EINVAL - correlator maps to pool or index out of range
384 * * %-EFAULT - pool and index map to null skb
386 static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
387 u64 correlator, bool reuse)
// The correlator carries the pool number in its upper 32 bits and the buffer
// index in its lower 32 bits.
389 unsigned int pool = correlator >> 32;
390 unsigned int index = correlator & 0xffffffffUL;
391 unsigned int free_index;
// Out-of-range pool or index: warn and schedule adapter->work.
394 if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
395 WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
396 schedule_work(&adapter->work);
400 skb = adapter->rx_buff_pool[pool].skbuff[index];
// NOTE(review): the condition guarding this second schedule_work call is not
// visible here; the header above documents -EFAULT for a null skb.
402 schedule_work(&adapter->work);
406 /* if we are going to reuse the buffer then keep the pointers around
407 * but mark index as available. replenish will see the skb pointer and
408 * assume it is to be recycled.
411 /* remove the skb pointer to mark free. actual freeing is done
412 * by upper level networking after gro_recieve
414 adapter->rx_buff_pool[pool].skbuff[index] = NULL;
416 dma_unmap_single(&adapter->vdev->dev,
417 adapter->rx_buff_pool[pool].dma_addr[index],
418 adapter->rx_buff_pool[pool].buff_size,
// Return the index to the free_map at producer_index and advance that index,
// wrapping at the pool size.
422 free_index = adapter->rx_buff_pool[pool].producer_index;
423 adapter->rx_buff_pool[pool].producer_index++;
424 if (adapter->rx_buff_pool[pool].producer_index >=
425 adapter->rx_buff_pool[pool].size)
426 adapter->rx_buff_pool[pool].producer_index = 0;
427 adapter->rx_buff_pool[pool].free_map[free_index] = index;
// One fewer buffer is now available in this pool.
431 atomic_dec(&(adapter->rx_buff_pool[pool].available));
436 /* get the current buffer on the rx queue */
// Decodes the correlator of the current rx queue entry (pool in the upper
// 32 bits, index in the lower 32 bits) and returns the skb stored for it.
// An out-of-range pool or index triggers a warning and schedules
// adapter->work; the value returned in that case is not visible in this
// section.
437 static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
439 u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
440 unsigned int pool = correlator >> 32;
441 unsigned int index = correlator & 0xffffffffUL;
443 if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
444 WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
445 schedule_work(&adapter->work);
449 return adapter->rx_buff_pool[pool].skbuff[index];
453 * ibmveth_rxq_harvest_buffer - Harvest buffer from pool
455 * @adapter: pointer to adapter
456 * @reuse: whether to reuse buffer
458 * Context: called from ibmveth_poll
462 * * other - non-zero return from ibmveth_remove_buffer_from_pool
464 static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
// Remove the buffer named by the current entry correlator from its pool.
470 cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
471 rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
// Advance to the next rx queue slot; on wrap-around restart at slot 0 and
// invert the expected toggle value.
475 if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
476 adapter->rx_queue.index = 0;
477 adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
483 static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
485 dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
486 adapter->tx_ltb_size, DMA_TO_DEVICE);
487 kfree(adapter->tx_ltb_ptr[idx]);
488 adapter->tx_ltb_ptr[idx] = NULL;
// Allocates a zeroed tx long term buffer of adapter->tx_ltb_size bytes for
// queue idx and DMA-maps it. Both failure paths log an error; the mapping
// failure path also frees the buffer and clears the pointer.
// NOTE(review): the return statements and the trailing arguments of the
// kzalloc and dma_map_single calls are not visible in this section.
491 static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
493 adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
495 if (!adapter->tx_ltb_ptr[idx]) {
496 netdev_err(adapter->netdev,
497 "unable to allocate tx long term buffer\n");
500 adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
501 adapter->tx_ltb_ptr[idx],
502 adapter->tx_ltb_size,
504 if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
505 netdev_err(adapter->netdev,
506 "unable to DMA map tx long term buffer\n");
507 kfree(adapter->tx_ltb_ptr[idx]);
508 adapter->tx_ltb_ptr[idx] = NULL;
// Registers the logical LAN with the hypervisor using the buffer list,
// rx queue descriptor, filter list and MAC address. On failure, while
// try_again is still set, the logical LAN is freed (repeating while the
// hypervisor reports busy) before another attempt.
// NOTE(review): the loop structure and the final return are only partly
// visible in this section.
515 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
516 union ibmveth_buf_desc rxq_desc, u64 mac_address)
518 int rc, try_again = 1;
521 * After a kexec the adapter will still be open, so our attempt to
522 * open it will fail. So if we get a failure we free the adapter and
523 * try again, but only once.
526 rc = h_register_logical_lan(adapter->vdev->unit_address,
527 adapter->buffer_list_dma, rxq_desc.desc,
528 adapter->filter_list_dma, mac_address);
530 if (rc != H_SUCCESS && try_again) {
532 rc = h_free_logical_lan(adapter->vdev->unit_address);
533 } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
// Brings the interface up: enables NAPI, allocates and maps the buffer list,
// filter list and rx queue, allocates one tx long term buffer per real tx
// queue, registers the logical LAN, allocates the active buffer pools,
// requests the irq, runs an initial replenish through the interrupt handler
// and starts the tx queues. The labels at the end unwind in reverse order.
// NOTE(review): return statements, some labels and some loop headers are not
// visible in this section.
542 static int ibmveth_open(struct net_device *netdev)
544 struct ibmveth_adapter *adapter = netdev_priv(netdev);
547 unsigned long lpar_rc;
549 union ibmveth_buf_desc rxq_desc;
553 netdev_dbg(netdev, "open starting\n");
555 napi_enable(&adapter->napi);
// The rx queue is sized from the sum of all pool sizes.
557 for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
558 rxq_entries += adapter->rx_buff_pool[i].size;
561 adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
562 if (!adapter->buffer_list_addr) {
563 netdev_err(netdev, "unable to allocate list pages\n");
567 adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
568 if (!adapter->filter_list_addr) {
569 netdev_err(netdev, "unable to allocate filter pages\n");
570 goto out_free_buffer_list;
573 dev = &adapter->vdev->dev;
575 adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
577 adapter->rx_queue.queue_addr =
578 dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
579 &adapter->rx_queue.queue_dma, GFP_KERNEL);
580 if (!adapter->rx_queue.queue_addr)
581 goto out_free_filter_list;
// Both list pages are mapped as 4096-byte bidirectional DMA buffers.
583 adapter->buffer_list_dma = dma_map_single(dev,
584 adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
585 if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
586 netdev_err(netdev, "unable to map buffer list pages\n");
587 goto out_free_queue_mem;
590 adapter->filter_list_dma = dma_map_single(dev,
591 adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
592 if (dma_mapping_error(dev, adapter->filter_list_dma)) {
593 netdev_err(netdev, "unable to map filter list pages\n");
594 goto out_unmap_buffer_list;
597 for (i = 0; i < netdev->real_num_tx_queues; i++) {
598 if (ibmveth_allocate_tx_ltb(adapter, i))
599 goto out_free_tx_ltb;
// Start consuming the rx queue at slot 0 with an expected toggle of 1.
602 adapter->rx_queue.index = 0;
603 adapter->rx_queue.num_slots = rxq_entries;
604 adapter->rx_queue.toggle = 1;
606 mac_address = ether_addr_to_u64(netdev->dev_addr);
608 rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
609 adapter->rx_queue.queue_len;
610 rxq_desc.fields.address = adapter->rx_queue.queue_dma;
612 netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
613 netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
614 netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr);
// Device interrupts are disabled before the logical LAN is registered.
616 h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
618 lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);
620 if (lpar_rc != H_SUCCESS) {
621 netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
623 netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
624 "desc:0x%llx MAC:0x%llx\n",
625 adapter->buffer_list_dma,
626 adapter->filter_list_dma,
630 goto out_unmap_filter_list;
// A pool whose allocation fails is marked inactive before unwinding.
633 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
634 if (!adapter->rx_buff_pool[i].active)
636 if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
637 netdev_err(netdev, "unable to alloc pool\n");
638 adapter->rx_buff_pool[i].active = 0;
640 goto out_free_buffer_pools;
644 netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
645 rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
// If the irq cannot be obtained the logical LAN is freed again, repeating
// while the hypervisor reports busy.
648 netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
651 lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
652 } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
654 goto out_free_buffer_pools;
// The interrupt handler is called directly once to kick off the first
// replenish cycle.
659 netdev_dbg(netdev, "initial replenish cycle\n");
660 ibmveth_interrupt(netdev->irq, netdev);
662 netif_tx_start_all_queues(netdev);
664 netdev_dbg(netdev, "open complete\n");
// Error unwinding starts here.
668 out_free_buffer_pools:
670 if (adapter->rx_buff_pool[i].active)
671 ibmveth_free_buffer_pool(adapter,
672 &adapter->rx_buff_pool[i]);
674 out_unmap_filter_list:
675 dma_unmap_single(dev, adapter->filter_list_dma, 4096,
680 ibmveth_free_tx_ltb(adapter, i);
683 out_unmap_buffer_list:
684 dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
687 dma_free_coherent(dev, adapter->rx_queue.queue_len,
688 adapter->rx_queue.queue_addr,
689 adapter->rx_queue.queue_dma);
690 out_free_filter_list:
691 free_page((unsigned long)adapter->filter_list_addr);
692 out_free_buffer_list:
693 free_page((unsigned long)adapter->buffer_list_addr);
695 napi_disable(&adapter->napi);
// Takes the interface down: disables NAPI, stops the tx queues, disables
// device interrupts, frees the logical LAN (repeating while the hypervisor
// reports busy), releases the irq, refreshes the rx_no_buffer counter and
// then releases the list pages, the rx queue, the active buffer pools and
// the tx long term buffers.
// NOTE(review): the return statement is not visible in this section.
699 static int ibmveth_close(struct net_device *netdev)
701 struct ibmveth_adapter *adapter = netdev_priv(netdev);
702 struct device *dev = &adapter->vdev->dev;
706 netdev_dbg(netdev, "close starting\n");
708 napi_disable(&adapter->napi);
710 netif_tx_stop_all_queues(netdev);
712 h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
715 lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
716 } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
// A failure to free the logical LAN is logged but does not abort the close.
718 if (lpar_rc != H_SUCCESS) {
719 netdev_err(netdev, "h_free_logical_lan failed with %lx, "
720 "continuing with close\n", lpar_rc);
723 free_irq(netdev->irq, netdev);
// Read the dropped-frame counter before the buffer list page is released.
725 ibmveth_update_rx_no_buffer(adapter);
727 dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
729 free_page((unsigned long)adapter->buffer_list_addr);
731 dma_unmap_single(dev, adapter->filter_list_dma, 4096,
733 free_page((unsigned long)adapter->filter_list_addr);
735 dma_free_coherent(dev, adapter->rx_queue.queue_len,
736 adapter->rx_queue.queue_addr,
737 adapter->rx_queue.queue_dma);
739 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
740 if (adapter->rx_buff_pool[i].active)
741 ibmveth_free_buffer_pool(adapter,
742 &adapter->rx_buff_pool[i]);
744 for (i = 0; i < netdev->real_num_tx_queues; i++)
745 ibmveth_free_tx_ltb(adapter, i);
747 netdev_dbg(netdev, "close complete\n");
753 * ibmveth_reset - Handle scheduled reset work
755 * @w: pointer to work_struct embedded in adapter structure
757 * Context: This routine acquires rtnl_mutex and disables its NAPI through
758 * ibmveth_close. It can't be called directly in a context that has
759 * already acquired rtnl_mutex or disabled its NAPI, or directly from
764 static void ibmveth_reset(struct work_struct *w)
// Recover the adapter from the embedded work member.
766 struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work);
767 struct net_device *netdev = adapter->netdev;
769 netdev_dbg(netdev, "reset starting\n");
// The reset itself is a close followed by an open of the same device.
// NOTE(review): the locking calls around this pair are not visible in this
// section, and the result of dev_open is not checked in the visible lines.
773 dev_close(adapter->netdev);
774 dev_open(adapter->netdev, NULL);
778 netdev_dbg(netdev, "reset complete\n");
// ethtool set_link_ksettings callback: delegates to
// ethtool_virtdev_set_link_ksettings. The trailing arguments of that call
// are not visible in this section.
781 static int ibmveth_set_link_ksettings(struct net_device *dev,
782 const struct ethtool_link_ksettings *cmd)
784 struct ibmveth_adapter *adapter = netdev_priv(dev);
786 return ethtool_virtdev_set_link_ksettings(dev, cmd,
// ethtool get_link_ksettings callback: reports the speed and duplex stored
// in the adapter and a port type of PORT_OTHER.
// NOTE(review): the return statement is not visible in this section.
791 static int ibmveth_get_link_ksettings(struct net_device *dev,
792 struct ethtool_link_ksettings *cmd)
794 struct ibmveth_adapter *adapter = netdev_priv(dev);
796 cmd->base.speed = adapter->speed;
797 cmd->base.duplex = adapter->duplex;
798 cmd->base.port = PORT_OTHER;
803 static void ibmveth_init_link_settings(struct net_device *dev)
805 struct ibmveth_adapter *adapter = netdev_priv(dev);
807 adapter->speed = SPEED_1000;
808 adapter->duplex = DUPLEX_FULL;
811 static void netdev_get_drvinfo(struct net_device *dev,
812 struct ethtool_drvinfo *info)
814 strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
815 strscpy(info->version, ibmveth_driver_version, sizeof(info->version));
// Feature fix-up callback: when NETIF_F_RXCSUM is not requested, every bit
// of NETIF_F_CSUM_MASK is cleared from the feature set as well.
// NOTE(review): the return statement is not visible in this section.
818 static netdev_features_t ibmveth_fix_features(struct net_device *dev,
819 netdev_features_t features)
822 * Since the ibmveth firmware interface does not have the
823 * concept of separate tx/rx checksum offload enable, if rx
824 * checksum is disabled we also have to disable tx checksum
825 * offload. Once we disable rx checksum offload, we are no
826 * longer allowed to send tx buffers that are not properly
830 if (!(features & NETIF_F_RXCSUM))
831 features &= ~NETIF_F_CSUM_MASK;
// Switches IPv4 and IPv6 TCP checksum offload on or off through
// h_illan_attributes. The current attributes are queried first; the change
// is attempted only when the query succeeds and reports
// IBMVETH_ILLAN_PADDED_PKT_CSUM. A failed change is logged, reverted with
// the set and clear masks swapped, and the matching feature bit is removed
// from dev->features. Returns rc1 when it is non-zero, otherwise rc2.
// NOTE(review): several branches (the data check, the close before the
// change, else arms) are not visible in this section.
836 static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
838 struct ibmveth_adapter *adapter = netdev_priv(dev);
839 unsigned long set_attr, clr_attr, ret_attr;
840 unsigned long set_attr6, clr_attr6;
841 long ret, ret4, ret6;
842 int rc1 = 0, rc2 = 0;
845 if (netif_running(dev)) {
856 set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
857 set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
859 clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
860 clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
// Query only: both masks are zero.
863 ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
865 if (ret == H_SUCCESS &&
866 (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
867 ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
868 set_attr, &ret_attr);
870 if (ret4 != H_SUCCESS) {
871 netdev_err(dev, "unable to change IPv4 checksum "
872 "offload settings. %d rc=%ld\n",
// Revert by issuing the call with the two masks swapped.
875 h_illan_attributes(adapter->vdev->unit_address,
876 set_attr, clr_attr, &ret_attr);
879 dev->features &= ~NETIF_F_IP_CSUM;
882 adapter->fw_ipv4_csum_support = data;
885 ret6 = h_illan_attributes(adapter->vdev->unit_address,
886 clr_attr6, set_attr6, &ret_attr);
888 if (ret6 != H_SUCCESS) {
889 netdev_err(dev, "unable to change IPv6 checksum "
890 "offload settings. %d rc=%ld\n",
893 h_illan_attributes(adapter->vdev->unit_address,
894 set_attr6, clr_attr6, &ret_attr);
897 dev->features &= ~NETIF_F_IPV6_CSUM;
900 adapter->fw_ipv6_csum_support = data;
// rx_csum follows the request as soon as either protocol change succeeded.
902 if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
903 adapter->rx_csum = data;
908 netdev_err(dev, "unable to change checksum offload settings."
909 " %d rc=%ld ret_attr=%lx\n", data, ret,
914 rc2 = ibmveth_open(dev);
916 return rc1 ? rc1 : rc2;
// Switches large send (TSO) on or off through h_illan_attributes. The
// current attributes are queried first; a failed change is logged, reverted
// with the masks swapped, and both TSO feature bits are removed from
// dev->features. Returns rc1 when it is non-zero, otherwise rc2.
// NOTE(review): several branches and part of the main condition are not
// visible in this section. The netdev_info format string below has no
// trailing newline.
919 static int ibmveth_set_tso(struct net_device *dev, u32 data)
921 struct ibmveth_adapter *adapter = netdev_priv(dev);
922 unsigned long set_attr, clr_attr, ret_attr;
924 int rc1 = 0, rc2 = 0;
927 if (netif_running(dev)) {
936 set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
938 clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
// Query only: both masks are zero.
940 ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
942 if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
944 ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
945 set_attr, &ret_attr);
947 if (ret2 != H_SUCCESS) {
948 netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
951 h_illan_attributes(adapter->vdev->unit_address,
952 set_attr, clr_attr, &ret_attr);
955 dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
959 adapter->fw_large_send_support = data;
960 adapter->large_send = data;
963 /* Older firmware version of large send offload does not
967 dev->features &= ~NETIF_F_TSO6;
968 netdev_info(dev, "TSO feature requires all partitions to have updated driver");
970 adapter->large_send = data;
974 rc2 = ibmveth_open(dev);
976 return rc1 ? rc1 : rc2;
// Feature change callback: compares the requested rx checksum and large
// send state with the state stored in the adapter and calls
// ibmveth_set_csum_offload or ibmveth_set_tso only for the parts that
// differ. Returns rc1 when it is non-zero, otherwise rc2.
// NOTE(review): the left-hand sides of the two feature fix-up assignments
// are not visible in this section.
979 static int ibmveth_set_features(struct net_device *dev,
980 netdev_features_t features)
982 struct ibmveth_adapter *adapter = netdev_priv(dev);
// Normalize both requests to 0 or 1.
983 int rx_csum = !!(features & NETIF_F_RXCSUM);
984 int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
985 int rc1 = 0, rc2 = 0;
987 if (rx_csum != adapter->rx_csum) {
988 rc1 = ibmveth_set_csum_offload(dev, rx_csum);
989 if (rc1 && !adapter->rx_csum)
991 features & ~(NETIF_F_CSUM_MASK |
995 if (large_send != adapter->large_send) {
996 rc2 = ibmveth_set_tso(dev, large_send);
997 if (rc2 && !adapter->large_send)
999 features & ~(NETIF_F_TSO | NETIF_F_TSO6);
1002 return rc1 ? rc1 : rc2;
// ethtool get_strings callback: for ETH_SS_STATS, copies the label of every
// ibmveth_stats entry into data, ETH_GSTRING_LEN bytes per entry.
// NOTE(review): the statement executed for other string sets is not visible
// in this section.
1005 static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1009 if (stringset != ETH_SS_STATS)
1012 for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
1013 memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
// ethtool get_sset_count callback: the visible return reports the number of
// entries in ibmveth_stats.
// NOTE(review): the selection on sset and any other return values are not
// visible in this section.
1016 static int ibmveth_get_sset_count(struct net_device *dev, int sset)
1020 return ARRAY_SIZE(ibmveth_stats);
// ethtool get_ethtool_stats callback: fills data with one u64 per
// ibmveth_stats entry, read from the adapter at the offset recorded in the
// table, in table order.
1026 static void ibmveth_get_ethtool_stats(struct net_device *dev,
1027 struct ethtool_stats *stats, u64 *data)
1030 struct ibmveth_adapter *adapter = netdev_priv(dev);
1032 for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
1033 data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
1036 static void ibmveth_get_channels(struct net_device *netdev,
1037 struct ethtool_channels *channels)
1039 channels->max_tx = ibmveth_real_max_tx_queues();
1040 channels->tx_count = netdev->real_num_tx_queues;
1042 channels->max_rx = netdev->real_num_rx_queues;
1043 channels->rx_count = netdev->real_num_rx_queues;
// ethtool set_channels callback: changes the number of real tx queues from
// the current value (old) to channels->tx_count (goal). When the interface
// is not up only the queue count is updated. Otherwise the tx queues are
// stopped, missing long term buffers are allocated for queues old..goal-1,
// the queue count is updated, buffers above goal are freed and the queues
// are woken again.
// NOTE(review): the error-recovery statements and the return are only partly
// visible in this section.
1046 static int ibmveth_set_channels(struct net_device *netdev,
1047 struct ethtool_channels *channels)
1049 struct ibmveth_adapter *adapter = netdev_priv(netdev);
1050 unsigned int old = netdev->real_num_tx_queues,
1051 goal = channels->tx_count;
1054 /* If ndo_open has not been called yet then don't allocate, just set
1055 * desired netdev_queue's and return
1057 if (!(netdev->flags & IFF_UP))
1058 return netif_set_real_num_tx_queues(netdev, goal);
1060 /* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
1061 * but we may need to alloc/free the ltb's.
1063 netif_tx_stop_all_queues(netdev);
1065 /* Allocate any queue that we need */
1066 for (i = old; i < goal; i++) {
1067 if (adapter->tx_ltb_ptr[i])
1070 rc = ibmveth_allocate_tx_ltb(adapter, i);
1074 /* if something goes wrong, free everything we just allocated */
1075 netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
1081 rc = netif_set_real_num_tx_queues(netdev, goal);
1083 netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
1088 /* Free any that are no longer needed */
// Walks downward from old to goal; index i - 1 is the queue being released.
1089 for (i = old; i > goal; i--) {
1090 if (adapter->tx_ltb_ptr[i - 1])
1091 ibmveth_free_tx_ltb(adapter, i - 1);
1094 netif_tx_wake_all_queues(netdev);
1099 static const struct ethtool_ops netdev_ethtool_ops = {
1100 .get_drvinfo = netdev_get_drvinfo,
1101 .get_link = ethtool_op_get_link,
1102 .get_strings = ibmveth_get_strings,
1103 .get_sset_count = ibmveth_get_sset_count,
1104 .get_ethtool_stats = ibmveth_get_ethtool_stats,
1105 .get_link_ksettings = ibmveth_get_link_ksettings,
1106 .set_link_ksettings = ibmveth_set_link_ksettings,
1107 .get_channels = ibmveth_get_channels,
1108 .set_channels = ibmveth_set_channels
// ioctl handler for the net device.
// NOTE(review): the body of this function is not visible in this section,
// so its return value cannot be stated from here.
1111 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
// Sends one descriptor through h_send_logical_lan, retrying while the
// hypervisor reports H_BUSY and the retry budget is not exhausted. Results
// other than H_SUCCESS and H_DROPPED are logged as errors.
// NOTE(review): the initial values of correlator and retry_count and the
// return statements are not visible in this section.
1116 static int ibmveth_send(struct ibmveth_adapter *adapter,
1117 unsigned long desc, unsigned long mss)
1119 unsigned long correlator;
1120 unsigned int retry_count;
1124 * The retry count sets a maximum for the number of broadcast and
1125 * multicast destinations within the system.
// The correlator is passed in and written back by each call.
1130 ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
1131 correlator, &correlator, mss,
1132 adapter->fw_large_send_support);
1133 } while ((ret == H_BUSY) && (retry_count--));
1135 if (ret != H_SUCCESS && ret != H_DROPPED) {
1136 netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
1137 "with rc=%ld\n", ret);
// Checks whether a packet cannot be sent by this device. A frame whose
// destination MAC equals the address of the device itself is logged at debug
// level and counted in tx_dropped.
// NOTE(review): the return statements are not visible in this section.
1144 static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
1145 struct net_device *netdev)
1147 struct ethhdr *ether_header;
1150 ether_header = eth_hdr(skb);
1152 if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) {
1153 netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n");
1154 netdev->stats.tx_dropped++;
// Transmit entry point. The packet is copied (linear part first, then each
// fragment) into the long term buffer of its tx queue, a descriptor for that
// buffer is built and handed to ibmveth_send, the statistics are updated and
// the skb is consumed. NETDEV_TX_OK is returned on the visible exit path.
// NOTE(review): the jumps taken on the drop paths and a few statements (the
// checksum zeroing, the write barrier) are not visible in this section.
1161 static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
1162 struct net_device *netdev)
1164 struct ibmveth_adapter *adapter = netdev_priv(netdev);
1165 unsigned int desc_flags, total_bytes;
1166 union ibmveth_buf_desc desc;
1167 int i, queue_num = skb_get_queue_mapping(skb);
1168 unsigned long mss = 0;
1170 if (ibmveth_is_packet_unsupported(skb, netdev))
1172 /* veth can't checksum offload UDP */
// For non-TCP IPv4 or IPv6 packets that still need a checksum, it is
// computed in software; a failure there drops the packet.
1173 if (skb->ip_summed == CHECKSUM_PARTIAL &&
1174 ((skb->protocol == htons(ETH_P_IP) &&
1175 ip_hdr(skb)->protocol != IPPROTO_TCP) ||
1176 (skb->protocol == htons(ETH_P_IPV6) &&
1177 ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
1178 skb_checksum_help(skb)) {
1180 netdev_err(netdev, "tx: failed to checksum packet\n");
1181 netdev->stats.tx_dropped++;
1185 desc_flags = IBMVETH_BUF_VALID;
1187 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1188 unsigned char *buf = skb_transport_header(skb) +
1191 desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);
1193 /* Need to zero out the checksum */
1197 if (skb_is_gso(skb) && adapter->fw_large_send_support)
1198 desc_flags |= IBMVETH_BUF_LRG_SND;
// Large send: with firmware support the mss is passed to the send call;
// otherwise, for non-IPv6 GSO packets, it is encoded in the headers.
1201 if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1202 if (adapter->fw_large_send_support) {
1203 mss = (unsigned long)skb_shinfo(skb)->gso_size;
1204 adapter->tx_large_packets++;
1205 } else if (!skb_is_gso_v6(skb)) {
1206 /* Put -1 in the IP checksum to tell phyp it
1207 * is a largesend packet. Put the mss in
1210 ip_hdr(skb)->check = 0xffff;
1211 tcp_hdr(skb)->check =
1212 cpu_to_be16(skb_shinfo(skb)->gso_size);
1213 adapter->tx_large_packets++;
1217 /* Copy header into mapped buffer */
// Packets larger than the long term buffer are dropped.
1218 if (unlikely(skb->len > adapter->tx_ltb_size)) {
1219 netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
1220 skb->len, adapter->tx_ltb_size);
1221 netdev->stats.tx_dropped++;
1224 memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
1225 total_bytes = skb_headlen(skb);
1226 /* Copy frags into mapped buffers */
1227 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1228 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1230 memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
1231 skb_frag_address_safe(frag), skb_frag_size(frag));
1232 total_bytes += skb_frag_size(frag);
// Sanity check: the copied byte count must equal the packet length.
1235 if (unlikely(total_bytes != skb->len)) {
1236 netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
1237 skb->len, total_bytes);
1238 netdev->stats.tx_dropped++;
1241 desc.fields.flags_len = desc_flags | skb->len;
1242 desc.fields.address = adapter->tx_ltb_dma[queue_num];
1243 /* finish writing to long_term_buff before VIOS accessing it */
1246 if (ibmveth_send(adapter, desc.desc, mss)) {
1247 adapter->tx_send_failed++;
1248 netdev->stats.tx_dropped++;
1250 netdev->stats.tx_packets++;
1251 netdev->stats.tx_bytes += skb->len;
1255 dev_consume_skb_any(skb);
1256 return NETDEV_TX_OK;
// Fills in the GSO metadata of a received TCP packet. The IP header is read
// from skb->data; for TCP over IPv4 or IPv6 the gso_type is set and the
// offset of the TCP header is recorded. gso_size comes either from the mss
// argument or from the TCP checksum field, and gso_segs is the payload
// length divided by gso_size, rounded up.
// NOTE(review): the early returns for non-TCP packets and the condition that
// selects the mss argument are not visible in this section.
1261 static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
1263 struct tcphdr *tcph;
1267 /* only TCP packets will be aggregated */
1268 if (skb->protocol == htons(ETH_P_IP)) {
1269 struct iphdr *iph = (struct iphdr *)skb->data;
1271 if (iph->protocol == IPPROTO_TCP) {
// ihl counts 32-bit words, hence the multiplication by 4.
1272 offset = iph->ihl * 4;
1273 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1277 } else if (skb->protocol == htons(ETH_P_IPV6)) {
1278 struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;
1280 if (iph6->nexthdr == IPPROTO_TCP) {
1281 offset = sizeof(struct ipv6hdr);
1282 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1289 /* if mss is not set through Large Packet bit/mss in rx buffer,
1290 * expect that the mss will be written to the tcp header checksum.
1292 tcph = (struct tcphdr *)(skb->data + offset);
1294 skb_shinfo(skb)->gso_size = mss;
1295 } else if (offset) {
1296 skb_shinfo(skb)->gso_size = ntohs(tcph->check);
1300 if (skb_shinfo(skb)->gso_size) {
1301 hdr_len = offset + tcph->doff * 4;
1302 skb_shinfo(skb)->gso_segs =
1303 DIV_ROUND_UP(skb->len - hdr_len,
1304 skb_shinfo(skb)->gso_size);
/**
 * ibmveth_rx_csum_helper - repair checksum fields on a received frame
 * @skb: received frame; skb->data is read as the IPv4 or IPv6 header
 * @adapter: adapter instance
 *
 * For IPv4, an IP header checksum of 0xffff is replaced with a value
 * computed by ip_fast_csum(). For TCP segments whose checksum field is
 * zero, the pseudo-header checksum is recomputed with csum_tcpudp_magic()
 * (IPv4) or csum_ipv6_magic() (IPv6), the skb is set up for partial
 * checksumming with skb_partial_csum_set(), and the network header is
 * reset.
 *
 * NOTE(review): skb_proto is declared __be16 but is assigned the result of
 * be16_to_cpu() and compared with host-order ETH_P_* constants, so the
 * endianness annotation looks inverted (sparse would flag it) - confirm.
 */
1308 static void ibmveth_rx_csum_helper(struct sk_buff *skb,
1309 struct ibmveth_adapter *adapter)
1311 struct iphdr *iph = NULL;
1312 struct ipv6hdr *iph6 = NULL;
1313 __be16 skb_proto = 0;
1318 skb_proto = be16_to_cpu(skb->protocol);
1320 if (skb_proto == ETH_P_IP) {
1321 iph = (struct iphdr *)skb->data;
1323 /* If the IP checksum is not offloaded and if the packet
1324 * is large send, the checksum must be rebuilt.
1326 if (iph->check == 0xffff) {
1328 iph->check = ip_fast_csum((unsigned char *)iph,
1332 iphlen = iph->ihl * 4;
1333 iph_proto = iph->protocol;
1334 } else if (skb_proto == ETH_P_IPV6) {
1335 iph6 = (struct ipv6hdr *)skb->data;
1336 iphlen = sizeof(struct ipv6hdr);
1337 iph_proto = iph6->nexthdr;
1340 /* When CSO is enabled the TCP checksum may have be set to NULL by
1341 * the sender given that we zeroed out TCP checksum field in
1342 * transmit path (refer ibmveth_start_xmit routine). In this case set
1343 * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
1344 * then be recalculated by the destination NIC (CSO must be enabled
1345 * on the destination NIC).
1347 * In an OVS environment, when a flow is not cached, specifically for a
1348 * new TCP connection, the first packet information is passed up to
1349 * the user space for finding a flow. During this process, OVS computes
1350 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
1352 * So, re-compute TCP pseudo header checksum.
1355 if (iph_proto == IPPROTO_TCP) {
1356 struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
1358 if (tcph->check == 0x0000) {
1359 /* Recompute TCP pseudo header checksum */
1360 tcphdrlen = skb->len - iphlen;
1361 if (skb_proto == ETH_P_IP)
1363 ~csum_tcpudp_magic(iph->saddr,
1364 iph->daddr, tcphdrlen, iph_proto, 0);
1365 else if (skb_proto == ETH_P_IPV6)
1367 ~csum_ipv6_magic(&iph6->saddr,
1368 &iph6->daddr, tcphdrlen, iph_proto, 0);
1369 /* Setup SKB fields for checksum offload */
1370 skb_partial_csum_set(skb, iphlen,
1371 offsetof(struct tcphdr, check));
1372 skb_reset_network_header(skb);
/**
 * ibmveth_poll - NAPI poll handler for the receive queue
 * @napi: NAPI context embedded in the adapter
 * @budget: maximum number of frames to process in this call
 *
 * Loops while fewer than @budget frames have been processed. Invalid
 * buffers are counted and recycled through ibmveth_rxq_harvest_buffer().
 * For valid buffers, frames shorter than rx_copybreak are copied into a
 * freshly allocated skb; the skb is then passed to the stack with
 * napi_gro_receive() after the large-packet and checksum helpers have run.
 * After the loop the buffer pools are replenished, and once NAPI is
 * completed the interrupt is re-enabled with h_vio_signal(); if buffers are
 * still pending at that point NAPI is scheduled again.
 *
 * A failing h_vio_signal(VIO_IRQ_ENABLE) queues adapter->work.
 *
 * Return: number of frames processed.
 */
1377 static int ibmveth_poll(struct napi_struct *napi, int budget)
1379 struct ibmveth_adapter *adapter =
1380 container_of(napi, struct ibmveth_adapter, napi);
1381 struct net_device *netdev = adapter->netdev;
1382 int frames_processed = 0;
1383 unsigned long lpar_rc;
1387 while (frames_processed < budget) {
1388 if (!ibmveth_rxq_pending_buffer(adapter))
1392 if (!ibmveth_rxq_buffer_valid(adapter)) {
1393 wmb(); /* suggested by larson1 */
1394 adapter->rx_invalid_buffer++;
1395 netdev_dbg(netdev, "recycling invalid buffer\n");
1396 if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1399 struct sk_buff *skb, *new_skb;
1400 int length = ibmveth_rxq_frame_length(adapter);
1401 int offset = ibmveth_rxq_frame_offset(adapter);
1402 int csum_good = ibmveth_rxq_csum_good(adapter);
1403 int lrg_pkt = ibmveth_rxq_large_packet(adapter);
1404 __sum16 iph_check = 0;
1406 skb = ibmveth_rxq_get_buffer(adapter);
1410 /* if the large packet bit is set in the rx queue
1411 * descriptor, the mss will be written by PHYP eight
1412 * bytes from the start of the rx buffer, which is
1413 * skb->data at this stage
1416 __be64 *rxmss = (__be64 *)(skb->data + 8);
1418 mss = (u16)be64_to_cpu(*rxmss);
1422 if (length < rx_copybreak)
1423 new_skb = netdev_alloc_skb(netdev, length);
1426 skb_copy_to_linear_data(new_skb,
1430 ibmveth_flush_buffer(skb->data,
1432 if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1436 if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false)))
1438 skb_reserve(skb, offset);
1441 skb_put(skb, length);
1442 skb->protocol = eth_type_trans(skb, netdev);
1444 /* PHYP without PLSO support places a -1 in the ip
1445 * checksum for large send frames.
1447 if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
1448 struct iphdr *iph = (struct iphdr *)skb->data;
1450 iph_check = iph->check;
1453 if ((length > netdev->mtu + ETH_HLEN) ||
1454 lrg_pkt || iph_check == 0xffff) {
1455 ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
1456 adapter->rx_large_packets++;
1460 skb->ip_summed = CHECKSUM_UNNECESSARY;
1461 ibmveth_rx_csum_helper(skb, adapter);
1464 napi_gro_receive(napi, skb); /* send it up */
1466 netdev->stats.rx_packets++;
1467 netdev->stats.rx_bytes += length;
1472 ibmveth_replenish_task(adapter);
1474 if (frames_processed == budget)
1477 if (!napi_complete_done(napi, frames_processed))
1480 /* We think we are done - reenable interrupts,
1481 * then check once more to make sure we are done.
1483 lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
1484 if (WARN_ON(lpar_rc != H_SUCCESS)) {
1485 schedule_work(&adapter->work);
1489 if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
1490 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1496 return frames_processed;
/**
 * ibmveth_interrupt - interrupt handler for the virtual ethernet device
 * @irq: interrupt number (not used in the lines shown)
 * @dev_instance: the struct net_device registered with the handler
 *
 * When napi_schedule_prep() allows it, issues an h_vio_signal() hypercall
 * for the device, warns if the hypercall does not return H_SUCCESS, and
 * schedules the adapter's NAPI context.
 *
 * NOTE(review): the h_vio_signal() mode argument is presumably
 * VIO_IRQ_DISABLE, mirroring the VIO_IRQ_ENABLE in ibmveth_poll() - confirm.
 */
1499 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
1501 struct net_device *netdev = dev_instance;
1502 struct ibmveth_adapter *adapter = netdev_priv(netdev);
1503 unsigned long lpar_rc;
1505 if (napi_schedule_prep(&adapter->napi)) {
1506 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1508 WARN_ON(lpar_rc != H_SUCCESS);
1509 __napi_schedule(&adapter->napi);
/**
 * ibmveth_set_multicast_list - program the hypervisor multicast filter
 * @netdev: network device whose rx mode changed
 *
 * If the device is promiscuous, or has more multicast addresses than
 * adapter->mcastFilterSize, multicast reception is enabled with filtering
 * disabled. Otherwise the filter table is cleared with filtering disabled,
 * every multicast address of @netdev is added to the table, and filtering
 * is re-enabled. Each failing h_multicast_ctrl() call is logged with
 * netdev_err().
 */
1514 static void ibmveth_set_multicast_list(struct net_device *netdev)
1516 struct ibmveth_adapter *adapter = netdev_priv(netdev);
1517 unsigned long lpar_rc;
1519 if ((netdev->flags & IFF_PROMISC) ||
1520 (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
1521 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1522 IbmVethMcastEnableRecv |
1523 IbmVethMcastDisableFiltering,
1525 if (lpar_rc != H_SUCCESS) {
1526 netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1527 "entering promisc mode\n", lpar_rc);
1530 struct netdev_hw_addr *ha;
1531 /* clear the filter table & disable filtering */
1532 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1533 IbmVethMcastEnableRecv |
1534 IbmVethMcastDisableFiltering |
1535 IbmVethMcastClearFilterTable,
1537 if (lpar_rc != H_SUCCESS) {
1538 netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1539 "attempting to clear filter table\n",
1542 /* add the addresses to the filter table */
1543 netdev_for_each_mc_addr(ha, netdev) {
1544 /* add the multicast address to the filter table */
1546 mcast_addr = ether_addr_to_u64(ha->addr);
1547 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1548 IbmVethMcastAddFilter,
1550 if (lpar_rc != H_SUCCESS) {
1551 netdev_err(netdev, "h_multicast_ctrl rc=%ld "
1552 "when adding an entry to the filter "
1553 "table\n", lpar_rc);
1557 /* re-enable filtering */
1558 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1559 IbmVethMcastEnableFiltering,
1561 if (lpar_rc != H_SUCCESS) {
1562 netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1563 "enabling filtering\n", lpar_rc);
/**
 * ibmveth_change_mtu - ndo_change_mtu handler
 * @dev: network device
 * @new_mtu: requested MTU
 *
 * The required buffer size is @new_mtu plus IBMVETH_BUFF_OH. A first pass
 * checks that some rx buffer pool has buffers at least that large. If the
 * interface is running it is closed; pools are then activated in index
 * order until one large enough is reached, at which point dev->mtu is
 * updated, the device's desired CMO entitlement is refreshed from
 * ibmveth_get_desired_dma(), and the interface is reopened.
 *
 * NOTE(review): the value returned when no pool is large enough is not
 * visible in these lines (presumably a negative errno) - confirm.
 */
1568 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1570 struct ibmveth_adapter *adapter = netdev_priv(dev);
1571 struct vio_dev *viodev = adapter->vdev;
1572 int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1574 int need_restart = 0;
/* First pass: is there any pool whose buffers can hold the new MTU? */
1576 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1577 if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
1580 if (i == IBMVETH_NUM_BUFF_POOLS)
1583 /* Deactivate all the buffer pools so that the next loop can activate
1584 only the buffer pools necessary to hold the new MTU */
1585 if (netif_running(adapter->netdev)) {
1587 ibmveth_close(adapter->netdev);
1590 /* Look for an active buffer pool that can hold the new MTU */
1591 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1592 adapter->rx_buff_pool[i].active = 1;
1594 if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
1595 WRITE_ONCE(dev->mtu, new_mtu);
1596 vio_cmo_set_dev_desired(viodev,
1597 ibmveth_get_desired_dma
1600 return ibmveth_open(adapter->netdev);
1606 if (need_restart && (rc = ibmveth_open(adapter->netdev)))
1612 #ifdef CONFIG_NET_POLL_CONTROLLER
/**
 * ibmveth_poll_controller - ndo_poll_controller handler
 * @dev: network device to service
 *
 * Replenishes the receive buffer pools and then calls the interrupt
 * handler directly with the device's irq.
 */
1613 static void ibmveth_poll_controller(struct net_device *dev)
1615 ibmveth_replenish_task(netdev_priv(dev));
1616 ibmveth_interrupt(dev->irq, dev);
1621 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1623 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1626 * Number of bytes of IO data the driver will need to perform well.
1628 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1630 struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1631 struct ibmveth_adapter *adapter;
1632 struct iommu_table *tbl;
/*
 * The total below is built from: the buffer list and filter list sizes,
 * the IOMMU-page-aligned MTU, the aligned tx buffer size, the buffers of
 * every active rx pool, and the aligned rx queue entry array.
 *
 * NOTE(review): the early return of the aligned default entitlement is
 * presumably taken when no netdev has been set as drvdata yet - confirm.
 */
1637 tbl = get_iommu_table_base(&vdev->dev);
1639 /* netdev inits at probe time along with the structures we need below*/
1641 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);
1643 adapter = netdev_priv(netdev);
1645 ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1646 ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
1647 /* add size of mapped tx buffers */
1648 ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);
1650 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1651 /* add the size of the active receive buffers */
1652 if (adapter->rx_buff_pool[i].active)
1654 adapter->rx_buff_pool[i].size *
1655 IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
/* every pool contributes rx queue entries, whether active or not */
1657 rxqentries += adapter->rx_buff_pool[i].size;
1659 /* add the size of the receive queue entries */
1660 ret += IOMMU_PAGE_ALIGN(
1661 rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);
/**
 * ibmveth_set_mac_addr - ndo_set_mac_address handler
 * @dev: network device
 * @p: pointer to a struct sockaddr carrying the new address in sa_data
 *
 * Rejects addresses that fail is_valid_ether_addr() with -EADDRNOTAVAIL.
 * Otherwise the address is converted to a u64 and handed to the hypervisor
 * with h_change_logical_lan_mac(); a failure is logged with netdev_err().
 * The new address is stored on @dev with eth_hw_addr_set().
 */
1666 static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
1668 struct ibmveth_adapter *adapter = netdev_priv(dev);
1669 struct sockaddr *addr = p;
1673 if (!is_valid_ether_addr(addr->sa_data))
1674 return -EADDRNOTAVAIL;
1676 mac_address = ether_addr_to_u64(addr->sa_data);
1677 rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
1679 netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
1683 eth_hw_addr_set(dev, addr->sa_data);
/*
 * net_device_ops table installed on the netdev in ibmveth_probe(). The
 * poll-controller hook is only present with CONFIG_NET_POLL_CONTROLLER.
 */
1688 static const struct net_device_ops ibmveth_netdev_ops = {
1689 .ndo_open = ibmveth_open,
1690 .ndo_stop = ibmveth_close,
1691 .ndo_start_xmit = ibmveth_start_xmit,
1692 .ndo_set_rx_mode = ibmveth_set_multicast_list,
1693 .ndo_eth_ioctl = ibmveth_ioctl,
1694 .ndo_change_mtu = ibmveth_change_mtu,
1695 .ndo_fix_features = ibmveth_fix_features,
1696 .ndo_set_features = ibmveth_set_features,
1697 .ndo_validate_addr = eth_validate_addr,
1698 .ndo_set_mac_address = ibmveth_set_mac_addr,
1699 #ifdef CONFIG_NET_POLL_CONTROLLER
1700 .ndo_poll_controller = ibmveth_poll_controller,
/**
 * ibmveth_probe - set up a net_device for a newly discovered VIO device
 * @dev: VIO device being probed
 * @id: matching entry of the driver's device table
 *
 * Reads the VETH_MAC_ADDR and VETH_MCAST_FILTER_SIZE attributes of @dev,
 * allocates a multi-queue ethernet device and stores it as the driver data
 * of @dev. It then initialises the adapter (reset work, multicast filter
 * size, link settings, NAPI with a weight of 16), selects offload features
 * based on the "ibm,illan-options" attribute and the h_illan_attributes()
 * result, sets the MTU limits and MAC address, initialises every rx buffer
 * pool and adds its kobject under the device, configures the number of tx
 * queues, and finally registers the netdev.
 *
 * NOTE(review): both failure paths shown (netif_set_real_num_tx_queues()
 * and register_netdev()) call free_netdev() after the pool kobjects were
 * added with kobject_init_and_add(); no kobject_put() is visible on those
 * paths - verify that the pool kobjects are not leaked.
 */
1704 static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1707 struct net_device *netdev;
1708 struct ibmveth_adapter *adapter;
1709 unsigned char *mac_addr_p;
1710 __be32 *mcastFilterSize_p;
1712 unsigned long ret_attr;
1714 dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
1717 mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
1720 dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
1723 /* Workaround for old/broken pHyp */
1726 else if (mac_len != 6) {
1727 dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
1732 mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
1733 VETH_MCAST_FILTER_SIZE,
1735 if (!mcastFilterSize_p) {
1736 dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
1741 netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
1745 adapter = netdev_priv(netdev);
1746 dev_set_drvdata(&dev->dev, netdev);
1748 adapter->vdev = dev;
1749 adapter->netdev = netdev;
1750 INIT_WORK(&adapter->work, ibmveth_reset);
1751 adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
1752 ibmveth_init_link_settings(netdev);
1754 netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);
1756 netdev->irq = dev->irq;
1757 netdev->netdev_ops = &ibmveth_netdev_ops;
1758 netdev->ethtool_ops = &netdev_ethtool_ops;
1759 SET_NETDEV_DEV(netdev, &dev->dev);
1760 netdev->hw_features = NETIF_F_SG;
1761 if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
1762 netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
1766 netdev->features |= netdev->hw_features;
1768 ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
1770 /* If running older firmware, TSO should not be enabled by default */
1771 if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
1773 netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
1774 netdev->features |= netdev->hw_features;
1776 netdev->hw_features |= NETIF_F_TSO;
1779 adapter->is_active_trunk = false;
1780 if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
1781 adapter->is_active_trunk = true;
1782 netdev->hw_features |= NETIF_F_FRAGLIST;
1783 netdev->features |= NETIF_F_FRAGLIST;
1786 netdev->min_mtu = IBMVETH_MIN_MTU;
1787 netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
1789 eth_hw_addr_set(netdev, mac_addr_p);
/* with CMO firmware support the CMO pool counts replace the defaults */
1791 if (firmware_has_feature(FW_FEATURE_CMO))
1792 memcpy(pool_count, pool_count_cmo, sizeof(pool_count));
1794 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1795 struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
1798 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
1799 pool_count[i], pool_size[i],
1801 error = kobject_init_and_add(kobj, &ktype_veth_pool,
1802 &dev->dev.kobj, "pool%d", i);
1804 kobject_uevent(kobj, KOBJ_ADD);
1807 rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(),
1808 IBMVETH_DEFAULT_QUEUES));
1810 netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
1812 free_netdev(netdev);
1815 adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
1816 for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
1817 adapter->tx_ltb_ptr[i] = NULL;
1819 netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
1820 netdev_dbg(netdev, "registering netdev...\n");
1822 ibmveth_set_features(netdev, netdev->features);
1824 rc = register_netdev(netdev);
1827 netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
1828 free_netdev(netdev);
1832 netdev_dbg(netdev, "registered\n");
/**
 * ibmveth_remove - tear down the net_device of a VIO device
 * @dev: VIO device being removed
 *
 * Waits for any pending adapter work to finish, drops the reference on
 * each rx buffer pool kobject, unregisters and frees the netdev, and
 * clears the driver data pointer of @dev.
 */
1837 static void ibmveth_remove(struct vio_dev *dev)
1839 struct net_device *netdev = dev_get_drvdata(&dev->dev);
1840 struct ibmveth_adapter *adapter = netdev_priv(netdev);
1843 cancel_work_sync(&adapter->work);
1845 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1846 kobject_put(&adapter->rx_buff_pool[i].kobj);
1848 unregister_netdev(netdev);
1850 free_netdev(netdev);
1851 dev_set_drvdata(&dev->dev, NULL);
/*
 * Tentative definitions of the pool attributes; veth_pool_show() and
 * veth_pool_store() compare against their addresses before the ATTR()
 * definitions appear further down.
 */
1854 static struct attribute veth_active_attr;
1855 static struct attribute veth_num_attr;
1856 static struct attribute veth_size_attr;
/**
 * veth_pool_show - sysfs show handler for pool attributes
 * @kobj: kobject embedded in the pool
 * @attr: attribute being read
 * @buf: output buffer
 *
 * Formats pool->active, pool->size or pool->buff_size as a decimal number
 * followed by a newline, depending on which attribute is read.
 *
 * NOTE(review): Documentation/filesystems/sysfs.rst asks show() callbacks
 * to use sysfs_emit() rather than sprintf(); consider converting.
 */
1858 static ssize_t veth_pool_show(struct kobject *kobj,
1859 struct attribute *attr, char *buf)
1861 struct ibmveth_buff_pool *pool = container_of(kobj,
1862 struct ibmveth_buff_pool,
1865 if (attr == &veth_active_attr)
1866 return sprintf(buf, "%d\n", pool->active);
1867 else if (attr == &veth_num_attr)
1868 return sprintf(buf, "%d\n", pool->size);
1869 else if (attr == &veth_size_attr)
1870 return sprintf(buf, "%d\n", pool->buff_size);
1875 * veth_pool_store - sysfs store handler for pool attributes
1876 * @kobj: kobject embedded in pool
1877 * @attr: attribute being changed
1878 * @buf: value being stored
1879 * @count: length of @buf in bytes
1881 * Stores new value in pool attribute. Verifies the range of the new value for
1882 * size and buff_size. Verifies that at least one pool remains available to
1883 * receive MTU-sized packets.
1885 * Context: Process context.
1886 * Takes and releases rtnl_mutex to ensure correct ordering of close
1889 * * %-EPERM - Not allowed to disable all MTU-sized buffer pools
1890 * * %-EINVAL - New pool size or buffer size is out of range
1891 * * count - Return count for success
1892 * * other - Return value from a failed ibmveth_open call
1894 static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
1895 const char *buf, size_t count)
1897 struct ibmveth_buff_pool *pool = container_of(kobj,
1898 struct ibmveth_buff_pool,
1900 struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
1901 struct ibmveth_adapter *adapter = netdev_priv(netdev);
/*
 * NOTE(review): simple_strtol() is listed as deprecated in
 * Documentation/process/deprecated.rst; it ignores trailing garbage and
 * overflow, and with a NULL end pointer non-numeric input silently
 * becomes 0. kstrtol() would let malformed input be rejected.
 */
1902 long value = simple_strtol(buf, NULL, 10);
1903 bool change = false;
/* remember the current settings so a failed reopen can restore them */
1914 oldbuff_size = pool->buff_size;
1915 oldactive = pool->active;
1916 oldsize = pool->size;
1918 newbuff_size = oldbuff_size;
1919 newactive = oldactive;
1922 if (attr == &veth_active_attr) {
1923 if (value && !oldactive) {
1926 } else if (!value && oldactive) {
1927 int mtu = netdev->mtu + IBMVETH_BUFF_OH;
1929 /* Make sure there is a buffer pool with buffers that
1930 can hold a packet of the size of the MTU */
1931 for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1932 if (pool == &adapter->rx_buff_pool[i])
1934 if (!adapter->rx_buff_pool[i].active)
1936 if (mtu <= adapter->rx_buff_pool[i].buff_size)
1940 if (i == IBMVETH_NUM_BUFF_POOLS) {
1941 netdev_err(netdev, "no active pool >= MTU\n");
1949 } else if (attr == &veth_num_attr) {
1950 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
1954 if (value != oldsize) {
1958 } else if (attr == &veth_size_attr) {
1959 if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
1963 if (value != oldbuff_size) {
1964 newbuff_size = value;
/* apply the new settings with the interface closed, then reopen it */
1970 if (netif_running(netdev))
1971 ibmveth_close(netdev);
1973 pool->active = newactive;
1974 pool->buff_size = newbuff_size;
1975 pool->size = newsize;
1977 if (netif_running(netdev)) {
1978 rc = ibmveth_open(netdev);
1980 pool->active = oldactive;
1981 pool->buff_size = oldbuff_size;
1982 pool->size = oldsize;
1989 /* kick the interrupt handler to allocate/deallocate pools */
1990 ibmveth_interrupt(netdev->irq, netdev);
/*
 * ATTR(_name, _mode) defines a struct attribute called veth_<_name>_attr
 * whose sysfs file name is the stringified _name and whose permissions are
 * _mode. The three uses below create the "active", "num" and "size" pool
 * attributes, each with mode 0644.
 */
1999 #define ATTR(_name, _mode) \
2000 struct attribute veth_##_name##_attr = { \
2001 .name = __stringify(_name), .mode = _mode, \
2004 static ATTR(active, 0644);
2005 static ATTR(num, 0644);
2006 static ATTR(size, 0644);
/*
 * Attribute list for the pool kobjects; ATTRIBUTE_GROUPS() derives
 * veth_pool_groups from it, which ktype_veth_pool uses as default_groups.
 */
2008 static struct attribute *veth_pool_attrs[] = {
2014 ATTRIBUTE_GROUPS(veth_pool);
/* sysfs read/write callbacks shared by all pool attributes */
2016 static const struct sysfs_ops veth_pool_ops = {
2017 .show = veth_pool_show,
2018 .store = veth_pool_store,
/* kobject type of the per-pool kobjects created in ibmveth_probe() */
2021 static struct kobj_type ktype_veth_pool = {
2023 .sysfs_ops = &veth_pool_ops,
2024 .default_groups = veth_pool_groups,
/**
 * ibmveth_resume - PM resume callback
 * @dev: device whose driver data is the net_device
 *
 * Calls ibmveth_interrupt() directly with the netdev's irq.
 */
2027 static int ibmveth_resume(struct device *dev)
2029 struct net_device *netdev = dev_get_drvdata(dev);
2030 ibmveth_interrupt(netdev->irq, netdev);
/* VIO devices handled by this driver: type "network", compat "IBM,l-lan" */
2034 static const struct vio_device_id ibmveth_device_table[] = {
2035 { "network", "IBM,l-lan"},
2038 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
/* power-management callbacks; only resume is provided */
2040 static const struct dev_pm_ops ibmveth_pm_ops = {
2041 .resume = ibmveth_resume
/* VIO driver description registered by ibmveth_module_init() */
2044 static struct vio_driver ibmveth_driver = {
2045 .id_table = ibmveth_device_table,
2046 .probe = ibmveth_probe,
2047 .remove = ibmveth_remove,
2048 .get_desired_dma = ibmveth_get_desired_dma,
2049 .name = ibmveth_driver_name,
2050 .pm = &ibmveth_pm_ops,
/**
 * ibmveth_module_init - module entry point
 *
 * Prints the driver name, description and version at KERN_DEBUG level.
 *
 * Return: the result of vio_register_driver().
 */
2053 static int __init ibmveth_module_init(void)
2055 printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
2056 ibmveth_driver_string, ibmveth_driver_version);
2058 return vio_register_driver(&ibmveth_driver);
/**
 * ibmveth_module_exit - module exit point; unregisters the VIO driver
 */
2061 static void __exit ibmveth_module_exit(void)
2063 vio_unregister_driver(&ibmveth_driver);
/* hook the init and exit routines into module load and unload */
2066 module_init(ibmveth_module_init);
2067 module_exit(ibmveth_module_exit);
2069 #ifdef CONFIG_IBMVETH_KUNIT_TEST
2070 #include <kunit/test.h>
2073 * ibmveth_reset_kunit - reset routine for running in KUnit environment
2075 * @w: pointer to work_struct embedded in adapter structure
2077 * Context: Called in the KUnit environment. Does nothing.
2081 static void ibmveth_reset_kunit(struct work_struct *w)
/* only emits debug messages; the tests install it as the adapter's work */
2083 netdev_dbg(NULL, "reset_kunit starting\n");
2084 netdev_dbg(NULL, "reset_kunit complete\n");
2088 * ibmveth_remove_buffer_from_pool_test - unit test for some of
2089 * ibmveth_remove_buffer_from_pool
2090 * @test: pointer to kunit structure
2092 * Tests the error returns from ibmveth_remove_buffer_from_pool.
2093 * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be
2094 * checked to see that these warnings happened.
2098 static void ibmveth_remove_buffer_from_pool_test(struct kunit *test)
2100 struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2101 struct ibmveth_buff_pool *pool;
2104 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
/* use the no-op reset so work queued by the code under test is harmless */
2106 INIT_WORK(&adapter->work, ibmveth_reset_kunit);
2108 /* Set sane values for buffer pools */
2109 for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2110 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2111 pool_count[i], pool_size[i],
2114 pool = &adapter->rx_buff_pool[0];
2115 pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2116 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
/* case 1: upper 32 bits hold a pool number one past the last pool */
2118 correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0;
2119 KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2120 KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
/* case 2: lower 32 bits hold a buffer index equal to the pool size */
2122 correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size;
2123 KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2124 KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
/* case 3: pool 0, index 0, but the skb slot is NULL */
2126 correlator = (u64)0 | 0;
2127 pool->skbuff[0] = NULL;
2128 KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2129 KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
/* wait for any work queued on adapter->work before the test returns */
2131 flush_work(&adapter->work);
2135 * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer
2136 * @test: pointer to kunit structure
2138 * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for
2139 * the NULL returns, so dmesg should be checked to see that these warnings
2144 static void ibmveth_rxq_get_buffer_test(struct kunit *test)
2146 struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2147 struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL);
2148 struct ibmveth_buff_pool *pool;
2150 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
2151 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
2153 INIT_WORK(&adapter->work, ibmveth_reset_kunit);
/* a one-entry rx queue positioned at its only entry */
2155 adapter->rx_queue.queue_len = 1;
2156 adapter->rx_queue.index = 0;
2157 adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry),
2159 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr);
2161 /* Set sane values for buffer pools */
2162 for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2163 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2164 pool_count[i], pool_size[i],
2167 pool = &adapter->rx_buff_pool[0];
2168 pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2169 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
/* case 1: pool number one past the last pool, expect NULL */
2171 adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0;
2172 KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
/* case 2: buffer index equal to the pool size, expect NULL */
2174 adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size;
2175 KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
/* case 3: valid pool and index, expect the skb stored in that slot */
2177 pool->skbuff[0] = skb;
2178 adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0;
2179 KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter));
2181 flush_work(&adapter->work);
/* KUnit cases that make up the ibmveth suite */
2184 static struct kunit_case ibmveth_test_cases[] = {
2185 KUNIT_CASE(ibmveth_remove_buffer_from_pool_test),
2186 KUNIT_CASE(ibmveth_rxq_get_buffer_test),
/* suite definition and its registration with the KUnit framework */
2190 static struct kunit_suite ibmveth_test_suite = {
2191 .name = "ibmveth-kunit-test",
2192 .test_cases = ibmveth_test_cases,
2195 kunit_test_suite(ibmveth_test_suite);