drivers/net/ethernet/ibm/ibmveth.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * IBM Power Virtual Ethernet Device Driver
4  *
5  * Copyright (C) IBM Corporation, 2003, 2010
6  *
7  * Authors: Dave Larson <larson1@us.ibm.com>
8  *          Santiago Leon <santil@linux.vnet.ibm.com>
9  *          Brian King <brking@linux.vnet.ibm.com>
10  *          Robert Jennings <rcj@linux.vnet.ibm.com>
11  *          Anton Blanchard <anton@au.ibm.com>
12  */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kernel.h>
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/skbuff.h>
22 #include <linux/init.h>
23 #include <linux/interrupt.h>
24 #include <linux/mm.h>
25 #include <linux/pm.h>
26 #include <linux/ethtool.h>
27 #include <linux/in.h>
28 #include <linux/ip.h>
29 #include <linux/ipv6.h>
30 #include <linux/slab.h>
31 #include <asm/hvcall.h>
32 #include <linux/atomic.h>
33 #include <asm/vio.h>
34 #include <asm/iommu.h>
35 #include <asm/firmware.h>
36 #include <net/tcp.h>
37 #include <net/ip6_checksum.h>
38
39 #include "ibmveth.h"
40
41 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
42 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
43
44 static struct kobj_type ktype_veth_pool;
45
46
47 static const char ibmveth_driver_name[] = "ibmveth";
48 static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
49 #define ibmveth_driver_version "1.06"
50
51 MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
52 MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
53 MODULE_LICENSE("GPL");
54 MODULE_VERSION(ibmveth_driver_version);
55
56 static unsigned int tx_copybreak __read_mostly = 128;
57 module_param(tx_copybreak, uint, 0644);
58 MODULE_PARM_DESC(tx_copybreak,
59         "Maximum size of packet that is copied to a new buffer on transmit");
60
61 static unsigned int rx_copybreak __read_mostly = 128;
62 module_param(rx_copybreak, uint, 0644);
63 MODULE_PARM_DESC(rx_copybreak,
64         "Maximum size of packet that is copied to a new buffer on receive");
65
66 static unsigned int rx_flush __read_mostly = 0;
67 module_param(rx_flush, uint, 0644);
68 MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
69
70 static bool old_large_send __read_mostly;
71 module_param(old_large_send, bool, 0444);
72 MODULE_PARM_DESC(old_large_send,
73         "Use old large send method on firmware that supports the new method");
74
75 struct ibmveth_stat {
76         char name[ETH_GSTRING_LEN];
77         int offset;
78 };
79
80 #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
81 #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
82
83 static struct ibmveth_stat ibmveth_stats[] = {
84         { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
85         { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
86         { "replenish_add_buff_failure",
87                         IBMVETH_STAT_OFF(replenish_add_buff_failure) },
88         { "replenish_add_buff_success",
89                         IBMVETH_STAT_OFF(replenish_add_buff_success) },
90         { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
91         { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
92         { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
93         { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
94         { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
95         { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
96         { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
97         { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
98         { "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
99 };
100
101 /* simple methods of getting data from the current rxq entry */
102 static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
103 {
104         return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
105 }
106
107 static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
108 {
109         return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
110                         IBMVETH_RXQ_TOGGLE_SHIFT;
111 }
112
113 static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
114 {
115         return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
116 }
117
118 static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
119 {
120         return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
121 }
122
123 static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
124 {
125         return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
126 }
127
128 static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
129 {
130         return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
131 }
132
133 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
134 {
135         return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
136 }
137
138 static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
139 {
140         return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
141 }
142
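/* Number of usable TX queues: bounded by the online CPU count and the
 * driver maximum, IBMVETH_MAX_QUEUES.
 */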
143 static unsigned int ibmveth_real_max_tx_queues(void)
144 {
145         unsigned int n_cpu = num_online_cpus();
146
147         return min(n_cpu, IBMVETH_MAX_QUEUES);
148 }
149
150 /* setup the initial settings for a buffer pool */
151 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
152                                      u32 pool_index, u32 pool_size,
153                                      u32 buff_size, u32 pool_active)
154 {
155         pool->size = pool_size;
156         pool->index = pool_index;
157         pool->buff_size = buff_size;
158         pool->threshold = pool_size * 7 / 8;
159         pool->active = pool_active;
160 }
161
162 /* allocate and set up a buffer pool - called during open */
163 static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
164 {
165         int i;
166
167         pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
168
169         if (!pool->free_map)
170                 return -1;
171
172         pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
173         if (!pool->dma_addr) {
174                 kfree(pool->free_map);
175                 pool->free_map = NULL;
176                 return -1;
177         }
178
179         pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
180
181         if (!pool->skbuff) {
182                 kfree(pool->dma_addr);
183                 pool->dma_addr = NULL;
184
185                 kfree(pool->free_map);
186                 pool->free_map = NULL;
187                 return -1;
188         }
189
190         for (i = 0; i < pool->size; ++i)
191                 pool->free_map[i] = i;
192
193         atomic_set(&pool->available, 0);
194         pool->producer_index = 0;
195         pool->consumer_index = 0;
196
197         return 0;
198 }
199
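/* Write a buffer back out of the data cache one SMP_CACHE_BYTES line at a
 * time (PowerPC dcbf). Only used when the rx_flush module parameter is set.
 */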
200 static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
201 {
202         unsigned long offset;
203
204         for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
205                 asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
206 }
207
208 /* replenish the buffers for a pool.  note that we don't need to
209  * skb_reserve these since they are used for incoming...
210  */
211 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
212                                           struct ibmveth_buff_pool *pool)
213 {
214         u32 i;
215         u32 count = pool->size - atomic_read(&pool->available);
216         u32 buffers_added = 0;
217         struct sk_buff *skb;
218         unsigned int free_index, index;
219         u64 correlator;
220         unsigned long lpar_rc;
221         dma_addr_t dma_addr;
222
223         mb();
224
225         for (i = 0; i < count; ++i) {
226                 union ibmveth_buf_desc desc;
227
228                 free_index = pool->consumer_index;
229                 index = pool->free_map[free_index];
230                 skb = NULL;
231
232                 if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
233                         schedule_work(&adapter->work);
234                         goto bad_index_failure;
235                 }
236
237                 /* are we allocating a new buffer or recycling an old one */
238                 if (pool->skbuff[index])
239                         goto reuse;
240
241                 skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
242
243                 if (!skb) {
244                         netdev_dbg(adapter->netdev,
245                                    "replenish: unable to allocate skb\n");
246                         adapter->replenish_no_mem++;
247                         break;
248                 }
249
250                 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
251                                 pool->buff_size, DMA_FROM_DEVICE);
252
253                 if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
254                         goto failure;
255
256                 pool->dma_addr[index] = dma_addr;
257                 pool->skbuff[index] = skb;
258
259                 if (rx_flush) {
260                         unsigned int len = min(pool->buff_size,
261                                                adapter->netdev->mtu +
262                                                IBMVETH_BUFF_OH);
263                         ibmveth_flush_buffer(skb->data, len);
264                 }
265 reuse:
266                 dma_addr = pool->dma_addr[index];
267                 desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
268                 desc.fields.address = dma_addr;
269
270                 correlator = ((u64)pool->index << 32) | index;
271                 *(u64 *)pool->skbuff[index]->data = correlator;
272
273                 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
274                                                    desc.desc);
275
276                 if (lpar_rc != H_SUCCESS) {
277                         netdev_warn(adapter->netdev,
278                                     "%sadd_logical_lan failed %lu\n",
279                                     skb ? "" : "When recycling: ", lpar_rc);
280                         goto failure;
281                 }
282
283                 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
284                 pool->consumer_index++;
285                 if (pool->consumer_index >= pool->size)
286                         pool->consumer_index = 0;
287
288                 buffers_added++;
289                 adapter->replenish_add_buff_success++;
290         }
291
292         mb();
293         atomic_add(buffers_added, &(pool->available));
294         return;
295
296 failure:
297
298         if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
299                 dma_unmap_single(&adapter->vdev->dev,
300                                  pool->dma_addr[index], pool->buff_size,
301                                  DMA_FROM_DEVICE);
302         dev_kfree_skb_any(pool->skbuff[index]);
303         pool->skbuff[index] = NULL;
304 bad_index_failure:
305         adapter->replenish_add_buff_failure++;
306
307         mb();
308         atomic_add(buffers_added, &(pool->available));
309 }
310
311 /*
312  * The final 8 bytes of the buffer list are a counter of frames dropped
313  * because there was no buffer in the buffer list capable of holding
314  * the frame.
315  */
316 static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
317 {
318         __be64 *p = adapter->buffer_list_addr + 4096 - 8;
319
320         adapter->rx_no_buffer = be64_to_cpup(p);
321 }
322
323 /* replenish routine */
324 static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
325 {
326         int i;
327
328         adapter->replenish_task_cycles++;
329
330         for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
331                 struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];
332
333                 if (pool->active &&
334                     (atomic_read(&pool->available) < pool->threshold))
335                         ibmveth_replenish_buffer_pool(adapter, pool);
336         }
337
338         ibmveth_update_rx_no_buffer(adapter);
339 }
340
341 /* empty and free a buffer pool - also used to do cleanup in error paths */
342 static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
343                                      struct ibmveth_buff_pool *pool)
344 {
345         int i;
346
347         kfree(pool->free_map);
348         pool->free_map = NULL;
349
350         if (pool->skbuff && pool->dma_addr) {
351                 for (i = 0; i < pool->size; ++i) {
352                         struct sk_buff *skb = pool->skbuff[i];
353                         if (skb) {
354                                 dma_unmap_single(&adapter->vdev->dev,
355                                                  pool->dma_addr[i],
356                                                  pool->buff_size,
357                                                  DMA_FROM_DEVICE);
358                                 dev_kfree_skb_any(skb);
359                                 pool->skbuff[i] = NULL;
360                         }
361                 }
362         }
363
364         if (pool->dma_addr) {
365                 kfree(pool->dma_addr);
366                 pool->dma_addr = NULL;
367         }
368
369         if (pool->skbuff) {
370                 kfree(pool->skbuff);
371                 pool->skbuff = NULL;
372         }
373 }
374
375 /**
376  * ibmveth_remove_buffer_from_pool - remove a buffer from a pool
377  * @adapter: adapter instance
378  * @correlator: identifies pool and index
379  * @reuse: whether to reuse buffer
380  *
381  * Return:
382  * * %0       - success
383  * * %-EINVAL - correlator maps to a pool or index that is out of range
384  * * %-EFAULT - pool and index map to a null skb
385  */
386 static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
387                                            u64 correlator, bool reuse)
388 {
389         unsigned int pool  = correlator >> 32;
390         unsigned int index = correlator & 0xffffffffUL;
391         unsigned int free_index;
392         struct sk_buff *skb;
393
394         if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
395             WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
396                 schedule_work(&adapter->work);
397                 return -EINVAL;
398         }
399
400         skb = adapter->rx_buff_pool[pool].skbuff[index];
401         if (WARN_ON(!skb)) {
402                 schedule_work(&adapter->work);
403                 return -EFAULT;
404         }
405
406         /* if we are going to reuse the buffer then keep the pointers around
407          * but mark index as available. replenish will see the skb pointer and
408          * assume it is to be recycled.
409          */
410         if (!reuse) {
411                 /* remove the skb pointer to mark free. actual freeing is done
412          * by upper level networking after gro_receive
413                  */
414                 adapter->rx_buff_pool[pool].skbuff[index] = NULL;
415
416                 dma_unmap_single(&adapter->vdev->dev,
417                                  adapter->rx_buff_pool[pool].dma_addr[index],
418                                  adapter->rx_buff_pool[pool].buff_size,
419                                  DMA_FROM_DEVICE);
420         }
421
422         free_index = adapter->rx_buff_pool[pool].producer_index;
423         adapter->rx_buff_pool[pool].producer_index++;
424         if (adapter->rx_buff_pool[pool].producer_index >=
425             adapter->rx_buff_pool[pool].size)
426                 adapter->rx_buff_pool[pool].producer_index = 0;
427         adapter->rx_buff_pool[pool].free_map[free_index] = index;
428
429         mb();
430
431         atomic_dec(&(adapter->rx_buff_pool[pool].available));
432
433         return 0;
434 }
435
436 /* get the current buffer on the rx queue */
437 static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
438 {
439         u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
440         unsigned int pool = correlator >> 32;
441         unsigned int index = correlator & 0xffffffffUL;
442
443         if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
444             WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
445                 schedule_work(&adapter->work);
446                 return NULL;
447         }
448
449         return adapter->rx_buff_pool[pool].skbuff[index];
450 }
451
452 /**
453  * ibmveth_rxq_harvest_buffer - Harvest buffer from pool
454  *
455  * @adapter: pointer to adapter
456  * @reuse:   whether to reuse buffer
457  *
458  * Context: called from ibmveth_poll
459  *
460  * Return:
461  * * %0    - success
462  * * other - non-zero return from ibmveth_remove_buffer_from_pool
463  */
464 static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
465                                       bool reuse)
466 {
467         u64 cor;
468         int rc;
469
470         cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
471         rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
472         if (unlikely(rc))
473                 return rc;
474
475         if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
476                 adapter->rx_queue.index = 0;
477                 adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
478         }
479
480         return 0;
481 }
482
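/* Unmap and free the TX long term buffer of one queue */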
483 static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
484 {
485         dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
486                          adapter->tx_ltb_size, DMA_TO_DEVICE);
487         kfree(adapter->tx_ltb_ptr[idx]);
488         adapter->tx_ltb_ptr[idx] = NULL;
489 }
490
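/* Allocate and DMA map the TX long term buffer of one queue */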
491 static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
492 {
493         adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
494                                            GFP_KERNEL);
495         if (!adapter->tx_ltb_ptr[idx]) {
496                 netdev_err(adapter->netdev,
497                            "unable to allocate tx long term buffer\n");
498                 return -ENOMEM;
499         }
500         adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
501                                                   adapter->tx_ltb_ptr[idx],
502                                                   adapter->tx_ltb_size,
503                                                   DMA_TO_DEVICE);
504         if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
505                 netdev_err(adapter->netdev,
506                            "unable to DMA map tx long term buffer\n");
507                 kfree(adapter->tx_ltb_ptr[idx]);
508                 adapter->tx_ltb_ptr[idx] = NULL;
509                 return -ENOMEM;
510         }
511
512         return 0;
513 }
514
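/* Register the buffer list, receive queue and filter list with the
 * hypervisor via h_register_logical_lan.
 */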
515 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
516         union ibmveth_buf_desc rxq_desc, u64 mac_address)
517 {
518         int rc, try_again = 1;
519
520         /*
521          * After a kexec the adapter will still be open, so our attempt to
522          * open it will fail. So if we get a failure we free the adapter and
523          * try again, but only once.
524          */
525 retry:
526         rc = h_register_logical_lan(adapter->vdev->unit_address,
527                                     adapter->buffer_list_dma, rxq_desc.desc,
528                                     adapter->filter_list_dma, mac_address);
529
530         if (rc != H_SUCCESS && try_again) {
531                 do {
532                         rc = h_free_logical_lan(adapter->vdev->unit_address);
533                 } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
534
535                 try_again = 0;
536                 goto retry;
537         }
538
539         return rc;
540 }
541
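/* ndo_open handler: allocate the buffer list, filter list, receive queue and
 * per-queue TX long term buffers, register the logical LAN with the
 * hypervisor, request the interrupt and start the TX queues.
 */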
542 static int ibmveth_open(struct net_device *netdev)
543 {
544         struct ibmveth_adapter *adapter = netdev_priv(netdev);
545         u64 mac_address;
546         int rxq_entries = 1;
547         unsigned long lpar_rc;
548         int rc;
549         union ibmveth_buf_desc rxq_desc;
550         int i;
551         struct device *dev;
552
553         netdev_dbg(netdev, "open starting\n");
554
555         napi_enable(&adapter->napi);
556
557         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
558                 rxq_entries += adapter->rx_buff_pool[i].size;
559
560         rc = -ENOMEM;
561         adapter->buffer_list_addr = (void *)get_zeroed_page(GFP_KERNEL);
562         if (!adapter->buffer_list_addr) {
563                 netdev_err(netdev, "unable to allocate list pages\n");
564                 goto out;
565         }
566
567         adapter->filter_list_addr = (void *)get_zeroed_page(GFP_KERNEL);
568         if (!adapter->filter_list_addr) {
569                 netdev_err(netdev, "unable to allocate filter pages\n");
570                 goto out_free_buffer_list;
571         }
572
573         dev = &adapter->vdev->dev;
574
575         adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
576                                                 rxq_entries;
577         adapter->rx_queue.queue_addr =
578                 dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
579                                    &adapter->rx_queue.queue_dma, GFP_KERNEL);
580         if (!adapter->rx_queue.queue_addr)
581                 goto out_free_filter_list;
582
583         adapter->buffer_list_dma = dma_map_single(dev,
584                         adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
585         if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
586                 netdev_err(netdev, "unable to map buffer list pages\n");
587                 goto out_free_queue_mem;
588         }
589
590         adapter->filter_list_dma = dma_map_single(dev,
591                         adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
592         if (dma_mapping_error(dev, adapter->filter_list_dma)) {
593                 netdev_err(netdev, "unable to map filter list pages\n");
594                 goto out_unmap_buffer_list;
595         }
596
597         for (i = 0; i < netdev->real_num_tx_queues; i++) {
598                 if (ibmveth_allocate_tx_ltb(adapter, i))
599                         goto out_free_tx_ltb;
600         }
601
602         adapter->rx_queue.index = 0;
603         adapter->rx_queue.num_slots = rxq_entries;
604         adapter->rx_queue.toggle = 1;
605
606         mac_address = ether_addr_to_u64(netdev->dev_addr);
607
608         rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
609                                         adapter->rx_queue.queue_len;
610         rxq_desc.fields.address = adapter->rx_queue.queue_dma;
611
612         netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
613         netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
614         netdev_dbg(netdev, "receive q   @ 0x%p\n", adapter->rx_queue.queue_addr);
615
616         h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
617
618         lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);
619
620         if (lpar_rc != H_SUCCESS) {
621                 netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
622                            lpar_rc);
623                 netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
624                            "desc:0x%llx MAC:0x%llx\n",
625                                      adapter->buffer_list_dma,
626                                      adapter->filter_list_dma,
627                                      rxq_desc.desc,
628                                      mac_address);
629                 rc = -ENONET;
630                 goto out_unmap_filter_list;
631         }
632
633         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
634                 if (!adapter->rx_buff_pool[i].active)
635                         continue;
636                 if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
637                         netdev_err(netdev, "unable to alloc pool\n");
638                         adapter->rx_buff_pool[i].active = 0;
639                         rc = -ENOMEM;
640                         goto out_free_buffer_pools;
641                 }
642         }
643
644         netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
645         rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
646                          netdev);
647         if (rc != 0) {
648                 netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
649                            netdev->irq, rc);
650                 do {
651                         lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
652                 } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
653
654                 goto out_free_buffer_pools;
655         }
656
657         rc = -ENOMEM;
658
659         netdev_dbg(netdev, "initial replenish cycle\n");
660         ibmveth_interrupt(netdev->irq, netdev);
661
662         netif_tx_start_all_queues(netdev);
663
664         netdev_dbg(netdev, "open complete\n");
665
666         return 0;
667
668 out_free_buffer_pools:
669         while (--i >= 0) {
670                 if (adapter->rx_buff_pool[i].active)
671                         ibmveth_free_buffer_pool(adapter,
672                                                  &adapter->rx_buff_pool[i]);
673         }
674 out_unmap_filter_list:
675         dma_unmap_single(dev, adapter->filter_list_dma, 4096,
676                          DMA_BIDIRECTIONAL);
677
678 out_free_tx_ltb:
679         while (--i >= 0) {
680                 ibmveth_free_tx_ltb(adapter, i);
681         }
682
683 out_unmap_buffer_list:
684         dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
685                          DMA_BIDIRECTIONAL);
686 out_free_queue_mem:
687         dma_free_coherent(dev, adapter->rx_queue.queue_len,
688                           adapter->rx_queue.queue_addr,
689                           adapter->rx_queue.queue_dma);
690 out_free_filter_list:
691         free_page((unsigned long)adapter->filter_list_addr);
692 out_free_buffer_list:
693         free_page((unsigned long)adapter->buffer_list_addr);
694 out:
695         napi_disable(&adapter->napi);
696         return rc;
697 }
698
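/* ndo_stop handler: quiesce NAPI and the TX queues, free the logical LAN in
 * the hypervisor, release the interrupt and free all DMA resources.
 */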
699 static int ibmveth_close(struct net_device *netdev)
700 {
701         struct ibmveth_adapter *adapter = netdev_priv(netdev);
702         struct device *dev = &adapter->vdev->dev;
703         long lpar_rc;
704         int i;
705
706         netdev_dbg(netdev, "close starting\n");
707
708         napi_disable(&adapter->napi);
709
710         netif_tx_stop_all_queues(netdev);
711
712         h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
713
714         do {
715                 lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
716         } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
717
718         if (lpar_rc != H_SUCCESS) {
719                 netdev_err(netdev, "h_free_logical_lan failed with %lx, "
720                            "continuing with close\n", lpar_rc);
721         }
722
723         free_irq(netdev->irq, netdev);
724
725         ibmveth_update_rx_no_buffer(adapter);
726
727         dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
728                          DMA_BIDIRECTIONAL);
729         free_page((unsigned long)adapter->buffer_list_addr);
730
731         dma_unmap_single(dev, adapter->filter_list_dma, 4096,
732                          DMA_BIDIRECTIONAL);
733         free_page((unsigned long)adapter->filter_list_addr);
734
735         dma_free_coherent(dev, adapter->rx_queue.queue_len,
736                           adapter->rx_queue.queue_addr,
737                           adapter->rx_queue.queue_dma);
738
739         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
740                 if (adapter->rx_buff_pool[i].active)
741                         ibmveth_free_buffer_pool(adapter,
742                                                  &adapter->rx_buff_pool[i]);
743
744         for (i = 0; i < netdev->real_num_tx_queues; i++)
745                 ibmveth_free_tx_ltb(adapter, i);
746
747         netdev_dbg(netdev, "close complete\n");
748
749         return 0;
750 }
751
752 /**
753  * ibmveth_reset - Handle scheduled reset work
754  *
755  * @w: pointer to work_struct embedded in adapter structure
756  *
757  * Context: This routine acquires rtnl_mutex and disables its NAPI through
758  *          ibmveth_close. It can't be called directly in a context that has
759  *          already acquired rtnl_mutex or disabled its NAPI, or directly from
760  *          a poll routine.
761  *
762  * Return: void
763  */
764 static void ibmveth_reset(struct work_struct *w)
765 {
766         struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work);
767         struct net_device *netdev = adapter->netdev;
768
769         netdev_dbg(netdev, "reset starting\n");
770
771         rtnl_lock();
772
773         dev_close(adapter->netdev);
774         dev_open(adapter->netdev, NULL);
775
776         rtnl_unlock();
777
778         netdev_dbg(netdev, "reset complete\n");
779 }
780
781 static int ibmveth_set_link_ksettings(struct net_device *dev,
782                                       const struct ethtool_link_ksettings *cmd)
783 {
784         struct ibmveth_adapter *adapter = netdev_priv(dev);
785
786         return ethtool_virtdev_set_link_ksettings(dev, cmd,
787                                                   &adapter->speed,
788                                                   &adapter->duplex);
789 }
790
791 static int ibmveth_get_link_ksettings(struct net_device *dev,
792                                       struct ethtool_link_ksettings *cmd)
793 {
794         struct ibmveth_adapter *adapter = netdev_priv(dev);
795
796         cmd->base.speed = adapter->speed;
797         cmd->base.duplex = adapter->duplex;
798         cmd->base.port = PORT_OTHER;
799
800         return 0;
801 }
802
803 static void ibmveth_init_link_settings(struct net_device *dev)
804 {
805         struct ibmveth_adapter *adapter = netdev_priv(dev);
806
807         adapter->speed = SPEED_1000;
808         adapter->duplex = DUPLEX_FULL;
809 }
810
811 static void netdev_get_drvinfo(struct net_device *dev,
812                                struct ethtool_drvinfo *info)
813 {
814         strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
815         strscpy(info->version, ibmveth_driver_version, sizeof(info->version));
816 }
817
818 static netdev_features_t ibmveth_fix_features(struct net_device *dev,
819         netdev_features_t features)
820 {
821         /*
822          * Since the ibmveth firmware interface does not have the
823          * concept of separate tx/rx checksum offload enable, if rx
824          * checksum is disabled we also have to disable tx checksum
825          * offload. Once we disable rx checksum offload, we are no
826          * longer allowed to send tx buffers that are not properly
827          * checksummed.
828          */
829
830         if (!(features & NETIF_F_RXCSUM))
831                 features &= ~NETIF_F_CSUM_MASK;
832
833         return features;
834 }
835
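/* Enable or disable IPv4/IPv6 TCP checksum offload in the firmware via
 * h_illan_attributes. If the device is running it is closed and reopened
 * around the change.
 */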
836 static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
837 {
838         struct ibmveth_adapter *adapter = netdev_priv(dev);
839         unsigned long set_attr, clr_attr, ret_attr;
840         unsigned long set_attr6, clr_attr6;
841         long ret, ret4, ret6;
842         int rc1 = 0, rc2 = 0;
843         int restart = 0;
844
845         if (netif_running(dev)) {
846                 restart = 1;
847                 ibmveth_close(dev);
848         }
849
850         set_attr = 0;
851         clr_attr = 0;
852         set_attr6 = 0;
853         clr_attr6 = 0;
854
855         if (data) {
856                 set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
857                 set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
858         } else {
859                 clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
860                 clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
861         }
862
863         ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
864
865         if (ret == H_SUCCESS &&
866             (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
867                 ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
868                                          set_attr, &ret_attr);
869
870                 if (ret4 != H_SUCCESS) {
871                         netdev_err(dev, "unable to change IPv4 checksum "
872                                         "offload settings. %d rc=%ld\n",
873                                         data, ret4);
874
875                         h_illan_attributes(adapter->vdev->unit_address,
876                                            set_attr, clr_attr, &ret_attr);
877
878                         if (data == 1)
879                                 dev->features &= ~NETIF_F_IP_CSUM;
880
881                 } else {
882                         adapter->fw_ipv4_csum_support = data;
883                 }
884
885                 ret6 = h_illan_attributes(adapter->vdev->unit_address,
886                                          clr_attr6, set_attr6, &ret_attr);
887
888                 if (ret6 != H_SUCCESS) {
889                         netdev_err(dev, "unable to change IPv6 checksum "
890                                         "offload settings. %d rc=%ld\n",
891                                         data, ret6);
892
893                         h_illan_attributes(adapter->vdev->unit_address,
894                                            set_attr6, clr_attr6, &ret_attr);
895
896                         if (data == 1)
897                                 dev->features &= ~NETIF_F_IPV6_CSUM;
898
899                 } else
900                         adapter->fw_ipv6_csum_support = data;
901
902                 if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
903                         adapter->rx_csum = data;
904                 else
905                         rc1 = -EIO;
906         } else {
907                 rc1 = -EIO;
908                 netdev_err(dev, "unable to change checksum offload settings."
909                                      " %d rc=%ld ret_attr=%lx\n", data, ret,
910                                      ret_attr);
911         }
912
913         if (restart)
914                 rc2 = ibmveth_open(dev);
915
916         return rc1 ? rc1 : rc2;
917 }
918
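/* Enable or disable large send (TSO) offload. The firmware large send
 * attribute is used when supported (and old_large_send is not set); otherwise
 * the old large send method is used, which does not support IPv6.
 */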
919 static int ibmveth_set_tso(struct net_device *dev, u32 data)
920 {
921         struct ibmveth_adapter *adapter = netdev_priv(dev);
922         unsigned long set_attr, clr_attr, ret_attr;
923         long ret1, ret2;
924         int rc1 = 0, rc2 = 0;
925         int restart = 0;
926
927         if (netif_running(dev)) {
928                 restart = 1;
929                 ibmveth_close(dev);
930         }
931
932         set_attr = 0;
933         clr_attr = 0;
934
935         if (data)
936                 set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
937         else
938                 clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
939
940         ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
941
942         if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
943             !old_large_send) {
944                 ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
945                                           set_attr, &ret_attr);
946
947                 if (ret2 != H_SUCCESS) {
948                         netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
949                                    data, ret2);
950
951                         h_illan_attributes(adapter->vdev->unit_address,
952                                            set_attr, clr_attr, &ret_attr);
953
954                         if (data == 1)
955                                 dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
956                         rc1 = -EIO;
957
958                 } else {
959                         adapter->fw_large_send_support = data;
960                         adapter->large_send = data;
961                 }
962         } else {
963                 /* Older firmware versions of large send offload do not
964                  * support tcp6/ipv6
965                  */
966                 if (data == 1) {
967                         dev->features &= ~NETIF_F_TSO6;
968                         netdev_info(dev, "TSO feature requires all partitions to have updated driver");
969                 }
970                 adapter->large_send = data;
971         }
972
973         if (restart)
974                 rc2 = ibmveth_open(dev);
975
976         return rc1 ? rc1 : rc2;
977 }
978
979 static int ibmveth_set_features(struct net_device *dev,
980         netdev_features_t features)
981 {
982         struct ibmveth_adapter *adapter = netdev_priv(dev);
983         int rx_csum = !!(features & NETIF_F_RXCSUM);
984         int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
985         int rc1 = 0, rc2 = 0;
986
987         if (rx_csum != adapter->rx_csum) {
988                 rc1 = ibmveth_set_csum_offload(dev, rx_csum);
989                 if (rc1 && !adapter->rx_csum)
990                         dev->features =
991                                 features & ~(NETIF_F_CSUM_MASK |
992                                              NETIF_F_RXCSUM);
993         }
994
995         if (large_send != adapter->large_send) {
996                 rc2 = ibmveth_set_tso(dev, large_send);
997                 if (rc2 && !adapter->large_send)
998                         dev->features =
999                                 features & ~(NETIF_F_TSO | NETIF_F_TSO6);
1000         }
1001
1002         return rc1 ? rc1 : rc2;
1003 }
1004
1005 static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1006 {
1007         int i;
1008
1009         if (stringset != ETH_SS_STATS)
1010                 return;
1011
1012         for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
1013                 memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
1014 }
1015
1016 static int ibmveth_get_sset_count(struct net_device *dev, int sset)
1017 {
1018         switch (sset) {
1019         case ETH_SS_STATS:
1020                 return ARRAY_SIZE(ibmveth_stats);
1021         default:
1022                 return -EOPNOTSUPP;
1023         }
1024 }
1025
1026 static void ibmveth_get_ethtool_stats(struct net_device *dev,
1027                                       struct ethtool_stats *stats, u64 *data)
1028 {
1029         int i;
1030         struct ibmveth_adapter *adapter = netdev_priv(dev);
1031
1032         for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
1033                 data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
1034 }
1035
1036 static void ibmveth_get_channels(struct net_device *netdev,
1037                                  struct ethtool_channels *channels)
1038 {
1039         channels->max_tx = ibmveth_real_max_tx_queues();
1040         channels->tx_count = netdev->real_num_tx_queues;
1041
1042         channels->max_rx = netdev->real_num_rx_queues;
1043         channels->rx_count = netdev->real_num_rx_queues;
1044 }
1045
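/* Change the number of active TX queues, allocating or freeing per-queue TX
 * long term buffers as needed while the TX queues are stopped.
 */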
1046 static int ibmveth_set_channels(struct net_device *netdev,
1047                                 struct ethtool_channels *channels)
1048 {
1049         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1050         unsigned int old = netdev->real_num_tx_queues,
1051                      goal = channels->tx_count;
1052         int rc, i;
1053
1054         /* If ndo_open has not been called yet then don't allocate, just set
1055          * desired netdev_queues and return
1056          */
1057         if (!(netdev->flags & IFF_UP))
1058                 return netif_set_real_num_tx_queues(netdev, goal);
1059
1060         /* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
1061         /* We have IBMVETH_MAX_QUEUES netdev_queues allocated
1062          * but we may need to alloc/free the LTBs.
1063         netif_tx_stop_all_queues(netdev);
1064
1065         /* Allocate any queue that we need */
1066         for (i = old; i < goal; i++) {
1067                 if (adapter->tx_ltb_ptr[i])
1068                         continue;
1069
1070                 rc = ibmveth_allocate_tx_ltb(adapter, i);
1071                 if (!rc)
1072                         continue;
1073
1074                 /* if something goes wrong, free everything we just allocated */
1075                 netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
1076                            old);
1077                 goal = old;
1078                 old = i;
1079                 break;
1080         }
1081         rc = netif_set_real_num_tx_queues(netdev, goal);
1082         if (rc) {
1083                 netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
1084                            old);
1085                 goal = old;
1086                 old = i;
1087         }
1088         /* Free any that are no longer needed */
1089         for (i = old; i > goal; i--) {
1090                 if (adapter->tx_ltb_ptr[i - 1])
1091                         ibmveth_free_tx_ltb(adapter, i - 1);
1092         }
1093
1094         netif_tx_wake_all_queues(netdev);
1095
1096         return rc;
1097 }
1098
1099 static const struct ethtool_ops netdev_ethtool_ops = {
1100         .get_drvinfo                     = netdev_get_drvinfo,
1101         .get_link                        = ethtool_op_get_link,
1102         .get_strings                     = ibmveth_get_strings,
1103         .get_sset_count                  = ibmveth_get_sset_count,
1104         .get_ethtool_stats               = ibmveth_get_ethtool_stats,
1105         .get_link_ksettings              = ibmveth_get_link_ksettings,
1106         .set_link_ksettings              = ibmveth_set_link_ksettings,
1107         .get_channels                    = ibmveth_get_channels,
1108         .set_channels                    = ibmveth_set_channels
1109 };
1110
1111 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1112 {
1113         return -EOPNOTSUPP;
1114 }
1115
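/* Hand one descriptor to the hypervisor, retrying while it returns H_BUSY.
 * Returns 0 on H_SUCCESS or H_DROPPED, nonzero otherwise.
 */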
1116 static int ibmveth_send(struct ibmveth_adapter *adapter,
1117                         unsigned long desc, unsigned long mss)
1118 {
1119         unsigned long correlator;
1120         unsigned int retry_count;
1121         unsigned long ret;
1122
1123         /*
1124          * The retry count sets a maximum for the number of broadcast and
1125          * multicast destinations within the system.
1126          */
1127         retry_count = 1024;
1128         correlator = 0;
1129         do {
1130                 ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
1131                                          correlator, &correlator, mss,
1132                                          adapter->fw_large_send_support);
1133         } while ((ret == H_BUSY) && (retry_count--));
1134
1135         if (ret != H_SUCCESS && ret != H_DROPPED) {
1136                 netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
1137                            "with rc=%ld\n", ret);
1138                 return 1;
1139         }
1140
1141         return 0;
1142 }
1143
1144 static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
1145                                          struct net_device *netdev)
1146 {
1147         struct ethhdr *ether_header;
1148         int ret = 0;
1149
1150         ether_header = eth_hdr(skb);
1151
1152         if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) {
1153                 netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n");
1154                 netdev->stats.tx_dropped++;
1155                 ret = -EOPNOTSUPP;
1156         }
1157
1158         return ret;
1159 }
1160
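/* ndo_start_xmit handler: copy the skb (linear data and fragments) into the
 * per-queue TX long term buffer and submit a single descriptor to the
 * hypervisor, applying checksum offload and large send flags as needed.
 */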
1161 static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
1162                                       struct net_device *netdev)
1163 {
1164         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1165         unsigned int desc_flags, total_bytes;
1166         union ibmveth_buf_desc desc;
1167         int i, queue_num = skb_get_queue_mapping(skb);
1168         unsigned long mss = 0;
1169
1170         if (ibmveth_is_packet_unsupported(skb, netdev))
1171                 goto out;
1172         /* veth can't checksum offload UDP */
1173         if (skb->ip_summed == CHECKSUM_PARTIAL &&
1174             ((skb->protocol == htons(ETH_P_IP) &&
1175               ip_hdr(skb)->protocol != IPPROTO_TCP) ||
1176              (skb->protocol == htons(ETH_P_IPV6) &&
1177               ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
1178             skb_checksum_help(skb)) {
1179
1180                 netdev_err(netdev, "tx: failed to checksum packet\n");
1181                 netdev->stats.tx_dropped++;
1182                 goto out;
1183         }
1184
1185         desc_flags = IBMVETH_BUF_VALID;
1186
1187         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1188                 unsigned char *buf = skb_transport_header(skb) +
1189                                                 skb->csum_offset;
1190
1191                 desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);
1192
1193                 /* Need to zero out the checksum */
1194                 buf[0] = 0;
1195                 buf[1] = 0;
1196
1197                 if (skb_is_gso(skb) && adapter->fw_large_send_support)
1198                         desc_flags |= IBMVETH_BUF_LRG_SND;
1199         }
1200
1201         if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1202                 if (adapter->fw_large_send_support) {
1203                         mss = (unsigned long)skb_shinfo(skb)->gso_size;
1204                         adapter->tx_large_packets++;
1205                 } else if (!skb_is_gso_v6(skb)) {
1206                         /* Put -1 in the IP checksum to tell phyp it
1207                          * is a largesend packet. Put the mss in
1208                          * the TCP checksum.
1209                          */
1210                         ip_hdr(skb)->check = 0xffff;
1211                         tcp_hdr(skb)->check =
1212                                 cpu_to_be16(skb_shinfo(skb)->gso_size);
1213                         adapter->tx_large_packets++;
1214                 }
1215         }
1216
1217         /* Copy header into mapped buffer */
1218         if (unlikely(skb->len > adapter->tx_ltb_size)) {
1219                 netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
1220                            skb->len, adapter->tx_ltb_size);
1221                 netdev->stats.tx_dropped++;
1222                 goto out;
1223         }
1224         memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
1225         total_bytes = skb_headlen(skb);
1226         /* Copy frags into mapped buffers */
1227         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1228                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1229
1230                 memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
1231                        skb_frag_address_safe(frag), skb_frag_size(frag));
1232                 total_bytes += skb_frag_size(frag);
1233         }
1234
1235         if (unlikely(total_bytes != skb->len)) {
1236                 netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
1237                            skb->len, total_bytes);
1238                 netdev->stats.tx_dropped++;
1239                 goto out;
1240         }
1241         desc.fields.flags_len = desc_flags | skb->len;
1242         desc.fields.address = adapter->tx_ltb_dma[queue_num];
1243         /* finish writing to long_term_buff before VIOS accessing it */
1244         dma_wmb();
1245
1246         if (ibmveth_send(adapter, desc.desc, mss)) {
1247                 adapter->tx_send_failed++;
1248                 netdev->stats.tx_dropped++;
1249         } else {
1250                 netdev->stats.tx_packets++;
1251                 netdev->stats.tx_bytes += skb->len;
1252         }
1253
1254 out:
1255         dev_consume_skb_any(skb);
1256         return NETDEV_TX_OK;
1257
1258
1259 }
1260
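/* Set up the GSO fields of an aggregated (large receive) TCP skb. The MSS
 * comes either from the receive buffer (large packet bit set) or from the
 * TCP checksum field, where the sender stashed it.
 */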
1261 static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
1262 {
1263         struct tcphdr *tcph;
1264         int offset = 0;
1265         int hdr_len;
1266
1267         /* only TCP packets will be aggregated */
1268         if (skb->protocol == htons(ETH_P_IP)) {
1269                 struct iphdr *iph = (struct iphdr *)skb->data;
1270
1271                 if (iph->protocol == IPPROTO_TCP) {
1272                         offset = iph->ihl * 4;
1273                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1274                 } else {
1275                         return;
1276                 }
1277         } else if (skb->protocol == htons(ETH_P_IPV6)) {
1278                 struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;
1279
1280                 if (iph6->nexthdr == IPPROTO_TCP) {
1281                         offset = sizeof(struct ipv6hdr);
1282                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1283                 } else {
1284                         return;
1285                 }
1286         } else {
1287                 return;
1288         }
1289         /* if mss is not set through Large Packet bit/mss in rx buffer,
1290          * expect that the mss will be written to the tcp header checksum.
1291          */
1292         tcph = (struct tcphdr *)(skb->data + offset);
1293         if (lrg_pkt) {
1294                 skb_shinfo(skb)->gso_size = mss;
1295         } else if (offset) {
1296                 skb_shinfo(skb)->gso_size = ntohs(tcph->check);
1297                 tcph->check = 0;
1298         }
1299
1300         if (skb_shinfo(skb)->gso_size) {
1301                 hdr_len = offset + tcph->doff * 4;
1302                 skb_shinfo(skb)->gso_segs =
1303                                 DIV_ROUND_UP(skb->len - hdr_len,
1304                                              skb_shinfo(skb)->gso_size);
1305         }
1306 }
1307
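/* Fix up checksums on received large send frames: rebuild the IP header
 * checksum if the sender set it to -1, and recompute the TCP pseudo-header
 * checksum when the sender cleared it, marking the skb CHECKSUM_PARTIAL.
 */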
1308 static void ibmveth_rx_csum_helper(struct sk_buff *skb,
1309                                    struct ibmveth_adapter *adapter)
1310 {
1311         struct iphdr *iph = NULL;
1312         struct ipv6hdr *iph6 = NULL;
1313         __be16 skb_proto = 0;
1314         u16 iphlen = 0;
1315         u16 iph_proto = 0;
1316         u16 tcphdrlen = 0;
1317
1318         skb_proto = be16_to_cpu(skb->protocol);
1319
1320         if (skb_proto == ETH_P_IP) {
1321                 iph = (struct iphdr *)skb->data;
1322
1323                 /* If the IP checksum is not offloaded and if the packet
1324                  *  is large send, the checksum must be rebuilt.
1325                  */
1326                 if (iph->check == 0xffff) {
1327                         iph->check = 0;
1328                         iph->check = ip_fast_csum((unsigned char *)iph,
1329                                                   iph->ihl);
1330                 }
1331
1332                 iphlen = iph->ihl * 4;
1333                 iph_proto = iph->protocol;
1334         } else if (skb_proto == ETH_P_IPV6) {
1335                 iph6 = (struct ipv6hdr *)skb->data;
1336                 iphlen = sizeof(struct ipv6hdr);
1337                 iph_proto = iph6->nexthdr;
1338         }
1339
1340         /* When CSO is enabled the TCP checksum may have been set to NULL by
1341          * the sender, given that we zeroed out the TCP checksum field in the
1342          * transmit path (refer to ibmveth_start_xmit). In this case set
1343          * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
1344          * then be recalculated by the destination NIC (CSO must be enabled
1345          * on the destination NIC).
1346          *
1347          * In an OVS environment, when a flow is not cached, specifically for a
1348          * new TCP connection, the first packet information is passed up to
1349          * the user space for finding a flow. During this process, OVS computes
1350          * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
1351          *
1352          * So, re-compute TCP pseudo header checksum.
1353          */
1354
1355         if (iph_proto == IPPROTO_TCP) {
1356                 struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
1357
1358                 if (tcph->check == 0x0000) {
1359                         /* Recompute TCP pseudo header checksum  */
1360                         tcphdrlen = skb->len - iphlen;
1361                         if (skb_proto == ETH_P_IP)
1362                                 tcph->check =
1363                                  ~csum_tcpudp_magic(iph->saddr,
1364                                 iph->daddr, tcphdrlen, iph_proto, 0);
1365                         else if (skb_proto == ETH_P_IPV6)
1366                                 tcph->check =
1367                                  ~csum_ipv6_magic(&iph6->saddr,
1368                                 &iph6->daddr, tcphdrlen, iph_proto, 0);
1369                         /* Setup SKB fields for checksum offload */
1370                         skb_partial_csum_set(skb, iphlen,
1371                                              offsetof(struct tcphdr, check));
1372                         skb_reset_network_header(skb);
1373                 }
1374         }
1375 }
1376
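/* NAPI poll handler: drain the receive queue, passing packets up through
 * GRO, replenish the buffer pools and re-enable the VIO interrupt once the
 * queue is empty.
 */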
1377 static int ibmveth_poll(struct napi_struct *napi, int budget)
1378 {
1379         struct ibmveth_adapter *adapter =
1380                         container_of(napi, struct ibmveth_adapter, napi);
1381         struct net_device *netdev = adapter->netdev;
1382         int frames_processed = 0;
1383         unsigned long lpar_rc;
1384         u16 mss = 0;
1385
1386 restart_poll:
1387         while (frames_processed < budget) {
1388                 if (!ibmveth_rxq_pending_buffer(adapter))
1389                         break;
1390
1391                 smp_rmb();
1392                 if (!ibmveth_rxq_buffer_valid(adapter)) {
1393                         wmb(); /* suggested by larson1 */
1394                         adapter->rx_invalid_buffer++;
1395                         netdev_dbg(netdev, "recycling invalid buffer\n");
1396                         if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1397                                 break;
1398                 } else {
1399                         struct sk_buff *skb, *new_skb;
1400                         int length = ibmveth_rxq_frame_length(adapter);
1401                         int offset = ibmveth_rxq_frame_offset(adapter);
1402                         int csum_good = ibmveth_rxq_csum_good(adapter);
1403                         int lrg_pkt = ibmveth_rxq_large_packet(adapter);
1404                         __sum16 iph_check = 0;
1405
1406                         skb = ibmveth_rxq_get_buffer(adapter);
1407                         if (unlikely(!skb))
1408                                 break;
1409
1410                         /* if the large packet bit is set in the rx queue
1411                          * descriptor, the mss will be written by PHYP eight
1412                          * bytes from the start of the rx buffer, which is
1413                          * skb->data at this stage
1414                          */
1415                         if (lrg_pkt) {
1416                                 __be64 *rxmss = (__be64 *)(skb->data + 8);
1417
1418                                 mss = (u16)be64_to_cpu(*rxmss);
1419                         }
1420
1421                         new_skb = NULL;
1422                         if (length < rx_copybreak)
1423                                 new_skb = netdev_alloc_skb(netdev, length);
1424
1425                         if (new_skb) {
1426                                 skb_copy_to_linear_data(new_skb,
1427                                                         skb->data + offset,
1428                                                         length);
1429                                 if (rx_flush)
1430                                         ibmveth_flush_buffer(skb->data,
1431                                                 length + offset);
1432                                 if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1433                                         break;
1434                                 skb = new_skb;
1435                         } else {
1436                                 if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false)))
1437                                         break;
1438                                 skb_reserve(skb, offset);
1439                         }
1440
1441                         skb_put(skb, length);
1442                         skb->protocol = eth_type_trans(skb, netdev);
1443
1444                         /* PHYP without PLSO support places a -1 in the ip
1445                          * checksum for large send frames.
1446                          */
1447                         if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
1448                                 struct iphdr *iph = (struct iphdr *)skb->data;
1449
1450                                 iph_check = iph->check;
1451                         }
1452
1453                         if ((length > netdev->mtu + ETH_HLEN) ||
1454                             lrg_pkt || iph_check == 0xffff) {
1455                                 ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
1456                                 adapter->rx_large_packets++;
1457                         }
1458
1459                         if (csum_good) {
1460                                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1461                                 ibmveth_rx_csum_helper(skb, adapter);
1462                         }
1463
1464                         napi_gro_receive(napi, skb);    /* send it up */
1465
1466                         netdev->stats.rx_packets++;
1467                         netdev->stats.rx_bytes += length;
1468                         frames_processed++;
1469                 }
1470         }
1471
1472         ibmveth_replenish_task(adapter);
1473
1474         if (frames_processed == budget)
1475                 goto out;
1476
1477         if (!napi_complete_done(napi, frames_processed))
1478                 goto out;
1479
1480         /* We think we are done - reenable interrupts,
1481          * then check once more to make sure we are done.
1482          */
1483         lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
1484         if (WARN_ON(lpar_rc != H_SUCCESS)) {
1485                 schedule_work(&adapter->work);
1486                 goto out;
1487         }
1488
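        /* Frames may have arrived after the last check but before the
         * interrupt was re-enabled: reclaim the NAPI context, mask the
         * interrupt again and keep polling.
         */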
1489         if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
1490                 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1491                                        VIO_IRQ_DISABLE);
1492                 goto restart_poll;
1493         }
1494
1495 out:
1496         return frames_processed;
1497 }
1498
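/**
 * ibmveth_interrupt - VIO interrupt handler
 * @irq: interrupt number
 * @dev_instance: net_device associated with this interrupt
 *
 * Defers all receive processing to NAPI: if a poll is not already scheduled,
 * further VIO interrupts are masked and the poll routine is scheduled.
 *
 * Return: IRQ_HANDLED.
 */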
1499 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
1500 {
1501         struct net_device *netdev = dev_instance;
1502         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1503         unsigned long lpar_rc;
1504
1505         if (napi_schedule_prep(&adapter->napi)) {
1506                 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1507                                        VIO_IRQ_DISABLE);
1508                 WARN_ON(lpar_rc != H_SUCCESS);
1509                 __napi_schedule(&adapter->napi);
1510         }
1511         return IRQ_HANDLED;
1512 }
1513
1514 static void ibmveth_set_multicast_list(struct net_device *netdev)
1515 {
1516         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1517         unsigned long lpar_rc;
1518
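        /* If promiscuous mode is requested, or there are more multicast
         * addresses than the hardware filter table can hold, fall back to
         * receiving all multicast traffic unfiltered.
         */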
1519         if ((netdev->flags & IFF_PROMISC) ||
1520             (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
1521                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1522                                            IbmVethMcastEnableRecv |
1523                                            IbmVethMcastDisableFiltering,
1524                                            0);
1525                 if (lpar_rc != H_SUCCESS) {
1526                         netdev_err(netdev, "h_multicast_ctrl rc=%ld when entering promisc mode\n",
1527                                    lpar_rc);
1528                 }
1529         } else {
1530                 struct netdev_hw_addr *ha;
1531                 /* clear the filter table & disable filtering */
1532                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1533                                            IbmVethMcastEnableRecv |
1534                                            IbmVethMcastDisableFiltering |
1535                                            IbmVethMcastClearFilterTable,
1536                                            0);
1537                 if (lpar_rc != H_SUCCESS) {
1538                         netdev_err(netdev, "h_multicast_ctrl rc=%ld when attempting to clear filter table\n",
1539                                    lpar_rc);
1541                 }
1542                 /* add the addresses to the filter table */
1543                 netdev_for_each_mc_addr(ha, netdev) {
1544                         /* add the multicast address to the filter table */
1545                         u64 mcast_addr;
1546                         mcast_addr = ether_addr_to_u64(ha->addr);
1547                         lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1548                                                    IbmVethMcastAddFilter,
1549                                                    mcast_addr);
1550                         if (lpar_rc != H_SUCCESS) {
1551                                 netdev_err(netdev, "h_multicast_ctrl rc=%ld when adding an entry to the filter table\n",
1552                                            lpar_rc);
1554                         }
1555                 }
1556
1557                 /* re-enable filtering */
1558                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1559                                            IbmVethMcastEnableFiltering,
1560                                            0);
1561                 if (lpar_rc != H_SUCCESS) {
1562                         netdev_err(netdev, "h_multicast_ctrl rc=%ld when enabling filtering\n",
1563                                    lpar_rc);
1564                 }
1565         }
1566 }
1567
1568 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1569 {
1570         struct ibmveth_adapter *adapter = netdev_priv(dev);
1571         struct vio_dev *viodev = adapter->vdev;
1572         int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1573         int i, rc;
1574         int need_restart = 0;
1575
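        /* Fail early if no configured buffer pool is large enough to hold
         * the new MTU plus the buffer overhead.
         */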
1576         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1577                 if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
1578                         break;
1579
1580         if (i == IBMVETH_NUM_BUFF_POOLS)
1581                 return -EINVAL;
1582
1583         /* Deactivate all the buffer pools so that the next loop can activate
1584          * only the buffer pools necessary to hold the new MTU */
1585         if (netif_running(adapter->netdev)) {
1586                 need_restart = 1;
1587                 ibmveth_close(adapter->netdev);
1588         }
1589
1590         /* Activate the buffer pools in order until one can hold the new MTU */
1591         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1592                 adapter->rx_buff_pool[i].active = 1;
1593
1594                 if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
1595                         WRITE_ONCE(dev->mtu, new_mtu);
1596                         vio_cmo_set_dev_desired(viodev,
1597                                                 ibmveth_get_desired_dma
1598                                                 (viodev));
1599                         if (need_restart)
1600                                 return ibmveth_open(adapter->netdev);
1602                         return 0;
1603                 }
1604         }
1605
1606         if (need_restart && (rc = ibmveth_open(adapter->netdev)))
1607                 return rc;
1608
1609         return -EINVAL;
1610 }
1611
1612 #ifdef CONFIG_NET_POLL_CONTROLLER
1613 static void ibmveth_poll_controller(struct net_device *dev)
1614 {
1615         ibmveth_replenish_task(netdev_priv(dev));
1616         ibmveth_interrupt(dev->irq, dev);
1617 }
1618 #endif
1619
1620 /**
1621  * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1622  *
1623  * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1624  *
1625  * Return value:
1626  *      Number of bytes of IO data the driver will need to perform well.
1627  */
1628 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1629 {
1630         struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1631         struct ibmveth_adapter *adapter;
1632         struct iommu_table *tbl;
1633         unsigned long ret;
1634         int i;
1635         int rxqentries = 1;
1636
1637         tbl = get_iommu_table_base(&vdev->dev);
1638
1639         /* netdev inits at probe time along with the structures we need below */
1640         if (!netdev)
1641                 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);
1642
1643         adapter = netdev_priv(netdev);
1644
1645         ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1646         ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
1647         /* add size of mapped tx buffers */
1648         ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);
1649
1650         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1651                 /* add the size of the active receive buffers */
1652                 if (adapter->rx_buff_pool[i].active)
1653                         ret += adapter->rx_buff_pool[i].size *
1654                                IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].buff_size,
1655                                                 tbl);
1657                 rxqentries += adapter->rx_buff_pool[i].size;
1658         }
1659         /* add the size of the receive queue entries */
1660         ret += IOMMU_PAGE_ALIGN(
1661                 rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);
1662
1663         return ret;
1664 }
1665
1666 static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
1667 {
1668         struct ibmveth_adapter *adapter = netdev_priv(dev);
1669         struct sockaddr *addr = p;
1670         u64 mac_address;
1671         int rc;
1672
1673         if (!is_valid_ether_addr(addr->sa_data))
1674                 return -EADDRNOTAVAIL;
1675
1676         mac_address = ether_addr_to_u64(addr->sa_data);
1677         rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
1678         if (rc) {
1679                 netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
1680                 return rc;
1681         }
1682
1683         eth_hw_addr_set(dev, addr->sa_data);
1684
1685         return 0;
1686 }
1687
1688 static const struct net_device_ops ibmveth_netdev_ops = {
1689         .ndo_open               = ibmveth_open,
1690         .ndo_stop               = ibmveth_close,
1691         .ndo_start_xmit         = ibmveth_start_xmit,
1692         .ndo_set_rx_mode        = ibmveth_set_multicast_list,
1693         .ndo_eth_ioctl          = ibmveth_ioctl,
1694         .ndo_change_mtu         = ibmveth_change_mtu,
1695         .ndo_fix_features       = ibmveth_fix_features,
1696         .ndo_set_features       = ibmveth_set_features,
1697         .ndo_validate_addr      = eth_validate_addr,
1698         .ndo_set_mac_address    = ibmveth_set_mac_addr,
1699 #ifdef CONFIG_NET_POLL_CONTROLLER
1700         .ndo_poll_controller    = ibmveth_poll_controller,
1701 #endif
1702 };
1703
1704 static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1705 {
1706         int rc, i, mac_len;
1707         struct net_device *netdev;
1708         struct ibmveth_adapter *adapter;
1709         unsigned char *mac_addr_p;
1710         __be32 *mcastFilterSize_p;
1711         long ret;
1712         unsigned long ret_attr;
1713
1714         dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
1715                 dev->unit_address);
1716
1717         mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
1718                                                         &mac_len);
1719         if (!mac_addr_p) {
1720                 dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
1721                 return -EINVAL;
1722         }
1723         /* Workaround for old/broken pHyp: an 8-byte property has the MAC in its last 6 bytes */
1724         if (mac_len == 8)
1725                 mac_addr_p += 2;
1726         else if (mac_len != 6) {
1727                 dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
1728                         mac_len);
1729                 return -EINVAL;
1730         }
1731
1732         mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
1733                                                         VETH_MCAST_FILTER_SIZE,
1734                                                         NULL);
1735         if (!mcastFilterSize_p) {
1736                 dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE attribute\n");
1738                 return -EINVAL;
1739         }
1740
1741         netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
1742         if (!netdev)
1743                 return -ENOMEM;
1744
1745         adapter = netdev_priv(netdev);
1746         dev_set_drvdata(&dev->dev, netdev);
1747
1748         adapter->vdev = dev;
1749         adapter->netdev = netdev;
1750         INIT_WORK(&adapter->work, ibmveth_reset);
1751         adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
1752         ibmveth_init_link_settings(netdev);
1753
1754         netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);
1755
1756         netdev->irq = dev->irq;
1757         netdev->netdev_ops = &ibmveth_netdev_ops;
1758         netdev->ethtool_ops = &netdev_ethtool_ops;
1759         SET_NETDEV_DEV(netdev, &dev->dev);
1760         netdev->hw_features = NETIF_F_SG;
1761         if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
1762                 netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
1763                                        NETIF_F_RXCSUM;
1764         }
1765
1766         netdev->features |= netdev->hw_features;
1767
1768         ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
1769
1770         /* If running older firmware, TSO should not be enabled by default */
1771         if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
1772             !old_large_send) {
1773                 netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
1774                 netdev->features |= netdev->hw_features;
1775         } else {
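                /* TSO is still advertised through hw_features so it can be
                 * enabled via ethtool, but it stays off by default on this
                 * firmware.
                 */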
1776                 netdev->hw_features |= NETIF_F_TSO;
1777         }
1778
1779         adapter->is_active_trunk = false;
1780         if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
1781                 adapter->is_active_trunk = true;
1782                 netdev->hw_features |= NETIF_F_FRAGLIST;
1783                 netdev->features |= NETIF_F_FRAGLIST;
1784         }
1785
1786         netdev->min_mtu = IBMVETH_MIN_MTU;
1787         netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
1788
1789         eth_hw_addr_set(netdev, mac_addr_p);
1790
1791         if (firmware_has_feature(FW_FEATURE_CMO))
1792                 memcpy(pool_count, pool_count_cmo, sizeof(pool_count));
1793
1794         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1795                 struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
1796                 int error;
1797
1798                 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
1799                                          pool_count[i], pool_size[i],
1800                                          pool_active[i]);
1801                 error = kobject_init_and_add(kobj, &ktype_veth_pool,
1802                                              &dev->dev.kobj, "pool%d", i);
1803                 if (!error)
1804                         kobject_uevent(kobj, KOBJ_ADD);
1805         }
1806
1807         rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(),
1808                                                       IBMVETH_DEFAULT_QUEUES));
1809         if (rc) {
1810                 netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
1811                            rc);
1812                 free_netdev(netdev);
1813                 return rc;
1814         }
1815         adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
1816         for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
1817                 adapter->tx_ltb_ptr[i] = NULL;
1818
1819         netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
1820         netdev_dbg(netdev, "registering netdev...\n");
1821
1822         ibmveth_set_features(netdev, netdev->features);
1823
1824         rc = register_netdev(netdev);
1825
1826         if (rc) {
1827                 netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
1828                 free_netdev(netdev);
1829                 return rc;
1830         }
1831
1832         netdev_dbg(netdev, "registered\n");
1833
1834         return 0;
1835 }
1836
1837 static void ibmveth_remove(struct vio_dev *dev)
1838 {
1839         struct net_device *netdev = dev_get_drvdata(&dev->dev);
1840         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1841         int i;
1842
1843         cancel_work_sync(&adapter->work);
1844
1845         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1846                 kobject_put(&adapter->rx_buff_pool[i].kobj);
1847
1848         unregister_netdev(netdev);
1849
1850         free_netdev(netdev);
1851         dev_set_drvdata(&dev->dev, NULL);
1852 }
1853
1854 static struct attribute veth_active_attr;
1855 static struct attribute veth_num_attr;
1856 static struct attribute veth_size_attr;
1857
1858 static ssize_t veth_pool_show(struct kobject *kobj,
1859                               struct attribute *attr, char *buf)
1860 {
1861         struct ibmveth_buff_pool *pool = container_of(kobj,
1862                                                       struct ibmveth_buff_pool,
1863                                                       kobj);
1864
1865         if (attr == &veth_active_attr)
1866                 return sysfs_emit(buf, "%d\n", pool->active);
1867         else if (attr == &veth_num_attr)
1868                 return sysfs_emit(buf, "%d\n", pool->size);
1869         else if (attr == &veth_size_attr)
1870                 return sysfs_emit(buf, "%d\n", pool->buff_size);
1871         return 0;
1872 }
1873
1874 /**
1875  * veth_pool_store - sysfs store handler for pool attributes
1876  * @kobj: kobject embedded in pool
1877  * @attr: attribute being changed
1878  * @buf: value being stored
1879  * @count: length of @buf in bytes
1880  *
1881  * Stores new value in pool attribute. Verifies the range of the new value for
1882  * size and buff_size. Verifies that at least one pool remains available to
1883  * receive MTU-sized packets.
1884  *
1885  * Context: Process context.
1886  *          Takes and releases rtnl_mutex to ensure correct ordering of close
1887  *          and open calls.
1888  * Return:
1889  * * %-EPERM  - Not allowed to disable all MTU-sized buffer pools
1890  * * %-EINVAL - New pool size or buffer size is out of range
1891  * * count    - Return count for success
1892  * * other    - Return value from a failed ibmveth_open call
1893  */
1894 static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
1895                                const char *buf, size_t count)
1896 {
1897         struct ibmveth_buff_pool *pool = container_of(kobj,
1898                                                       struct ibmveth_buff_pool,
1899                                                       kobj);
1900         struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
1901         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1902         long value = simple_strtol(buf, NULL, 10);
1903         bool change = false;
1904         u32 newbuff_size;
1905         u32 oldbuff_size;
1906         int newactive;
1907         int oldactive;
1908         u32 newsize;
1909         u32 oldsize;
1910         long rc;
1911
1912         rtnl_lock();
1913
1914         oldbuff_size = pool->buff_size;
1915         oldactive = pool->active;
1916         oldsize = pool->size;
1917
1918         newbuff_size = oldbuff_size;
1919         newactive = oldactive;
1920         newsize = oldsize;
1921
1922         if (attr == &veth_active_attr) {
1923                 if (value && !oldactive) {
1924                         newactive = 1;
1925                         change = true;
1926                 } else if (!value && oldactive) {
1927                         int mtu = netdev->mtu + IBMVETH_BUFF_OH;
1928                         int i;
1929                         /* Make sure there is a buffer pool with buffers that
1930                          * can hold a packet of the size of the MTU */
1931                         for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1932                                 if (pool == &adapter->rx_buff_pool[i])
1933                                         continue;
1934                                 if (!adapter->rx_buff_pool[i].active)
1935                                         continue;
1936                                 if (mtu <= adapter->rx_buff_pool[i].buff_size)
1937                                         break;
1938                         }
1939
1940                         if (i == IBMVETH_NUM_BUFF_POOLS) {
1941                                 netdev_err(netdev, "no active pool >= MTU\n");
1942                                 rc = -EPERM;
1943                                 goto unlock_err;
1944                         }
1945
1946                         newactive = 0;
1947                         change = true;
1948                 }
1949         } else if (attr == &veth_num_attr) {
1950                 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
1951                         rc = -EINVAL;
1952                         goto unlock_err;
1953                 }
1954                 if (value != oldsize) {
1955                         newsize = value;
1956                         change = true;
1957                 }
1958         } else if (attr == &veth_size_attr) {
1959                 if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
1960                         rc = -EINVAL;
1961                         goto unlock_err;
1962                 }
1963                 if (value != oldbuff_size) {
1964                         newbuff_size = value;
1965                         change = true;
1966                 }
1967         }
1968
1969         if (change) {
1970                 if (netif_running(netdev))
1971                         ibmveth_close(netdev);
1972
1973                 pool->active = newactive;
1974                 pool->buff_size = newbuff_size;
1975                 pool->size = newsize;
1976
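                /* Reopen the device so the new pool parameters take effect;
                 * if the reopen fails, roll back to the previous values
                 * before returning the error.
                 */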
1977                 if (netif_running(netdev)) {
1978                         rc = ibmveth_open(netdev);
1979                         if (rc) {
1980                                 pool->active = oldactive;
1981                                 pool->buff_size = oldbuff_size;
1982                                 pool->size = oldsize;
1983                                 goto unlock_err;
1984                         }
1985                 }
1986         }
1987         rtnl_unlock();
1988
1989         /* kick the interrupt handler to allocate/deallocate pools */
1990         ibmveth_interrupt(netdev->irq, netdev);
1991         return count;
1992
1993 unlock_err:
1994         rtnl_unlock();
1995         return rc;
1996 }
1997
1998
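/* Bare struct attribute objects for the per-pool kobjects; show and store
 * requests are dispatched through veth_pool_ops rather than per-attribute
 * callbacks.
 */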
1999 #define ATTR(_name, _mode)                              \
2000         struct attribute veth_##_name##_attr = {        \
2001         .name = __stringify(_name), .mode = _mode,      \
2002         };
2003
2004 static ATTR(active, 0644);
2005 static ATTR(num, 0644);
2006 static ATTR(size, 0644);
2007
2008 static struct attribute *veth_pool_attrs[] = {
2009         &veth_active_attr,
2010         &veth_num_attr,
2011         &veth_size_attr,
2012         NULL,
2013 };
2014 ATTRIBUTE_GROUPS(veth_pool);
2015
2016 static const struct sysfs_ops veth_pool_ops = {
2017         .show   = veth_pool_show,
2018         .store  = veth_pool_store,
2019 };
2020
2021 static struct kobj_type ktype_veth_pool = {
2022         .release        = NULL,
2023         .sysfs_ops      = &veth_pool_ops,
2024         .default_groups = veth_pool_groups,
2025 };
2026
2027 static int ibmveth_resume(struct device *dev)
2028 {
2029         struct net_device *netdev = dev_get_drvdata(dev);
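
        /* Kick the interrupt handler so NAPI is rescheduled and the buffer
         * pools are replenished after resume.
         */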
2030         ibmveth_interrupt(netdev->irq, netdev);
2031         return 0;
2032 }
2033
2034 static const struct vio_device_id ibmveth_device_table[] = {
2035         { "network", "IBM,l-lan"},
2036         { "", "" }
2037 };
2038 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
2039
2040 static const struct dev_pm_ops ibmveth_pm_ops = {
2041         .resume = ibmveth_resume
2042 };
2043
2044 static struct vio_driver ibmveth_driver = {
2045         .id_table       = ibmveth_device_table,
2046         .probe          = ibmveth_probe,
2047         .remove         = ibmveth_remove,
2048         .get_desired_dma = ibmveth_get_desired_dma,
2049         .name           = ibmveth_driver_name,
2050         .pm             = &ibmveth_pm_ops,
2051 };
2052
2053 static int __init ibmveth_module_init(void)
2054 {
2055         printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
2056                ibmveth_driver_string, ibmveth_driver_version);
2057
2058         return vio_register_driver(&ibmveth_driver);
2059 }
2060
2061 static void __exit ibmveth_module_exit(void)
2062 {
2063         vio_unregister_driver(&ibmveth_driver);
2064 }
2065
2066 module_init(ibmveth_module_init);
2067 module_exit(ibmveth_module_exit);
2068
2069 #ifdef CONFIG_IBMVETH_KUNIT_TEST
2070 #include <kunit/test.h>
2071
2072 /**
2073  * ibmveth_reset_kunit - reset routine for running in KUnit environment
2074  *
2075  * @w: pointer to work_struct embedded in adapter structure
2076  *
2077  * Context: Called in the KUnit environment. Does nothing.
2078  *
2079  * Return: void
2080  */
2081 static void ibmveth_reset_kunit(struct work_struct *w)
2082 {
2083         netdev_dbg(NULL, "reset_kunit starting\n");
2084         netdev_dbg(NULL, "reset_kunit complete\n");
2085 }
2086
2087 /**
2088  * ibmveth_remove_buffer_from_pool_test - unit test for the error paths of
2089  *                                        ibmveth_remove_buffer_from_pool
2090  * @test: pointer to kunit structure
2091  *
2092  * Tests the error returns from ibmveth_remove_buffer_from_pool.
2093  * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be
2094  * checked to see that these warnings happened.
2095  *
2096  * Return: void
2097  */
2098 static void ibmveth_remove_buffer_from_pool_test(struct kunit *test)
2099 {
2100         struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2101         struct ibmveth_buff_pool *pool;
2102         u64 correlator;
2103
2104         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
2105
2106         INIT_WORK(&adapter->work, ibmveth_reset_kunit);
2107
2108         /* Set sane values for buffer pools */
2109         for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2110                 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2111                                          pool_count[i], pool_size[i],
2112                                          pool_active[i]);
2113
2114         pool = &adapter->rx_buff_pool[0];
2115         pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2116         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
2117
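        /* The correlator packs the pool index into the upper 32 bits and the
         * buffer index into the lower 32 bits; start with a pool index that
         * is out of range.
         */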
2118         correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0;
2119         KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2120         KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2121
2122         correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size;
2123         KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2124         KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2125
2126         correlator = (u64)0 | 0;
2127         pool->skbuff[0] = NULL;
2128         KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2129         KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2130
2131         flush_work(&adapter->work);
2132 }
2133
2134 /**
2135  * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer
2136  * @test: pointer to kunit structure
2137  *
2138  * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for
2139  * the NULL returns, so dmesg should be checked to see that these warnings
2140  * happened.
2141  *
2142  * Return: void
2143  */
2144 static void ibmveth_rxq_get_buffer_test(struct kunit *test)
2145 {
2146         struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2147         struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL);
2148         struct ibmveth_buff_pool *pool;
2149
2150         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
2151         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
2152
2153         INIT_WORK(&adapter->work, ibmveth_reset_kunit);
2154
2155         adapter->rx_queue.queue_len = 1;
2156         adapter->rx_queue.index = 0;
2157         adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry),
2158                                                      GFP_KERNEL);
2159         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr);
2160
2161         /* Set sane values for buffer pools */
2162         for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2163                 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2164                                          pool_count[i], pool_size[i],
2165                                          pool_active[i]);
2166
2167         pool = &adapter->rx_buff_pool[0];
2168         pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2169         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
2170
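        /* An out-of-range pool index or buffer index in the correlator must
         * yield a NULL buffer; a valid correlator returns the skb stashed in
         * the pool.
         */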
2171         adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0;
2172         KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
2173
2174         adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size;
2175         KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
2176
2177         pool->skbuff[0] = skb;
2178         adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0;
2179         KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter));
2180
2181         flush_work(&adapter->work);
2182 }
2183
2184 static struct kunit_case ibmveth_test_cases[] = {
2185         KUNIT_CASE(ibmveth_remove_buffer_from_pool_test),
2186         KUNIT_CASE(ibmveth_rxq_get_buffer_test),
2187         {}
2188 };
2189
2190 static struct kunit_suite ibmveth_test_suite = {
2191         .name = "ibmveth-kunit-test",
2192         .test_cases = ibmveth_test_cases,
2193 };
2194
2195 kunit_test_suite(ibmveth_test_suite);
2196 #endif