gve: Add ethtool support
drivers/net/ethernet/google/gve/gve_main.c
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2019 Google, Inc.
5  */
6
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <net/sch_generic.h>
16 #include "gve.h"
17 #include "gve_adminq.h"
18 #include "gve_register.h"
19
20 #define GVE_DEFAULT_RX_COPYBREAK        (256)
21
22 #define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
23 #define GVE_VERSION             "1.0.0"
24 #define GVE_VERSION_PREFIX      "GVE-"
25
26 const char gve_version_str[] = GVE_VERSION;
27 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
28
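/* ndo_get_stats64 callback: aggregate the per-ring RX/TX packet and byte
 * counters into @s, re-reading under each ring's u64_stats sync point
 * (statss) so the 64-bit counters are sampled consistently.
 */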
29 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
30 {
31         struct gve_priv *priv = netdev_priv(dev);
32         unsigned int start;
33         int ring;
34
35         if (priv->rx) {
36                 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
37                         do {
38                                 start = u64_stats_fetch_begin(&priv->rx[ring].statss);
39                                 s->rx_packets += priv->rx[ring].rpackets;
40                                 s->rx_bytes += priv->rx[ring].rbytes;
41                         } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
42                                                        start));
43                 }
44         }
45         if (priv->tx) {
46                 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
47                         do {
48                                 start = u64_stats_fetch_begin(&priv->tx[ring].statss);
49                                 s->tx_packets += priv->tx[ring].pkt_done;
50                                 s->tx_bytes += priv->tx[ring].bytes_done;
51                         } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
52                                                        start));
53                 }
54         }
55 }
56
57 static int gve_alloc_counter_array(struct gve_priv *priv)
58 {
59         priv->counter_array =
60                 dma_alloc_coherent(&priv->pdev->dev,
61                                    priv->num_event_counters *
62                                    sizeof(*priv->counter_array),
63                                    &priv->counter_array_bus, GFP_KERNEL);
64         if (!priv->counter_array)
65                 return -ENOMEM;
66
67         return 0;
68 }
69
70 static void gve_free_counter_array(struct gve_priv *priv)
71 {
72         dma_free_coherent(&priv->pdev->dev,
73                           priv->num_event_counters *
74                           sizeof(*priv->counter_array),
75                           priv->counter_array, priv->counter_array_bus);
76         priv->counter_array = NULL;
77 }
78
79 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
80 {
81         struct gve_priv *priv = arg;
82
83         queue_work(priv->gve_wq, &priv->service_task);
84         return IRQ_HANDLED;
85 }
86
87 static irqreturn_t gve_intr(int irq, void *arg)
88 {
89         struct gve_notify_block *block = arg;
90         struct gve_priv *priv = block->priv;
91
92         iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
93         napi_schedule_irqoff(&block->napi);
94         return IRQ_HANDLED;
95 }
96
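/* NAPI poll callback shared by TX and RX notify blocks: service the block's
 * rings, and only ack/re-arm the interrupt via the IRQ doorbell once no
 * further work is found.
 */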
97 static int gve_napi_poll(struct napi_struct *napi, int budget)
98 {
99         struct gve_notify_block *block;
100         __be32 __iomem *irq_doorbell;
101         bool reschedule = false;
102         struct gve_priv *priv;
103
104         block = container_of(napi, struct gve_notify_block, napi);
105         priv = block->priv;
106
107         if (block->tx)
108                 reschedule |= gve_tx_poll(block, budget);
109         if (block->rx)
110                 reschedule |= gve_rx_poll(block, budget);
111
112         if (reschedule)
113                 return budget;
114
115         napi_complete(napi);
116         irq_doorbell = gve_irq_doorbell(priv, block);
117         iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
118
119         /* Double check we have no extra work.
120          * Ensure unmask synchronizes with checking for work.
121          */
122         dma_rmb();
123         if (block->tx)
124                 reschedule |= gve_tx_poll(block, -1);
125         if (block->rx)
126                 reschedule |= gve_rx_poll(block, -1);
127         if (reschedule && napi_reschedule(napi))
128                 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
129
130         return 0;
131 }
132
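/* Allocate MSI-X vectors and the DMA-coherent array of notification blocks.
 * The last vector is reserved for management; the remaining vectors are
 * split between TX and RX and their IRQ affinity is spread over the online
 * CPUs. If fewer vectors are granted than requested, the TX/RX max queue
 * counts are scaled down to fit.
 */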
133 static int gve_alloc_notify_blocks(struct gve_priv *priv)
134 {
135         int num_vecs_requested = priv->num_ntfy_blks + 1;
136         char *name = priv->dev->name;
137         unsigned int active_cpus;
138         int vecs_enabled;
139         int i, j;
140         int err;
141
142         priv->msix_vectors = kvzalloc(num_vecs_requested *
143                                       sizeof(*priv->msix_vectors), GFP_KERNEL);
144         if (!priv->msix_vectors)
145                 return -ENOMEM;
146         for (i = 0; i < num_vecs_requested; i++)
147                 priv->msix_vectors[i].entry = i;
148         vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
149                                              GVE_MIN_MSIX, num_vecs_requested);
150         if (vecs_enabled < 0) {
151                 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
152                         GVE_MIN_MSIX, vecs_enabled);
153                 err = vecs_enabled;
154                 goto abort_with_msix_vectors;
155         }
156         if (vecs_enabled != num_vecs_requested) {
157                 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
158                 int vecs_per_type = new_num_ntfy_blks / 2;
159                 int vecs_left = new_num_ntfy_blks % 2;
160
161                 priv->num_ntfy_blks = new_num_ntfy_blks;
162                 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
163                                                 vecs_per_type);
164                 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
165                                                 vecs_per_type + vecs_left);
166                 dev_err(&priv->pdev->dev,
167                         "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
168                         vecs_enabled, priv->tx_cfg.max_queues,
169                         priv->rx_cfg.max_queues);
170                 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
171                         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
172                 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
173                         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
174         }
175         /* Half the notification blocks go to TX and half to RX */
176         active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
177
178         /* Setup Management Vector  - the last vector */
179         snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
180                  name);
181         err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
182                           gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
183         if (err) {
184                 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
185                 goto abort_with_msix_enabled;
186         }
187         priv->ntfy_blocks =
188                 dma_alloc_coherent(&priv->pdev->dev,
189                                    priv->num_ntfy_blks *
190                                    sizeof(*priv->ntfy_blocks),
191                                    &priv->ntfy_block_bus, GFP_KERNEL);
192         if (!priv->ntfy_blocks) {
193                 err = -ENOMEM;
194                 goto abort_with_mgmt_vector;
195         }
196         /* Setup the other blocks - the first n-1 vectors */
197         for (i = 0; i < priv->num_ntfy_blks; i++) {
198                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
199                 int msix_idx = i;
200
201                 snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
202                          name, i);
203                 block->priv = priv;
204                 err = request_irq(priv->msix_vectors[msix_idx].vector,
205                                   gve_intr, 0, block->name, block);
206                 if (err) {
207                         dev_err(&priv->pdev->dev,
208                                 "Failed to receive msix vector %d\n", i);
209                         goto abort_with_some_ntfy_blocks;
210                 }
211                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
212                                       get_cpu_mask(i % active_cpus));
213         }
214         return 0;
215 abort_with_some_ntfy_blocks:
216         for (j = 0; j < i; j++) {
217                 struct gve_notify_block *block = &priv->ntfy_blocks[j];
218                 int msix_idx = j;
219
220                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
221                                       NULL);
222                 free_irq(priv->msix_vectors[msix_idx].vector, block);
223         }
224         dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
225                           sizeof(*priv->ntfy_blocks),
226                           priv->ntfy_blocks, priv->ntfy_block_bus);
227         priv->ntfy_blocks = NULL;
228 abort_with_mgmt_vector:
229         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
230 abort_with_msix_enabled:
231         pci_disable_msix(priv->pdev);
232 abort_with_msix_vectors:
233         kfree(priv->msix_vectors);
234         priv->msix_vectors = NULL;
235         return err;
236 }
237
238 static void gve_free_notify_blocks(struct gve_priv *priv)
239 {
240         int i;
241
242         /* Free the irqs */
243         for (i = 0; i < priv->num_ntfy_blks; i++) {
244                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
245                 int msix_idx = i;
246
247                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
248                                       NULL);
249                 free_irq(priv->msix_vectors[msix_idx].vector, block);
250         }
251         dma_free_coherent(&priv->pdev->dev,
252                           priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
253                           priv->ntfy_blocks, priv->ntfy_block_bus);
254         priv->ntfy_blocks = NULL;
255         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
256         pci_disable_msix(priv->pdev);
257         kfree(priv->msix_vectors);
258         priv->msix_vectors = NULL;
259 }
260
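/* Allocate the event counter array and notification blocks and describe
 * them to the device over the admin queue.
 */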
261 static int gve_setup_device_resources(struct gve_priv *priv)
262 {
263         int err;
264
265         err = gve_alloc_counter_array(priv);
266         if (err)
267                 return err;
268         err = gve_alloc_notify_blocks(priv);
269         if (err)
270                 goto abort_with_counter;
271         err = gve_adminq_configure_device_resources(priv,
272                                                     priv->counter_array_bus,
273                                                     priv->num_event_counters,
274                                                     priv->ntfy_block_bus,
275                                                     priv->num_ntfy_blks);
276         if (unlikely(err)) {
277                 dev_err(&priv->pdev->dev,
278                         "could not setup device_resources: err=%d\n", err);
279                 err = -ENXIO;
280                 goto abort_with_ntfy_blocks;
281         }
282         gve_set_device_resources_ok(priv);
283         return 0;
284 abort_with_ntfy_blocks:
285         gve_free_notify_blocks(priv);
286 abort_with_counter:
287         gve_free_counter_array(priv);
288         return err;
289 }
290
291 static void gve_trigger_reset(struct gve_priv *priv);
292
293 static void gve_teardown_device_resources(struct gve_priv *priv)
294 {
295         int err;
296
297         /* Tell device its resources are being freed */
298         if (gve_get_device_resources_ok(priv)) {
299                 err = gve_adminq_deconfigure_device_resources(priv);
300                 if (err) {
301                         dev_err(&priv->pdev->dev,
302                                 "Could not deconfigure device resources: err=%d\n",
303                                 err);
304                         gve_trigger_reset(priv);
305                 }
306         }
307         gve_free_counter_array(priv);
308         gve_free_notify_blocks(priv);
309         gve_clear_device_resources_ok(priv);
310 }
311
312 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
313 {
314         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
315
316         netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
317                        NAPI_POLL_WEIGHT);
318 }
319
320 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
321 {
322         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
323
324         netif_napi_del(&block->napi);
325 }
326
327 static int gve_register_qpls(struct gve_priv *priv)
328 {
329         int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
330         int err;
331         int i;
332
333         for (i = 0; i < num_qpls; i++) {
334                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
335                 if (err) {
336                         netif_err(priv, drv, priv->dev,
337                                   "failed to register queue page list %d\n",
338                                   priv->qpls[i].id);
339                         /* This failure will trigger a reset - no need to clean
340                          * up
341                          */
342                         return err;
343                 }
344         }
345         return 0;
346 }
347
348 static int gve_unregister_qpls(struct gve_priv *priv)
349 {
350         int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
351         int err;
352         int i;
353
354         for (i = 0; i < num_qpls; i++) {
355                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
356                 /* This failure will trigger a reset - no need to clean up */
357                 if (err) {
358                         netif_err(priv, drv, priv->dev,
359                                   "Failed to unregister queue page list %d\n",
360                                   priv->qpls[i].id);
361                         return err;
362                 }
363         }
364         return 0;
365 }
366
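/* Ask the device (via the admin queue) to create each TX and RX queue, and
 * ring every RX doorbell so the prefilled buffers are handed to the NIC.
 */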
367 static int gve_create_rings(struct gve_priv *priv)
368 {
369         int err;
370         int i;
371
372         for (i = 0; i < priv->tx_cfg.num_queues; i++) {
373                 err = gve_adminq_create_tx_queue(priv, i);
374                 if (err) {
375                         netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
376                                   i);
377                         /* This failure will trigger a reset - no need to clean
378                          * up
379                          */
380                         return err;
381                 }
382                 netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
383         }
384         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
385                 err = gve_adminq_create_rx_queue(priv, i);
386                 if (err) {
387                         netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
388                                   i);
389                         /* This failure will trigger a reset - no need to clean
390                          * up
391                          */
392                         return err;
393                 }
394                 /* Rx data ring has been prefilled with packet buffers at
395                  * queue allocation time.
396                  * Write the doorbell to provide descriptor slots and packet
397                  * buffers to the NIC.
398                  */
399                 gve_rx_write_doorbell(priv, &priv->rx[i]);
400                 netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
401         }
402
403         return 0;
404 }
405
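/* Allocate host-side TX and RX ring state, initialize the per-ring stats
 * sync points, and register each ring's notify block with NAPI.
 */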
406 static int gve_alloc_rings(struct gve_priv *priv)
407 {
408         int ntfy_idx;
409         int err;
410         int i;
411
412         /* Setup tx rings */
413         priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
414                             GFP_KERNEL);
415         if (!priv->tx)
416                 return -ENOMEM;
417         err = gve_tx_alloc_rings(priv);
418         if (err)
419                 goto free_tx;
420         /* Setup rx rings */
421         priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
422                             GFP_KERNEL);
423         if (!priv->rx) {
424                 err = -ENOMEM;
425                 goto free_tx_queue;
426         }
427         err = gve_rx_alloc_rings(priv);
428         if (err)
429                 goto free_rx;
430         /* Add tx napi & init sync stats */
431         for (i = 0; i < priv->tx_cfg.num_queues; i++) {
432                 u64_stats_init(&priv->tx[i].statss);
433                 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
434                 gve_add_napi(priv, ntfy_idx);
435         }
436         /* Add rx napi & init sync stats */
437         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
438                 u64_stats_init(&priv->rx[i].statss);
439                 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
440                 gve_add_napi(priv, ntfy_idx);
441         }
442
443         return 0;
444
445 free_rx:
446         kfree(priv->rx);
447         priv->rx = NULL;
448 free_tx_queue:
449         gve_tx_free_rings(priv);
450 free_tx:
451         kfree(priv->tx);
452         priv->tx = NULL;
453         return err;
454 }
455
456 static int gve_destroy_rings(struct gve_priv *priv)
457 {
458         int err;
459         int i;
460
461         for (i = 0; i < priv->tx_cfg.num_queues; i++) {
462                 err = gve_adminq_destroy_tx_queue(priv, i);
463                 if (err) {
464                         netif_err(priv, drv, priv->dev,
465                                   "failed to destroy tx queue %d\n",
466                                   i);
467                         /* This failure will trigger a reset - no need to clean
468                          * up
469                          */
470                         return err;
471                 }
472                 netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
473         }
474         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
475                 err = gve_adminq_destroy_rx_queue(priv, i);
476                 if (err) {
477                         netif_err(priv, drv, priv->dev,
478                                   "failed to destroy rx queue %d\n",
479                                   i);
480                         /* This failure will trigger a reset - no need to clean
481                          * up
482                          */
483                         return err;
484                 }
485                 netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
486         }
487         return 0;
488 }
489
490 static void gve_free_rings(struct gve_priv *priv)
491 {
492         int ntfy_idx;
493         int i;
494
495         if (priv->tx) {
496                 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
497                         ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
498                         gve_remove_napi(priv, ntfy_idx);
499                 }
500                 gve_tx_free_rings(priv);
501                 kfree(priv->tx);
502                 priv->tx = NULL;
503         }
504         if (priv->rx) {
505                 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
506                         ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
507                         gve_remove_napi(priv, ntfy_idx);
508                 }
509                 gve_rx_free_rings(priv);
510                 kfree(priv->rx);
511                 priv->rx = NULL;
512         }
513 }
514
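/* Allocate a page and DMA-map it in the given direction; on mapping failure
 * the page is released and -ENOMEM returned.
 */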
515 int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
516                    enum dma_data_direction dir)
517 {
518         *page = alloc_page(GFP_KERNEL);
519         if (!*page)
520                 return -ENOMEM;
521         *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
522         if (dma_mapping_error(dev, *dma)) {
523                 put_page(*page);
524                 return -ENOMEM;
525         }
526         return 0;
527 }
528
529 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
530                                      int pages)
531 {
532         struct gve_queue_page_list *qpl = &priv->qpls[id];
533         int err;
534         int i;
535
536         if (pages + priv->num_registered_pages > priv->max_registered_pages) {
537                 netif_err(priv, drv, priv->dev,
538                           "Reached max number of registered pages %llu > %llu\n",
539                           pages + priv->num_registered_pages,
540                           priv->max_registered_pages);
541                 return -EINVAL;
542         }
543
544         qpl->id = id;
545         qpl->num_entries = pages;
546         qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
547         /* caller handles clean up */
548         if (!qpl->pages)
549                 return -ENOMEM;
550         qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
551                                    GFP_KERNEL);
552         /* caller handles clean up */
553         if (!qpl->page_buses)
554                 return -ENOMEM;
555
556         for (i = 0; i < pages; i++) {
557                 err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
558                                      &qpl->page_buses[i],
559                                      gve_qpl_dma_dir(priv, id));
560                 /* caller handles clean up */
561                 if (err)
562                         return -ENOMEM;
563         }
564         priv->num_registered_pages += pages;
565
566         return 0;
567 }
568
569 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
570                    enum dma_data_direction dir)
571 {
572         if (!dma_mapping_error(dev, dma))
573                 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
574         if (page)
575                 put_page(page);
576 }
577
578 static void gve_free_queue_page_list(struct gve_priv *priv,
579                                      int id)
580 {
581         struct gve_queue_page_list *qpl = &priv->qpls[id];
582         int i;
583
584         if (!qpl->pages)
585                 return;
586         if (!qpl->page_buses)
587                 goto free_pages;
588
589         for (i = 0; i < qpl->num_entries; i++)
590                 gve_free_page(&priv->pdev->dev, qpl->pages[i],
591                               qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
592
593         kfree(qpl->page_buses);
594 free_pages:
595         kfree(qpl->pages);
596         priv->num_registered_pages -= qpl->num_entries;
597 }
598
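/* Allocate and populate the queue page lists (QPLs) used by the TX and RX
 * queues, plus the bitmap that tracks which QPL IDs are in use.
 */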
599 static int gve_alloc_qpls(struct gve_priv *priv)
600 {
601         int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
602         int i, j;
603         int err;
604
605         priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
606         if (!priv->qpls)
607                 return -ENOMEM;
608
609         for (i = 0; i < gve_num_tx_qpls(priv); i++) {
610                 err = gve_alloc_queue_page_list(priv, i,
611                                                 priv->tx_pages_per_qpl);
612                 if (err)
613                         goto free_qpls;
614         }
615         for (; i < num_qpls; i++) {
616                 err = gve_alloc_queue_page_list(priv, i,
617                                                 priv->rx_pages_per_qpl);
618                 if (err)
619                         goto free_qpls;
620         }
621
622         priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
623                                      sizeof(unsigned long) * BITS_PER_BYTE;
624         priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
625                                             sizeof(unsigned long), GFP_KERNEL);
626         if (!priv->qpl_cfg.qpl_id_map) {
                err = -ENOMEM;
627                 goto free_qpls;
        }
628
629         return 0;
630
631 free_qpls:
632         for (j = 0; j <= i && j < num_qpls; j++)
633                 gve_free_queue_page_list(priv, j);
634         kfree(priv->qpls);
635         return err;
636 }
637
638 static void gve_free_qpls(struct gve_priv *priv)
639 {
640         int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
641         int i;
642
643         kfree(priv->qpl_cfg.qpl_id_map);
644
645         for (i = 0; i < num_qpls; i++)
646                 gve_free_queue_page_list(priv, i);
647
648         kfree(priv->qpls);
649 }
650
651 /* Use this to schedule a reset when the device is capable of continuing
652  * to handle other requests in its current state. If it is not, do a reset
653  * in thread instead.
654  */
655 void gve_schedule_reset(struct gve_priv *priv)
656 {
657         gve_set_do_reset(priv);
658         queue_work(priv->gve_wq, &priv->service_task);
659 }
660
661 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
662 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
663 static void gve_turndown(struct gve_priv *priv);
664 static void gve_turnup(struct gve_priv *priv);
665
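/* ndo_open: allocate QPLs and rings, register them with the device, enable
 * NAPI/interrupts and report carrier. Failures on the device side fall back
 * to a full reset rather than piecemeal unwinding.
 */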
666 static int gve_open(struct net_device *dev)
667 {
668         struct gve_priv *priv = netdev_priv(dev);
669         int err;
670
671         err = gve_alloc_qpls(priv);
672         if (err)
673                 return err;
674         err = gve_alloc_rings(priv);
675         if (err)
676                 goto free_qpls;
677
678         err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
679         if (err)
680                 goto free_rings;
681         err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
682         if (err)
683                 goto free_rings;
684
685         err = gve_register_qpls(priv);
686         if (err)
687                 goto reset;
688         err = gve_create_rings(priv);
689         if (err)
690                 goto reset;
691         gve_set_device_rings_ok(priv);
692
693         gve_turnup(priv);
694         netif_carrier_on(dev);
695         return 0;
696
697 free_rings:
698         gve_free_rings(priv);
699 free_qpls:
700         gve_free_qpls(priv);
701         return err;
702
703 reset:
704         /* This must have been called from a reset due to the rtnl lock
705          * so just return at this point.
706          */
707         if (gve_get_reset_in_progress(priv))
708                 return err;
709         /* Otherwise reset before returning */
710         gve_reset_and_teardown(priv, true);
711         /* if this fails there is nothing we can do so just ignore the return */
712         gve_reset_recovery(priv, false);
713         /* return the original error */
714         return err;
715 }
716
717 static int gve_close(struct net_device *dev)
718 {
719         struct gve_priv *priv = netdev_priv(dev);
720         int err;
721
722         netif_carrier_off(dev);
723         if (gve_get_device_rings_ok(priv)) {
724                 gve_turndown(priv);
725                 err = gve_destroy_rings(priv);
726                 if (err)
727                         goto err;
728                 err = gve_unregister_qpls(priv);
729                 if (err)
730                         goto err;
731                 gve_clear_device_rings_ok(priv);
732         }
733
734         gve_free_rings(priv);
735         gve_free_qpls(priv);
736         return 0;
737
738 err:
739         /* This must have been called from a reset due to the rtnl lock
740          * so just return at this point.
741          */
742         if (gve_get_reset_in_progress(priv))
743                 return err;
744         /* Otherwise reset before returning */
745         gve_reset_and_teardown(priv, true);
746         return gve_reset_recovery(priv, false);
747 }
748
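/* Apply a new TX/RX queue configuration (used by the ethtool channels
 * path): if the interface is up, close it, install the new config and
 * reopen; otherwise just record the config for the next open.
 */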
749 int gve_adjust_queues(struct gve_priv *priv,
750                       struct gve_queue_config new_rx_config,
751                       struct gve_queue_config new_tx_config)
752 {
753         int err;
754
755         if (netif_carrier_ok(priv->dev)) {
756                 /* To make this process as simple as possible we teardown the
757                  * device, set the new configuration, and then bring the device
758                  * up again.
759                  */
760                 err = gve_close(priv->dev);
761                 /* we have already tried to reset in close,
762                  * just fail at this point
763                  */
764                 if (err)
765                         return err;
766                 priv->tx_cfg = new_tx_config;
767                 priv->rx_cfg = new_rx_config;
768
769                 err = gve_open(priv->dev);
770                 if (err)
771                         goto err;
772
773                 return 0;
774         }
775         /* Set the config for the next up. */
776         priv->tx_cfg = new_tx_config;
777         priv->rx_cfg = new_rx_config;
778
779         return 0;
780 err:
781         netif_err(priv, drv, priv->dev,
782                   "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
783         gve_turndown(priv);
784         return err;
785 }
786
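/* Quiesce the data path: drop carrier, disable NAPI on every notify block
 * and stop the TX queues so no new work can be submitted.
 */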
787 static void gve_turndown(struct gve_priv *priv)
788 {
789         int idx;
790
791         if (netif_carrier_ok(priv->dev))
792                 netif_carrier_off(priv->dev);
793
794         if (!gve_get_napi_enabled(priv))
795                 return;
796
797         /* Disable napi to prevent more work from coming in */
798         for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
799                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
800                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
801
802                 napi_disable(&block->napi);
803         }
804         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
805                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
806                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
807
808                 napi_disable(&block->napi);
809         }
810
811         /* Stop tx queues */
812         netif_tx_disable(priv->dev);
813
814         gve_clear_napi_enabled(priv);
815 }
816
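/* Restart the data path: start the TX queues, re-enable NAPI and unmask
 * each block's interrupt by writing 0 to its IRQ doorbell.
 */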
817 static void gve_turnup(struct gve_priv *priv)
818 {
819         int idx;
820
821         /* Start the tx queues */
822         netif_tx_start_all_queues(priv->dev);
823
824         /* Enable napi and unmask interrupts for all queues */
825         for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
826                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
827                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
828
829                 napi_enable(&block->napi);
830                 iowrite32be(0, gve_irq_doorbell(priv, block));
831         }
832         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
833                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
834                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
835
836                 napi_enable(&block->napi);
837                 iowrite32be(0, gve_irq_doorbell(priv, block));
838         }
839
840         gve_set_napi_enabled(priv);
841 }
842
843 static void gve_tx_timeout(struct net_device *dev)
844 {
845         struct gve_priv *priv = netdev_priv(dev);
846
847         gve_schedule_reset(priv);
848         priv->tx_timeo_cnt++;
849 }
850
851 static const struct net_device_ops gve_netdev_ops = {
852         .ndo_start_xmit         =       gve_tx,
853         .ndo_open               =       gve_open,
854         .ndo_stop               =       gve_close,
855         .ndo_get_stats64        =       gve_get_stats,
856         .ndo_tx_timeout         =       gve_tx_timeout,
857 };
858
859 static void gve_handle_status(struct gve_priv *priv, u32 status)
860 {
861         if (GVE_DEVICE_STATUS_RESET_MASK & status) {
862                 dev_info(&priv->pdev->dev, "Device requested reset.\n");
863                 gve_set_do_reset(priv);
864         }
865 }
866
867 static void gve_handle_reset(struct gve_priv *priv)
868 {
869         /* A service task will be scheduled at the end of probe to catch any
870          * resets that need to happen, and we don't want to reset until
871          * probe is done.
872          */
873         if (gve_get_probe_in_progress(priv))
874                 return;
875
876         if (gve_get_do_reset(priv)) {
877                 rtnl_lock();
878                 gve_reset(priv, false);
879                 rtnl_unlock();
880         }
881 }
882
883 /* Handle NIC status register changes and reset requests */
884 static void gve_service_task(struct work_struct *work)
885 {
886         struct gve_priv *priv = container_of(work, struct gve_priv,
887                                              service_task);
888
889         gve_handle_status(priv,
890                           ioread32be(&priv->reg_bar0->device_status));
891
892         gve_handle_reset(priv);
893 }
894
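/* Initialize the driver's private state: allocate the admin queue, query
 * the device (skipped on reset recovery), size the notify-block and queue
 * configuration, and set up device resources.
 */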
895 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
896 {
897         int num_ntfy;
898         int err;
899
900         /* Set up the adminq */
901         err = gve_adminq_alloc(&priv->pdev->dev, priv);
902         if (err) {
903                 dev_err(&priv->pdev->dev,
904                         "Failed to alloc admin queue: err=%d\n", err);
905                 return err;
906         }
907
908         if (skip_describe_device)
909                 goto setup_device;
910
911         /* Get the initial information we need from the device */
912         err = gve_adminq_describe_device(priv);
913         if (err) {
914                 dev_err(&priv->pdev->dev,
915                         "Could not get device information: err=%d\n", err);
916                 goto err;
917         }
918         if (priv->dev->max_mtu > PAGE_SIZE) {
919                 priv->dev->max_mtu = PAGE_SIZE;
920                 err = gve_adminq_set_mtu(priv, priv->dev->mtu);
921                 if (err) {
922                         netif_err(priv, drv, priv->dev, "Could not set mtu");
923                         goto err;
924                 }
925         }
926         priv->dev->mtu = priv->dev->max_mtu;
927         num_ntfy = pci_msix_vec_count(priv->pdev);
928         if (num_ntfy <= 0) {
929                 dev_err(&priv->pdev->dev,
930                         "could not count MSI-x vectors: err=%d\n", num_ntfy);
931                 err = num_ntfy;
932                 goto err;
933         } else if (num_ntfy < GVE_MIN_MSIX) {
934                 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
935                         GVE_MIN_MSIX, num_ntfy);
936                 err = -EINVAL;
937                 goto err;
938         }
939
940         priv->num_registered_pages = 0;
941         priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
942         /* gvnic has one Notification Block per MSI-x vector, except for the
943          * management vector
944          */
945         priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
946         priv->mgmt_msix_idx = priv->num_ntfy_blks;
947
948         priv->tx_cfg.max_queues =
949                 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
950         priv->rx_cfg.max_queues =
951                 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
952
953         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
954         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
955         if (priv->default_num_queues > 0) {
956                 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
957                                                 priv->tx_cfg.num_queues);
958                 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
959                                                 priv->rx_cfg.num_queues);
960         }
961
962         netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
963                    priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
964         netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
965                    priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
966
967 setup_device:
968         err = gve_setup_device_resources(priv);
969         if (!err)
970                 return 0;
971 err:
972         gve_adminq_free(&priv->pdev->dev, priv);
973         return err;
974 }
975
976 static void gve_teardown_priv_resources(struct gve_priv *priv)
977 {
978         gve_teardown_device_resources(priv);
979         gve_adminq_free(&priv->pdev->dev, priv);
980 }
981
982 static void gve_trigger_reset(struct gve_priv *priv)
983 {
984         /* Reset the device by releasing the AQ */
985         gve_adminq_release(priv);
986 }
987
988 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
989 {
990         gve_trigger_reset(priv);
991         /* With the reset having already happened, close cannot fail */
992         if (was_up)
993                 gve_close(priv->dev);
994         gve_teardown_priv_resources(priv);
995 }
996
997 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
998 {
999         int err;
1000
1001         err = gve_init_priv(priv, true);
1002         if (err)
1003                 goto err;
1004         if (was_up) {
1005                 err = gve_open(priv->dev);
1006                 if (err)
1007                         goto err;
1008         }
1009         return 0;
1010 err:
1011         dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1012         gve_turndown(priv);
1013         return err;
1014 }
1015
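/* Full device reset: attempt a clean close/teardown when requested (falling
 * back to an immediate teardown on failure), then rebuild priv state and
 * reopen if the interface was previously up.
 */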
1016 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1017 {
1018         bool was_up = netif_carrier_ok(priv->dev);
1019         int err;
1020
1021         dev_info(&priv->pdev->dev, "Performing reset\n");
1022         gve_clear_do_reset(priv);
1023         gve_set_reset_in_progress(priv);
1024         /* If we aren't attempting to teardown normally, just go turndown and
1025          * reset right away.
1026          */
1027         if (!attempt_teardown) {
1028                 gve_turndown(priv);
1029                 gve_reset_and_teardown(priv, was_up);
1030         } else {
1031                 /* Otherwise attempt to close normally */
1032                 if (was_up) {
1033                         err = gve_close(priv->dev);
1034                         /* If that fails reset as we did above */
1035                         if (err)
1036                                 gve_reset_and_teardown(priv, was_up);
1037                 }
1038                 /* Clean up any remaining resources */
1039                 gve_teardown_priv_resources(priv);
1040         }
1041
1042         /* Set it all back up */
1043         err = gve_reset_recovery(priv, was_up);
1044         gve_clear_reset_in_progress(priv);
1045         return err;
1046 }
1047
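/* Report the driver version to the device by writing the prefix and version
 * strings, one byte at a time, to the driver-version register, terminated
 * by a newline.
 */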
1048 static void gve_write_version(u8 __iomem *driver_version_register)
1049 {
1050         const char *c = gve_version_prefix;
1051
1052         while (*c) {
1053                 writeb(*c, driver_version_register);
1054                 c++;
1055         }
1056
1057         c = gve_version_str;
1058         while (*c) {
1059                 writeb(*c, driver_version_register);
1060                 c++;
1061         }
1062         writeb('\n', driver_version_register);
1063 }
1064
1065 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1066 {
1067         int max_tx_queues, max_rx_queues;
1068         struct net_device *dev;
1069         __be32 __iomem *db_bar;
1070         struct gve_registers __iomem *reg_bar;
1071         struct gve_priv *priv;
1072         int err;
1073
1074         err = pci_enable_device(pdev);
1075         if (err)
1076                 return -ENXIO;
1077
1078         err = pci_request_regions(pdev, "gvnic-cfg");
1079         if (err)
1080                 goto abort_with_enabled;
1081
1082         pci_set_master(pdev);
1083
1084         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1085         if (err) {
1086                 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1087                 goto abort_with_pci_region;
1088         }
1089
1090         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1091         if (err) {
1092                 dev_err(&pdev->dev,
1093                         "Failed to set consistent dma mask: err=%d\n", err);
1094                 goto abort_with_pci_region;
1095         }
1096
1097         reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1098         if (!reg_bar) {
1099                 dev_err(&pdev->dev, "Failed to map pci bar!\n");
1100                 err = -ENOMEM;
1101                 goto abort_with_pci_region;
1102         }
1103
1104         db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1105         if (!db_bar) {
1106                 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1107                 err = -ENOMEM;
1108                 goto abort_with_reg_bar;
1109         }
1110
1111         gve_write_version(&reg_bar->driver_version);
1112         /* Get max queues to alloc etherdev */
1113         max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1114         max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1115         /* Alloc and setup the netdev and priv */
1116         dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1117         if (!dev) {
1118                 dev_err(&pdev->dev, "could not allocate netdev\n");
1119                 err = -ENOMEM;
                goto abort_with_db_bar;
1120         }
1121         SET_NETDEV_DEV(dev, &pdev->dev);
1122         pci_set_drvdata(pdev, dev);
1123         dev->ethtool_ops = &gve_ethtool_ops;
1124         dev->netdev_ops = &gve_netdev_ops;
1125         /* advertise features */
1126         dev->hw_features = NETIF_F_HIGHDMA;
1127         dev->hw_features |= NETIF_F_SG;
1128         dev->hw_features |= NETIF_F_HW_CSUM;
1129         dev->hw_features |= NETIF_F_TSO;
1130         dev->hw_features |= NETIF_F_TSO6;
1131         dev->hw_features |= NETIF_F_TSO_ECN;
1132         dev->hw_features |= NETIF_F_RXCSUM;
1133         dev->hw_features |= NETIF_F_RXHASH;
1134         dev->features = dev->hw_features;
1135         dev->watchdog_timeo = 5 * HZ;
1136         dev->min_mtu = ETH_MIN_MTU;
1137         netif_carrier_off(dev);
1138
1139         priv = netdev_priv(dev);
1140         priv->dev = dev;
1141         priv->pdev = pdev;
1142         priv->msg_enable = DEFAULT_MSG_LEVEL;
1143         priv->reg_bar0 = reg_bar;
1144         priv->db_bar2 = db_bar;
1145         priv->service_task_flags = 0x0;
1146         priv->state_flags = 0x0;
1147
1148         gve_set_probe_in_progress(priv);
1149         priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1150         if (!priv->gve_wq) {
1151                 dev_err(&pdev->dev, "Could not allocate workqueue");
1152                 err = -ENOMEM;
1153                 goto abort_with_netdev;
1154         }
1155         INIT_WORK(&priv->service_task, gve_service_task);
1156         priv->tx_cfg.max_queues = max_tx_queues;
1157         priv->rx_cfg.max_queues = max_rx_queues;
1158
1159         err = gve_init_priv(priv, false);
1160         if (err)
1161                 goto abort_with_wq;
1162
1163         err = register_netdev(dev);
1164         if (err)
1165                 goto abort_with_wq;
1166
1167         dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1168         gve_clear_probe_in_progress(priv);
1169         queue_work(priv->gve_wq, &priv->service_task);
1170         return 0;
1171
1172 abort_with_wq:
1173         destroy_workqueue(priv->gve_wq);
1174
1175 abort_with_netdev:
1176         free_netdev(dev);
1177
1178 abort_with_db_bar:
1179         pci_iounmap(pdev, db_bar);
1180
1181 abort_with_reg_bar:
1182         pci_iounmap(pdev, reg_bar);
1183
1184 abort_with_pci_region:
1185         pci_release_regions(pdev);
1186
1187 abort_with_enabled:
1188         pci_disable_device(pdev);
1189         return err;
1190 }
1192
1193 static void gve_remove(struct pci_dev *pdev)
1194 {
1195         struct net_device *netdev = pci_get_drvdata(pdev);
1196         struct gve_priv *priv = netdev_priv(netdev);
1197         __be32 __iomem *db_bar = priv->db_bar2;
1198         void __iomem *reg_bar = priv->reg_bar0;
1199
1200         unregister_netdev(netdev);
1201         gve_teardown_priv_resources(priv);
1202         destroy_workqueue(priv->gve_wq);
1203         free_netdev(netdev);
1204         pci_iounmap(pdev, db_bar);
1205         pci_iounmap(pdev, reg_bar);
1206         pci_release_regions(pdev);
1207         pci_disable_device(pdev);
1208 }
1209
1210 static const struct pci_device_id gve_id_table[] = {
1211         { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1212         { }
1213 };
1214
1215 static struct pci_driver gvnic_driver = {
1216         .name           = "gvnic",
1217         .id_table       = gve_id_table,
1218         .probe          = gve_probe,
1219         .remove         = gve_remove,
1220 };
1221
1222 module_pci_driver(gvnic_driver);
1223
1224 MODULE_DEVICE_TABLE(pci, gve_id_table);
1225 MODULE_AUTHOR("Google, Inc.");
1226 MODULE_DESCRIPTION("gVNIC Driver");
1227 MODULE_LICENSE("Dual MIT/GPL");
1228 MODULE_VERSION(GVE_VERSION);