// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
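
/* ndo_get_stats64 handler: totals are accumulated per ring and each ring's
 * counters are read under its u64_stats seqcount, so a read that races with
 * a writer is retried instead of returning torn 64-bit values.
 */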
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	int ring;

	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				s->rx_packets += priv->rx[ring].rpackets;
				s->rx_bytes += priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				s->tx_packets += priv->tx[ring].pkt_done;
				s->tx_bytes += priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
		}
	}
}
static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}
static void gve_free_counter_array(struct gve_priv *priv)
{
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}
static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}
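
/* NAPI poll for a notification block. The hard IRQ handler above has already
 * masked the block's interrupt and scheduled NAPI; here the TX and RX work is
 * serviced, the interrupt is acked and unmasked, and the block is polled one
 * more time so an event that raced with the unmask is not lost.
 */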
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Double check we have no extra work.
	 * Ensure unmask synchronizes with checking for work.
	 */
	dma_rmb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}
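
/* MSI-X layout: one vector per notification block plus one trailing
 * management vector. If fewer vectors are granted than requested, the block
 * count is rounded down to an even number, split between TX and RX, and the
 * queue limits are shrunk to match.
 */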
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	char *name = priv->dev->name;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvzalloc(num_vecs_requested *
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
		 name);
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->ntfy_blocks =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->ntfy_blocks),
				   &priv->ntfy_block_bus, GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}
	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
			 name, i);
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_intr, 0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}
static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	pci_disable_msix(priv->pdev);
	kfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}
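
/* Device resources (the event counter array and the notification blocks) are
 * allocated in host memory and then described to the device over the admin
 * queue before any queues are created.
 */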
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (err) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_ntfy_blocks;
	}
	gve_set_device_resources_ok(priv);
	return 0;
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
	return err;
}
static void gve_trigger_reset(struct gve_priv *priv);
static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_clear_device_resources_ok(priv);
}
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
		       NAPI_POLL_WEIGHT);
}
static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}
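
/* Queue page lists (QPLs) are arrays of DMA-mapped pages that must be
 * registered with the device over the admin queue before the queues that use
 * them are created.
 */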
static int gve_register_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}
static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_create_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_create_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		/* Rx data ring has been prefilled with packet buffers at
		 * queue allocation time.
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		gve_rx_write_doorbell(priv, &priv->rx[i]);
		netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
	}

	return 0;
}
static int gve_alloc_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int err;
	int i;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;
	err = gve_tx_alloc_rings(priv);
	if (err)
		goto free_tx;
	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}
	err = gve_rx_alloc_rings(priv);
	if (err)
		goto free_rx;
	/* Add tx napi & init sync stats*/
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		u64_stats_init(&priv->tx[i].statss);
		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		u64_stats_init(&priv->rx[i].statss);
		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	return 0;

free_rx:
	kfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kfree(priv->tx);
	priv->tx = NULL;
	return err;
}
static int gve_destroy_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
	}
	return 0;
}
static void gve_free_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv);
		kfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kfree(priv->rx);
		priv->rx = NULL;
	}
}
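
/* Page helpers exported to the rest of the driver: allocate and DMA-map a
 * single page, and the matching unmap/free.
 */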
int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir)
{
	*page = alloc_page(GFP_KERNEL);
	if (!*page)
		return -ENOMEM;
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = pages;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
	}
	priv->num_registered_pages += pages;

	return 0;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}
static void gve_free_queue_page_list(struct gve_priv *priv,
				     u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kfree(qpl->page_buses);
free_pages:
	kfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}
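
/* QPL ids are assigned contiguously: the TX queue page lists come first,
 * followed by the RX queue page lists, in the order the allocation loops
 * below run.
 */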
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kfree(priv->qpls);
	return err;
}
static void gve_free_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i;

	kfree(priv->qpl_cfg.qpl_id_map);

	for (i = 0; i < num_qpls; i++)
		gve_free_queue_page_list(priv, i);

	kfree(priv->qpls);
}
/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
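
/* Bring-up order: allocate QPLs and rings in host memory, size the netdev
 * queues, register the QPLs and create the queues on the device, then turn
 * the queues up and report carrier. Failures after the device has been
 * touched fall through to a reset.
 */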
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;
	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;
	err = gve_create_rings(priv);
	if (err)
		goto reset;
	gve_set_device_rings_ok(priv);

	gve_turnup(priv);
	netif_carrier_on(dev);
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}

	gve_free_rings(priv);
	gve_free_qpls(priv);
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
}
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}

	gve_set_napi_enabled(priv);
}
static void gve_tx_timeout(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	gve_schedule_reset(priv);
	priv->tx_timeo_cnt++;
}
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_tx,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout		=	gve_tx_timeout,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
}
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}
/* Handle NIC status register changes and reset requests */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);

	gve_handle_status(priv,
			  ioread32be(&priv->reg_bar0->device_status));

	gve_handle_reset(priv);
}
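
/* Initialize driver state over the admin queue: describe the device, size the
 * MSI-X/notification-block layout, pick default queue counts, and set up the
 * device resources. skip_describe_device is set on reset recovery so the
 * previously negotiated configuration is kept.
 */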
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	if (priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			netif_err(priv, drv, priv->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
		   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
		   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}
static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	return err;
}
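
/* The driver version is reported by writing the prefix and version strings
 * byte by byte to the same register, terminated by a newline.
 */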
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}
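
/* PCI probe: enable the device, map the register and doorbell BARs, write
 * the driver version, read the advertised queue limits, allocate the netdev,
 * and finish initialization through gve_init_priv() before registering the
 * netdev.
 */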
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return -ENXIO;

	err = pci_request_regions(pdev, "gvnic-cfg");
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev,
			"Failed to set consistent dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	/* advertise features */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_wq;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return -ENXIO;
}
EXPORT_SYMBOL(gve_probe);
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);