In order to make possible the implementation of per-queue ndo hooks,
gve_turnup was changed in a previous patch to account for queues already
having some unprocessed descriptors: it does a one-off napi_schedule to
handle them. If conditions of consistent high traffic persist in the
immediate aftermath of this, the poll routine for a queue can be "stuck"
on the cpu on which the ndo hooks ran, instead of the cpu its irq has
affinity with.
This situation is exacerbated by the fact that the ndo hooks for all the
queues are invoked on the same cpu, potentially causing all the napi
poll routines to reside on the same cpu.
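A self-correcting mechanism in the poll method itself solves this
problem: when the poll routine finds itself running off the cpu its irq
has affinity with, it stops rescheduling itself and arms the irq instead.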
Tested-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Praveen Kaligineedi <pkaligineedi@google.com>
Reviewed-by: Harshitha Ramamurthy <hramamurthy@google.com>
Signed-off-by: Shailend Chand <shailend@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
};
/* Tracks allowed and current queue settings */
};
/* Tracks allowed and current queue settings */
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
+static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
+{
+ int cpu_curr = smp_processor_id();
+ const struct cpumask *aff_mask;
+ /* Report whether the cpu running this code (cpu_curr) is set in the
+ * effective affinity mask looked up for irq. priv is not read here.
+ * When no mask is returned, answer 1, i.e. treat the current cpu as
+ * the home cpu.
+ */
+ aff_mask = irq_get_effective_affinity_mask(irq);
+ if (unlikely(!aff_mask))
+ return 1;
+ /* Result of testing cpu_curr against aff_mask. */
+ return cpumask_test_cpu(cpu_curr, aff_mask);
+}
+
int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
reschedule |= work_done == budget;
}
reschedule |= work_done == budget;
}
- if (reschedule)
- return budget;
+ if (reschedule) {
+ /* Reschedule by returning budget only if already on the correct
+ * cpu.
+ */
+ if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
+ return budget;
+
+ /* If not on the cpu with which this queue's irq has
+ * affinity, we avoid rescheduling napi and arm the irq
+ * instead so that napi eventually gets rescheduled back onto
+ * the right cpu.
+ */
+ if (work_done == budget)
+ work_done--;
+ }
if (likely(napi_complete_done(napi, work_done))) {
/* Enable interrupts again.
if (likely(napi_complete_done(napi, work_done))) {
/* Enable interrupts again.
"Failed to receive msix vector %d\n", i);
goto abort_with_some_ntfy_blocks;
}
"Failed to receive msix vector %d\n", i);
goto abort_with_some_ntfy_blocks;
}
+ block->irq = priv->msix_vectors[msix_idx].vector;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
get_cpu_mask(i % active_cpus));
block->irq_db_index = &priv->irq_db_indices[i].index;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
get_cpu_mask(i % active_cpus));
block->irq_db_index = &priv->irq_db_indices[i].index;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
}
kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
}
kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
}
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
kvfree(priv->ntfy_blocks);
}
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
kvfree(priv->ntfy_blocks);