author		Jens Axboe <axboe@kernel.dk>	2021-08-06 14:04:31 -0600
committer	Jens Axboe <axboe@kernel.dk>	2021-08-09 09:04:17 -0600
commit		b26d1744f1efd78d137e732bbe4eef4ebb098071 (patch)
tree		c0b13f9926991f46824f0656e162f0b0c74cb70d
parent		5ac78a0ed900418cc8ca27ced66742199566f997 (diff)
io_uring: be smarter about waking multiple CQ ring waiters
Currently we only wake the first waiter, even if we have enough entries posted to satisfy multiple waiters. Improve that situation so that every waiter knows how much the CQ tail has to advance before they can be safely woken up.

With this change, if we have N waiters each asking for 1 event and we get 4 completions, then we wake up 4 waiters. If we have N waiters asking for 2 completions and we get 4 completions, then we wake up the first two. Previously, only the first waiter would've been woken up.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
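Illustrative note (not part of the patch): the user-visible effect is that several threads blocked on the same CQ ring, each with its own completion threshold, can all be woken by one batch of completions. Below is a minimal userspace sketch of that scenario, assuming liburing and a kernel carrying this change; the waiter()/main() structure and the lack of error handling are purely for illustration.

/*
 * Two threads each wait for a single completion; the parent posts four
 * NOPs in one submit. With this patch both waiters are woken; before it,
 * only the first waiter on the CQ waitqueue would have been.
 * Error handling omitted for brevity.
 */
#include <liburing.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static struct io_uring ring;

static void *waiter(void *arg)
{
	struct io_uring_cqe *cqe;

	/* block until at least one CQE has been posted (wait_nr == 1) */
	io_uring_wait_cqe_nr(&ring, &cqe, 1);
	printf("waiter %ld woken\n", (long) arg);
	return NULL;
}

int main(void)
{
	pthread_t tid[2];
	long i;

	io_uring_queue_init(8, &ring, 0);

	for (i = 0; i < 2; i++)
		pthread_create(&tid[i], NULL, waiter, (void *) i);

	/* crude, but good enough here: give both waiters time to block */
	sleep(1);

	/* queue four NOPs; one completion batch satisfies both waiters */
	for (i = 0; i < 4; i++)
		io_uring_prep_nop(io_uring_get_sqe(&ring));
	io_uring_submit(&ring);

	for (i = 0; i < 2; i++)
		pthread_join(tid[i], NULL);

	io_uring_queue_exit(&ring);
	return 0;
}

In kernel terms, each waiter's iowq.cq_tail records the CQ tail value it needs to see (the current CQ head plus min_events, adjusted for the timeout count), so wake_up_all() can let io_should_wake() decide per waiter whether its own target has been reached.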
-rw-r--r--	fs/io_uring.c	20
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0c3e90f974e9..c9e415671dc0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1436,11 +1436,13 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
-	/* see waitqueue_active() comment */
-	smp_mb();
-
-	if (waitqueue_active(&ctx->cq_wait))
-		wake_up(&ctx->cq_wait);
+	/*
+	 * wake_up_all() may seem excessive, but io_wake_function() and
+	 * io_should_wake() handle the termination of the loop and only
+	 * wake as many waiters as we need to.
+	 */
+	if (wq_has_sleeper(&ctx->cq_wait))
+		wake_up_all(&ctx->cq_wait);
 	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
 		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
@@ -6974,20 +6976,21 @@ static int io_sq_thread(void *data)
 struct io_wait_queue {
 	struct wait_queue_entry wq;
 	struct io_ring_ctx *ctx;
-	unsigned to_wait;
+	unsigned cq_tail;
 	unsigned nr_timeouts;
 };
 
 static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
+	int tail = ctx->cached_cq_tail + atomic_read(&ctx->cq_timeouts);
 
 	/*
 	 * Wake up if we have enough events, or if a timeout occurred since we
 	 * started waiting. For timeouts, we always want to return to userspace,
 	 * regardless of event count.
 	 */
-	return io_cqring_events(ctx) >= iowq->to_wait ||
+	return tail >= (int) iowq->cq_tail ||
 			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
 }
 
@@ -7051,7 +7054,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
 		},
 		.ctx		= ctx,
-		.to_wait	= min_events,
 	};
 	struct io_rings *rings = ctx->rings;
 	signed long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -7087,6 +7089,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	}
 
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events +
+			iowq.nr_timeouts;
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
 		/* if we can't even flush overflow, don't wait for more */