From 45774c5b8ec273f8f79f196da575c5b8ea5bc926 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Jul 2025 21:45:25 -0600 Subject: [PATCH] io_uring/chan: cache consumer head loads Posting a message on the channel currently requires reading the destination to know how far along it is. But in practice, this only needs to be done every time the tail has caught up. Initialize a cached_head to be that of the ring size, and use the cached head when posting an event. If the cached entries are used up, do a proper c->head read and update the cached_head again. This greatly reduces the cross traffic on the posting side, by avoiding pulling in the consumer ring head entry until it's required. Signed-off-by: Jens Axboe --- io_uring/chan.c | 16 +++++++++++++++- io_uring/chan.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/io_uring/chan.c b/io_uring/chan.c index ebda7179544f..3d1454e8814b 100644 --- a/io_uring/chan.c +++ b/io_uring/chan.c @@ -94,6 +94,8 @@ static struct io_queue_chan *__io_register_queue_chan(struct io_ring_ctx *ctx, atomic_set(&c->refs, 2); c->nentries = chan->nentries; c->mask = chan->nentries - 1; + c->cached_head = 0; + c->cached_nr = chan->nentries; c->req.ctx = dst; ret = xa_alloc(&ctx->xa_src_chan, &ids->src_id, c, lim, GFP_KERNEL_ACCOUNT); @@ -358,8 +360,20 @@ is_dead: goto err; } - head = smp_load_acquire(&c->head); + /* + * If we have cached head entries, there's no need to pull in the + * cacheline that the consumer dirties all the time. If no more + * cached entries exist, acquire the actual head and update the + * cached count. 
+ */ tail = c->tail; + if (c->cached_nr) { + head = c->cached_head++; + c->cached_nr--; + } else { + head = c->cached_head = smp_load_acquire(&c->head); + c->cached_nr = (tail - head) & c->mask; + } if (tail - head >= c->nentries) { ret = -EXFULL; goto err; diff --git a/io_uring/chan.h b/io_uring/chan.h index d09b449e6d46..d7c4ca4b69e8 100644 --- a/io_uring/chan.h +++ b/io_uring/chan.h @@ -19,6 +19,8 @@ struct io_queue_chan { __u32 nentries; __u32 mask; __u32 tail; + __u32 cached_head; + __u32 cached_nr; __u32 resp_id; atomic_t flags; struct io_ring_ctx __rcu *dst_ring; -- 2.25.1