From 369ac9543ab0bd9e83b36b206fc22dbec71b07ff Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 1 Oct 2024 07:58:05 -0600
Subject: [PATCH] io_uring: allow resizing of hash table

Add a per-bucket entry count to the cancel hash table. When an insert
pushes a bucket past 64 entries, grow the table by one bit (two bits if
the previous resize happened within the last jiffy), capped at 16 bits,
and rehash all existing entries into the new table.

This is a WIP patch.

Signed-off-by: Jens Axboe
---
 include/linux/io_uring_types.h |  2 ++
 io_uring/io_uring.c            | 61 +++++++++++++++++++++++++++++++++-
 io_uring/io_uring.h            |  1 +
 io_uring/poll.c                | 25 ++++++++++++--
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9c7e1d3f06e5..fe1587a0f3aa 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -68,11 +68,13 @@ struct io_file_table {
 
 struct io_hash_bucket {
 	struct hlist_head	list;
+	int			nr_entries;
 } ____cacheline_aligned_in_smp;
 
 struct io_hash_table {
 	struct io_hash_bucket	*hbs;
 	unsigned		hash_bits;
+	unsigned long		last_resize;
 };
 
 /*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d7ad4ea5f40b..af2429b434a9 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -276,11 +276,70 @@ static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits)
 	} while (1);
 
 	table->hash_bits = bits;
-	for (i = 0; i < hash_buckets; i++)
+	for (i = 0; i < hash_buckets; i++) {
 		INIT_HLIST_HEAD(&table->hbs[i].list);
+		table->hbs[i].nr_entries = 0;
+	}
+	table->last_resize = jiffies;
 	return 0;
 }
 
+/*
+ * Re-hash entries from 'cur_table' into 'new_table', and free the buckets
+ * of the old table when done.
+ */
+static void io_swap_hash_tables(struct io_hash_table *cur_table,
+				struct io_hash_table *new_table)
+{
+	int i;
+
+	for (i = 0; i < (1U << cur_table->hash_bits); i++) {
+		struct io_hash_bucket *hb = &cur_table->hbs[i];
+
+		while (!hlist_empty(&hb->list)) {
+			struct io_kiocb *req;
+			struct io_hash_bucket *new_hb;
+			u32 new_index;
+
+			req = hlist_entry(hb->list.first, struct io_kiocb,
+					  hash_node);
+			new_index = hash_long(req->cqe.user_data,
+					      new_table->hash_bits);
+			new_hb = &new_table->hbs[new_index];
+			hlist_del(&req->hash_node);
+			hlist_add_head(&req->hash_node, &new_hb->list);
+			new_hb->nr_entries++;
+		}
+	}
+
+	kvfree(cur_table->hbs);
+	cur_table->hbs = new_table->hbs;
+	cur_table->hash_bits = new_table->hash_bits;
+	cur_table->last_resize = jiffies;
+
+}
+
+void io_resize_hash_table(struct io_ring_ctx *ctx)
+{
+	struct io_hash_table new_table = { };
+	int ret, new_bits;
+
+	/* Grow by an extra bit if the last resize was within a jiffy */
+	new_bits = ctx->cancel_table.hash_bits + 1;
+	if (jiffies - ctx->cancel_table.last_resize <= 1)
+		new_bits++;
+
+	/* cap the table at 2^16 buckets */
+	if (new_bits > 16)
+		return;
+
+	ret = io_alloc_hash_table(&new_table, new_bits);
+	if (ret)
+		return;
+
+	io_swap_hash_tables(&ctx->cancel_table, &new_table);
+}
+
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 9d70b2cf7b1e..6ae37893fcdd 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -91,6 +91,7 @@ void tctx_task_work(struct callback_head *cb);
 __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 int io_uring_alloc_task_context(struct task_struct *task,
 				struct io_ring_ctx *ctx);
+void io_resize_hash_table(struct io_ring_ctx *ctx);
 
 int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
 				int start, int end);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 2d6698fb7400..a9ae48480b71 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -118,14 +118,20 @@ static struct io_poll *io_poll_get_single(struct io_kiocb *req)
 	return &req->apoll->poll;
 }
 
+#define MAX_HASH_ENTRIES	64
+
 static void io_poll_req_insert(struct io_kiocb *req)
 {
 	struct io_hash_table *table = &req->ctx->cancel_table;
 	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
+	struct io_hash_bucket *hb = &table->hbs[index];
 
 	lockdep_assert_held(&req->ctx->uring_lock);
-	hlist_add_head(&req->hash_node, &table->hbs[index].list);
+	hlist_add_head(&req->hash_node, &hb->list);
+	hb->nr_entries++;
+	if (hb->nr_entries > MAX_HASH_ENTRIES)
+		io_resize_hash_table(req->ctx);
 }
 
 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
 {
@@ -310,6 +316,19 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 	return IOU_POLL_NO_ACTION;
 }
 
+static void io_hash_del(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	int bucket;
+
+	if (!hash_hashed(&req->hash_node))
+		return;
+
+	hash_del(&req->hash_node);
+	bucket = hash_long(req->cqe.user_data, ctx->cancel_table.hash_bits);
+	ctx->cancel_table.hbs[bucket].nr_entries--;
+}
+
 void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	int ret;
@@ -323,7 +342,7 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 	}
 	io_poll_remove_entries(req);
 	/* task_work always has ->uring_lock held */
-	hash_del(&req->hash_node);
+	io_hash_del(req);
 
 	if (req->opcode == IORING_OP_POLL_ADD) {
 		if (ret == IOU_POLL_DONE) {
@@ -785,7 +804,7 @@ static int io_poll_disarm(struct io_kiocb *req)
 	if (!io_poll_get_ownership(req))
 		return -EALREADY;
 	io_poll_remove_entries(req);
-	hash_del(&req->hash_node);
+	io_hash_del(req);
 	return 0;
 }
 
-- 
2.25.1
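
Appendix (not part of the patch): a standalone userspace sketch of the
resize policy above. It approximates hash_long() with the kernel's 64-bit
golden-ratio multiply, starts from a hypothetical 4-bit table, and omits
the jiffies-based double-grow step; the per-bucket counts, the 64-entry
trigger, the 16-bit cap, and the full rehash follow io_poll_req_insert(),
io_resize_hash_table() and io_swap_hash_tables(). The file name
resize_sim.c is made up for the example.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define GOLDEN_RATIO_64		0x61C8864680B583EBull
#define MAX_HASH_ENTRIES	64	/* per-bucket resize trigger, as in poll.c */
#define MAX_HASH_BITS		16	/* cap, as in io_resize_hash_table() */

/* Same multiplicative scheme hash_long() uses on 64-bit kernels */
static unsigned hash_bucket(uint64_t user_data, unsigned bits)
{
	return (unsigned)((user_data * GOLDEN_RATIO_64) >> (64 - bits));
}

int main(void)
{
	unsigned bits = 4;	/* hypothetical starting size */
	unsigned nr_keys = 200000, i, resizes = 0;
	uint64_t *keys = malloc(nr_keys * sizeof(*keys));
	int *nr_entries = calloc(1U << bits, sizeof(int));

	srand(1);
	for (i = 0; i < nr_keys; i++) {
		unsigned b;

		keys[i] = ((uint64_t)rand() << 32) | (unsigned)rand();
		b = hash_bucket(keys[i], bits);

		/* An insert that overfills its bucket trips a resize */
		if (++nr_entries[b] > MAX_HASH_ENTRIES && bits < MAX_HASH_BITS) {
			unsigned j;

			bits++;
			resizes++;
			/* "io_swap_hash_tables": rehash everything so far */
			free(nr_entries);
			nr_entries = calloc(1U << bits, sizeof(int));
			for (j = 0; j <= i; j++)
				nr_entries[hash_bucket(keys[j], bits)]++;
		}
	}
	printf("%u keys -> %u resizes, final table: %u bits (%u buckets)\n",
	       nr_keys, resizes, bits, 1U << bits);
	free(keys);
	free(nr_entries);
	return 0;
}

Build with 'cc -O2 -o resize_sim resize_sim.c'. With a uniform key stream
the growth stops once no bucket exceeds the trigger, which illustrates why
the patch only ever grows the table on insert rather than shrinking it on
io_hash_del().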