io_uring: allow resizing of hash table
author Jens Axboe <axboe@kernel.dk>
Tue, 1 Oct 2024 13:58:05 +0000 (07:58 -0600)
committer Jens Axboe <axboe@kernel.dk>
Mon, 14 Oct 2024 15:05:00 +0000 (09:05 -0600)
Allow the poll cancelation hash table to grow at runtime instead of
staying at its initial size. Each hash bucket now tracks how many
entries it holds; when an insertion pushes a bucket past 64 entries,
the table is resized: the number of hash bits is bumped by one (or by
two if the previous resize happened within the last jiffy), capped at
16 bits, and all existing entries are rehashed into the new buckets.

This is still a WIP patch; a small userspace sketch of the grow
heuristic follows below the tags.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
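Purely as an illustration and not part of the patch: below is a minimal
userspace model of the grow heuristic implemented by io_resize_hash_table()
in the io_uring.c hunk further down. The function name, the MAX_HASH_BITS
constant, and the plain tick counter standing in for jiffies are made up
for the example.

/*
 * Standalone model of the resize heuristic: grow by one bit normally,
 * by two bits if the previous resize happened within the last tick,
 * and never beyond 16 bits (growth is simply skipped at the cap).
 */
#include <stdio.h>

#define MAX_HASH_BITS	16

static int next_hash_bits(int cur_bits, unsigned long last_resize,
			  unsigned long now)
{
	int new_bits = cur_bits + 1;

	/* back-to-back resizes suggest heavy load: grow faster */
	if (now - last_resize <= 1)
		new_bits++;

	/* refuse to grow past the cap, keep the current size */
	if (new_bits > MAX_HASH_BITS)
		return cur_bits;
	return new_bits;
}

int main(void)
{
	unsigned long now = 100, last_resize = 100;	/* just resized */
	int bits = 7;					/* 128 buckets */

	while (bits < MAX_HASH_BITS) {
		bits = next_hash_bits(bits, last_resize, now);
		last_resize = now;
		printf("table now has %u buckets\n", 1U << bits);
		now += 2;	/* later resizes are spaced out */
	}
	return 0;
}

The first call grows by two bits because the table was "just resized";
the spaced-out calls after that grow by one bit each.
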
include/linux/io_uring_types.h
io_uring/io_uring.c
io_uring/io_uring.h
io_uring/poll.c

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9c7e1d3f06e5230b323062e598ea9f981bdb4058..fe1587a0f3aa7a75fb51007448f697605f64a791 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -68,11 +68,13 @@ struct io_file_table {
 
 struct io_hash_bucket {
        struct hlist_head       list;
+       int                     nr_entries;
 } ____cacheline_aligned_in_smp;
 
 struct io_hash_table {
        struct io_hash_bucket   *hbs;
        unsigned                hash_bits;
+       unsigned long           last_resize;
 };
 
 /*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d7ad4ea5f40b2ae6e5c5b4d494bc3fb2cdd3d051..af2429b434a939efa38651310450ce910b33f453 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -276,11 +276,69 @@ static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits)
        } while (1);
 
        table->hash_bits = bits;
-       for (i = 0; i < hash_buckets; i++)
+       for (i = 0; i < hash_buckets; i++) {
                INIT_HLIST_HEAD(&table->hbs[i].list);
+               table->hbs[i].nr_entries = 0;
+       }
+       table->last_resize = jiffies;
        return 0;
 }
 
+/*
+ * Re-hash entries on 'cur_table' into 'new_table', and free the buckets
+ * from the current table when done.
+ */
+static void io_swap_hash_tables(struct io_hash_table *cur_table,
+                               struct io_hash_table *new_table)
+{
+       int i;
+
+       for (i = 0; i < (1U << cur_table->hash_bits); i++) {
+               struct io_hash_bucket *hb = &cur_table->hbs[i];
+
+               while (!hlist_empty(&hb->list)) {
+                       struct io_kiocb *req;
+                       struct io_hash_bucket *new_hb;
+                       u32 new_index;
+
+                       req = hlist_entry(hb->list.first, struct io_kiocb,
+                                               hash_node);
+                       new_index = hash_long(req->cqe.user_data,
+                                               new_table->hash_bits);
+                       new_hb = &new_table->hbs[new_index];
+                       hlist_del(&req->hash_node);
+                       hlist_add_head(&req->hash_node, &new_hb->list);
+                       new_hb->nr_entries++;
+               }
+       }
+
+       kvfree(cur_table->hbs);
+       cur_table->hbs = new_table->hbs;
+       cur_table->hash_bits = new_table->hash_bits;
+       cur_table->last_resize = jiffies;
+}
+
+void io_resize_hash_table(struct io_ring_ctx *ctx)
+{
+       struct io_hash_table new_table = { };
+       int ret, new_bits;
+
+       /* Grow by an extra bit if the previous resize was very recent */
+       new_bits = ctx->cancel_table.hash_bits + 1;
+       if (jiffies - ctx->cancel_table.last_resize <= 1)
+               new_bits++;
+
+       /* cap the table at 16 bits (65536 buckets) */
+       if (new_bits > 16)
+               return;
+
+       ret = io_alloc_hash_table(&new_table, new_bits);
+       if (ret)
+               return;
+
+       io_swap_hash_tables(&ctx->cancel_table, &new_table);
+}
+
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
        struct io_ring_ctx *ctx;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 9d70b2cf7b1ece54c1a2579000315bc22ecd1cb5..6ae37893fcdd27fe53c17caeabe00914322c407f 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -91,6 +91,7 @@ void tctx_task_work(struct callback_head *cb);
 __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 int io_uring_alloc_task_context(struct task_struct *task,
                                struct io_ring_ctx *ctx);
+void io_resize_hash_table(struct io_ring_ctx *ctx);
 
 int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
                                     int start, int end);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 2d6698fb740008e443a303c55f2584a345b59eaf..a9ae48480b719d6044009c43ee29b77f06a6b210 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -118,14 +118,20 @@ static struct io_poll *io_poll_get_single(struct io_kiocb *req)
        return &req->apoll->poll;
 }
 
+#define MAX_HASH_ENTRIES       64
+
 static void io_poll_req_insert(struct io_kiocb *req)
 {
        struct io_hash_table *table = &req->ctx->cancel_table;
        u32 index = hash_long(req->cqe.user_data, table->hash_bits);
+       struct io_hash_bucket *hb = &table->hbs[index];
 
        lockdep_assert_held(&req->ctx->uring_lock);
 
-       hlist_add_head(&req->hash_node, &table->hbs[index].list);
+       hlist_add_head(&req->hash_node, &hb->list);
+       hb->nr_entries++;
+       if (hb->nr_entries > MAX_HASH_ENTRIES)
+               io_resize_hash_table(req->ctx);
 }
 
 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
@@ -310,6 +316,19 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
        return IOU_POLL_NO_ACTION;
 }
 
+static void io_hash_del(struct io_kiocb *req)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       int bucket;
+
+       if (!hash_hashed(&req->hash_node))
+               return;
+
+       hash_del(&req->hash_node);
+       bucket = hash_long(req->cqe.user_data, ctx->cancel_table.hash_bits);
+       ctx->cancel_table.hbs[bucket].nr_entries--;
+}
+
 void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 {
        int ret;
@@ -323,7 +342,7 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
        }
        io_poll_remove_entries(req);
        /* task_work always has ->uring_lock held */
-       hash_del(&req->hash_node);
+       io_hash_del(req);
 
        if (req->opcode == IORING_OP_POLL_ADD) {
                if (ret == IOU_POLL_DONE) {
@@ -785,7 +804,7 @@ static int io_poll_disarm(struct io_kiocb *req)
        if (!io_poll_get_ownership(req))
                return -EALREADY;
        io_poll_remove_entries(req);
-       hash_del(&req->hash_node);
+       io_hash_del(req);
        return 0;
 }
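
For illustration only, again not part of the patch: the full rehash in
io_swap_hash_tables() is needed because the bucket index depends on both
user_data and the current hash_bits, so growing the table moves entries to
different buckets. The toy program below shows indices changing when
hash_bits goes from 4 to 5; hash_u64() is a simplified multiplicative hash
(using the 64-bit golden-ratio constant) standing in for the kernel's
hash_long(), and the sample user_data values are arbitrary.

/*
 * Toy demonstration: the same user_data values land in different
 * buckets once hash_bits changes, which is why io_swap_hash_tables()
 * must walk and re-insert every entry. hash_u64() is only a stand-in
 * for the kernel's hash_long().
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t hash_u64(uint64_t val, unsigned int bits)
{
	/* multiplicative hash: keep the top 'bits' bits of the product */
	return (uint32_t)((val * 0x61C8864680B583EBull) >> (64 - bits));
}

int main(void)
{
	uint64_t user_data[] = { 0x1000, 0x2000, 0xdeadbeef, 0xcafef00d };
	unsigned int i;

	for (i = 0; i < sizeof(user_data) / sizeof(user_data[0]); i++)
		printf("user_data %#llx: bucket %u -> %u\n",
		       (unsigned long long)user_data[i],
		       hash_u64(user_data[i], 4),	/* old: 16 buckets */
		       hash_u64(user_data[i], 5));	/* new: 32 buckets */
	return 0;
}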