io_uring: switch cancel_hash to use per entry spinlock
authorHao Xu <howeyxu@tencent.com>
Thu, 16 Jun 2022 09:22:02 +0000 (10:22 +0100)
committerJens Axboe <axboe@kernel.dk>
Mon, 25 Jul 2022 00:39:13 +0000 (18:39 -0600)
Add a new io_hash_bucket structure so that each bucket in cancel_hash
has a separate spinlock. Using a per-entry lock for cancel_hash removes
some completion_lock invocations and removes contention between different
cancel_hash entries.

Signed-off-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/05d1e135b0c8bce9d1441e6346776589e5783e26.1655371007.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/cancel.c
io_uring/cancel.h
io_uring/fdinfo.c
io_uring/io_uring.c
io_uring/io_uring_types.h
io_uring/poll.c

index 83cceb52d82d644a61800bf7fcc83968f382a445..6f2888388a40ed10f94c7b0b102f86f6688dec81 100644 (file)
@@ -93,14 +93,14 @@ int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
        if (!ret)
                return 0;
 
-       spin_lock(&ctx->completion_lock);
        ret = io_poll_cancel(ctx, cd);
        if (ret != -ENOENT)
                goto out;
+       spin_lock(&ctx->completion_lock);
        if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
                ret = io_timeout_cancel(ctx, cd);
-out:
        spin_unlock(&ctx->completion_lock);
+out:
        return ret;
 }
 
@@ -192,3 +192,13 @@ done:
        io_req_set_res(req, ret, 0);
        return IOU_OK;
 }
+
+void init_hash_table(struct io_hash_bucket *hash_table, unsigned size)
+{
+       unsigned int i;
+
+       for (i = 0; i < size; i++) {
+               spin_lock_init(&hash_table[i].lock);
+               INIT_HLIST_HEAD(&hash_table[i].list);
+       }
+}
index 4f35d86963253f88f15b605358671a1eb0a9e53d..556a7dcf160e47e45d077530866cc6c2ed100ac1 100644 (file)
@@ -4,3 +4,9 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd);
+void init_hash_table(struct io_hash_bucket *hash_table, unsigned size);
+
+struct io_hash_bucket {
+       spinlock_t              lock;
+       struct hlist_head       list;
+} ____cacheline_aligned_in_smp;
index fcedde4b4b1e1f15d63165eb0767d7b2047fba1a..f941c73f550259bb0ef4d7fd1c43b76978db32c7 100644 (file)
@@ -13,6 +13,7 @@
 #include "io_uring.h"
 #include "sqpoll.h"
 #include "fdinfo.h"
+#include "cancel.h"
 
 #ifdef CONFIG_PROC_FS
 static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
@@ -157,17 +158,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
                mutex_unlock(&ctx->uring_lock);
 
        seq_puts(m, "PollList:\n");
-       spin_lock(&ctx->completion_lock);
        for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
-               struct hlist_head *list = &ctx->cancel_hash[i];
+               struct io_hash_bucket *hb = &ctx->cancel_hash[i];
                struct io_kiocb *req;
 
-               hlist_for_each_entry(req, list, hash_node)
+               spin_lock(&hb->lock);
+               hlist_for_each_entry(req, &hb->list, hash_node)
                        seq_printf(m, "  op=%d, task_works=%d\n", req->opcode,
                                        task_work_pending(req->task));
+               spin_unlock(&hb->lock);
        }
 
        seq_puts(m, "CqOverflowList:\n");
+       spin_lock(&ctx->completion_lock);
        list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
                struct io_uring_cqe *cqe = &ocqe->cqe;
 
index 957a5bc1b528c9ab27e32ddfc67d9df6c0d42283..ac6946e3f174b57566e43f237cfddf62c2c88b87 100644 (file)
@@ -89,6 +89,7 @@
 #include "fdinfo.h"
 #include "kbuf.h"
 #include "rsrc.h"
+#include "cancel.h"
 
 #include "timeout.h"
 #include "poll.h"
@@ -260,11 +261,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        if (hash_bits <= 0)
                hash_bits = 1;
        ctx->cancel_hash_bits = hash_bits;
-       ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
-                                       GFP_KERNEL);
+       ctx->cancel_hash =
+               kmalloc((1U << hash_bits) * sizeof(struct io_hash_bucket),
+                       GFP_KERNEL);
        if (!ctx->cancel_hash)
                goto err;
-       __hash_init(ctx->cancel_hash, 1U << hash_bits);
+
+       init_hash_table(ctx->cancel_hash, 1U << hash_bits);
 
        ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
        if (!ctx->dummy_ubuf)
index 4576ea8cad2e6c1b062dd872c519c085ce4c6b6a..1f8db2dd7af75216e32d1b065cdaf072a1065bff 100644 (file)
@@ -224,7 +224,7 @@ struct io_ring_ctx {
                 * manipulate the list, hence no extra locking is needed there.
                 */
                struct io_wq_work_list  iopoll_list;
-               struct hlist_head       *cancel_hash;
+               struct io_hash_bucket   *cancel_hash;
                unsigned                cancel_hash_bits;
                bool                    poll_multi_queue;
 
index fdb6b1101ffcb7729c89aa6d5280e19e087cd3e0..1511a26b412d17dacc0defd8f56438b5fba3556b 100644 (file)
@@ -19,6 +19,7 @@
 #include "opdef.h"
 #include "kbuf.h"
 #include "poll.h"
+#include "cancel.h"
 
 struct io_poll_update {
        struct file                     *file;
@@ -73,10 +74,22 @@ static struct io_poll *io_poll_get_single(struct io_kiocb *req)
 static void io_poll_req_insert(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       struct hlist_head *list;
+       u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits);
+       struct io_hash_bucket *hb = &ctx->cancel_hash[index];
 
-       list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
-       hlist_add_head(&req->hash_node, list);
+       spin_lock(&hb->lock);
+       hlist_add_head(&req->hash_node, &hb->list);
+       spin_unlock(&hb->lock);
+}
+
+static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+       u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits);
+       spinlock_t *lock = &ctx->cancel_hash[index].lock;
+
+       spin_lock(lock);
+       hash_del(&req->hash_node);
+       spin_unlock(lock);
 }
 
 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
@@ -220,8 +233,8 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
        }
 
        io_poll_remove_entries(req);
+       io_poll_req_delete(req, ctx);
        spin_lock(&ctx->completion_lock);
-       hash_del(&req->hash_node);
        req->cqe.flags = 0;
        __io_req_complete_post(req);
        io_commit_cqring(ctx);
@@ -231,7 +244,6 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 
 static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 {
-       struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
        ret = io_poll_check_events(req, locked);
@@ -239,9 +251,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
                return;
 
        io_poll_remove_entries(req);
-       spin_lock(&ctx->completion_lock);
-       hash_del(&req->hash_node);
-       spin_unlock(&ctx->completion_lock);
+       io_poll_req_delete(req, req->ctx);
 
        if (!ret)
                io_req_task_submit(req, locked);
@@ -437,9 +447,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
                return 0;
        }
 
-       spin_lock(&ctx->completion_lock);
        io_poll_req_insert(req);
-       spin_unlock(&ctx->completion_lock);
 
        if (mask && (poll->events & EPOLLET)) {
                /* can't multishot if failed, just queue the event we've got */
@@ -540,32 +548,31 @@ __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
        bool found = false;
        int i;
 
-       spin_lock(&ctx->completion_lock);
        for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
-               struct hlist_head *list;
+               struct io_hash_bucket *hb = &ctx->cancel_hash[i];
 
-               list = &ctx->cancel_hash[i];
-               hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+               spin_lock(&hb->lock);
+               hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
                        if (io_match_task_safe(req, tsk, cancel_all)) {
                                hlist_del_init(&req->hash_node);
                                io_poll_cancel_req(req);
                                found = true;
                        }
                }
+               spin_unlock(&hb->lock);
        }
-       spin_unlock(&ctx->completion_lock);
        return found;
 }
 
 static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
                                     struct io_cancel_data *cd)
-       __must_hold(&ctx->completion_lock)
 {
-       struct hlist_head *list;
        struct io_kiocb *req;
+       u32 index = hash_long(cd->data, ctx->cancel_hash_bits);
+       struct io_hash_bucket *hb = &ctx->cancel_hash[index];
 
-       list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
-       hlist_for_each_entry(req, list, hash_node) {
+       spin_lock(&hb->lock);
+       hlist_for_each_entry(req, &hb->list, hash_node) {
                if (cd->data != req->cqe.user_data)
                        continue;
                if (poll_only && req->opcode != IORING_OP_POLL_ADD)
@@ -577,21 +584,21 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
                }
                return req;
        }
+       spin_unlock(&hb->lock);
        return NULL;
 }
 
 static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
                                          struct io_cancel_data *cd)
-       __must_hold(&ctx->completion_lock)
 {
        struct io_kiocb *req;
        int i;
 
        for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
-               struct hlist_head *list;
+               struct io_hash_bucket *hb = &ctx->cancel_hash[i];
 
-               list = &ctx->cancel_hash[i];
-               hlist_for_each_entry(req, list, hash_node) {
+               spin_lock(&hb->lock);
+               hlist_for_each_entry(req, &hb->list, hash_node) {
                        if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
                            req->file != cd->file)
                                continue;
@@ -600,12 +607,12 @@ static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
                        req->work.cancel_seq = cd->seq;
                        return req;
                }
+               spin_unlock(&hb->lock);
        }
        return NULL;
 }
 
 static bool io_poll_disarm(struct io_kiocb *req)
-       __must_hold(&ctx->completion_lock)
 {
        if (!io_poll_get_ownership(req))
                return false;
@@ -615,17 +622,23 @@ static bool io_poll_disarm(struct io_kiocb *req)
 }
 
 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
-       __must_hold(&ctx->completion_lock)
 {
        struct io_kiocb *req;
+       u32 index;
+       spinlock_t *lock;
 
        if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
                req = io_poll_file_find(ctx, cd);
        else
                req = io_poll_find(ctx, false, cd);
-       if (!req)
+       if (!req) {
                return -ENOENT;
+       } else {
+               index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits);
+               lock = &ctx->cancel_hash[index].lock;
+       }
        io_poll_cancel_req(req);
+       spin_unlock(lock);
        return 0;
 }
 
@@ -719,18 +732,23 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
        struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
        struct io_cancel_data cd = { .data = poll_update->old_user_data, };
        struct io_ring_ctx *ctx = req->ctx;
+       u32 index = hash_long(cd.data, ctx->cancel_hash_bits);
+       spinlock_t *lock = &ctx->cancel_hash[index].lock;
        struct io_kiocb *preq;
        int ret2, ret = 0;
        bool locked;
 
-       spin_lock(&ctx->completion_lock);
        preq = io_poll_find(ctx, true, &cd);
-       if (!preq || !io_poll_disarm(preq)) {
-               spin_unlock(&ctx->completion_lock);
-               ret = preq ? -EALREADY : -ENOENT;
+       if (!preq) {
+               ret = -ENOENT;
+               goto out;
+       }
+       ret2 = io_poll_disarm(preq);
+       spin_unlock(lock);
+       if (!ret2) {
+               ret = -EALREADY;
                goto out;
        }
-       spin_unlock(&ctx->completion_lock);
 
        if (poll_update->update_events || poll_update->update_user_data) {
                /* only mask one event flags, keep behavior flags */