io_uring: switch to per-cpu task_work

author Jens Axboe <axboe@kernel.dk>
Thu, 16 Jun 2022 17:20:12 +0000 (11:20 -0600)
committer Jens Axboe <axboe@kernel.dk>
Fri, 17 Jun 2022 14:26:05 +0000 (08:26 -0600)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index 13d177ab9cd8570cc978cfbc63ecb41e03431bb6..fd166fb249eb3c1f503868234fd1e344bdb44fce 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1011,7 +1011,6 @@ void tctx_task_work(struct callback_head *cb)
         bool uring_locked = false;
         struct io_ring_ctx *ctx = NULL;
         struct tctx_tw *tw = container_of(cb, struct tctx_tw, task_work);
-       struct io_uring_task *tctx = container_of(tw, struct io_uring_task, tw);
  
         while (1) {
                 struct io_wq_work_node *node;
@@ -1035,7 +1034,7 @@ void tctx_task_work(struct callback_head *cb)
         ctx_flush_and_put(ctx, &uring_locked);
  
         /* relaxed read is enough as only the task itself sets ->in_idle */
-       if (unlikely(atomic_read(&tctx->in_idle)))
+       if (unlikely(atomic_read(&tw->tctx->in_idle)))
                 io_uring_drop_tctx_refs(current);
  }
  
@@ -1043,12 +1042,15 @@ void io_req_task_work_add(struct io_kiocb *req)
  {
         struct io_uring_task *tctx = req->task->io_uring;
         struct io_ring_ctx *ctx = req->ctx;
-       struct tctx_tw *tw = &tctx->tw;
         struct io_wq_work_node *node;
         unsigned long flags;
+       struct tctx_tw *tw;
         bool running;
  
-       spin_lock_irqsave(&tw->task_lock, flags);
+       local_irq_save(flags);
+       tw = this_cpu_ptr(tctx->tw);
+
+       spin_lock(&tw->task_lock);
         wq_list_add_tail(&req->io_task_work.node, &tw->task_list);
         running = tw->task_running;
         if (!running)
diff --git a/io_uring/tctx.c b/io_uring/tctx.c

index 00a2fc8ed11036ab1efe373fc3644b9bb0111b87..7dc653b19e617d314850914e10ece825f1e0b3d5 100644 (file)
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -53,6 +53,7 @@ void __io_uring_free(struct task_struct *tsk)
         WARN_ON_ONCE(tctx->cached_refs);
  
         percpu_counter_destroy(&tctx->inflight);
+       free_percpu(tctx->tw);
         kfree(tctx);
         tsk->io_uring = NULL;
  }
@@ -61,7 +62,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
                                        struct io_ring_ctx *ctx)
  {
         struct io_uring_task *tctx;
-       int ret;
+       int ret, cpu;
  
         tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
         if (unlikely(!tctx))
@@ -73,22 +74,36 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
                 return ret;
         }
  
+       tctx->tw = alloc_percpu(struct tctx_tw);
+       if (!tctx->tw) {
+               percpu_counter_destroy(&tctx->inflight);
+               kfree(tctx);
+               return -ENOMEM;
+       }
+
         tctx->io_wq = io_init_wq_offload(ctx, task);
         if (IS_ERR(tctx->io_wq)) {
                 ret = PTR_ERR(tctx->io_wq);
                 percpu_counter_destroy(&tctx->inflight);
+               free_percpu(tctx->tw);
                 kfree(tctx);
                 return ret;
         }
  
+       for_each_possible_cpu(cpu) {
+               struct tctx_tw *tw = per_cpu_ptr(tctx->tw, cpu);
+
+               spin_lock_init(&tw->task_lock);
+               INIT_WQ_LIST(&tw->task_list);
+               init_task_work(&tw->task_work, tctx_task_work);
+               tw->tctx = tctx;
+       }
+
         xa_init(&tctx->xa);
         init_waitqueue_head(&tctx->wait);
         atomic_set(&tctx->in_idle, 0);
         atomic_set(&tctx->inflight_tracked, 0);
         task->io_uring = tctx;
-       spin_lock_init(&tctx->tw.task_lock);
-       INIT_WQ_LIST(&tctx->tw.task_list);
-       init_task_work(&tctx->tw.task_work, tctx_task_work);
         return 0;
  }
  
diff --git a/io_uring/tctx.h b/io_uring/tctx.h

index b1cab2e84b16f7345eef1044a7ff9f6f4edd73de..c50432906dc85360c78335c5fce2fc882a29321e 100644 (file)
--- a/io_uring/tctx.h
+++ b/io_uring/tctx.h
@@ -1,5 +1,7 @@
  // SPDX-License-Identifier: GPL-2.0
  
+#include <linux/percpu.h>
+
  /*
   * Arbitrary limit, can be raised if need be
   */
@@ -9,6 +11,7 @@ struct tctx_tw {
         spinlock_t              task_lock;
         struct io_wq_work_list  task_list;
         struct callback_head    task_work;
+       struct io_uring_task    *tctx;
         bool                    task_running;
  };
  
@@ -23,7 +26,7 @@ struct io_uring_task {
         atomic_t                inflight_tracked;
         atomic_t                in_idle;
  
-       struct tctx_tw          tw;
+       struct __percpu tctx_tw *tw;
  
         struct file             *registered_rings[IO_RINGFD_REG_MAX];
  };
author	Jens Axboe <axboe@kernel.dk>
	Thu, 16 Jun 2022 17:20:12 +0000 (11:20 -0600)
committer	Jens Axboe <axboe@kernel.dk>
	Fri, 17 Jun 2022 14:26:05 +0000 (08:26 -0600)
io_uring/io_uring.c		patch | blob | blame | history
io_uring/tctx.c		patch | blob | blame | history
io_uring/tctx.h		patch | blob | blame | history