io_uring: complete request via task work in case of DEFER_TASKRUN

[linux-block.git] / io_uring / io_uring.c
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index 1df68da89f998660d54bc4999430f6219000b8a2..4a865f0e85d0b8116c6ef3bd37b5f6db4af29835 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
         struct io_uring_task *tctx = task->io_uring;
  
         percpu_counter_sub(&tctx->inflight, nr);
-       if (unlikely(atomic_read(&tctx->in_idle)))
+       if (unlikely(atomic_read(&tctx->in_cancel)))
                 wake_up(&tctx->wait);
         put_task_struct_many(task, nr);
  }
@@ -998,7 +998,7 @@ static void __io_req_complete_post(struct io_kiocb *req)
  
  void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
  {
-       if (req->ctx->task_complete && (issue_flags & IO_URING_F_IOWQ)) {
+       if (req->ctx->task_complete && req->ctx->submitter_task != current) {
                 req->io_task_work.func = io_req_task_complete;
                 io_req_task_work_add(req);
         } else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)
  
         ctx_flush_and_put(ctx, &uring_locked);
  
-       /* relaxed read is enough as only the task itself sets ->in_idle */
-       if (unlikely(atomic_read(&tctx->in_idle)))
+       /* relaxed read is enough as only the task itself sets ->in_cancel */
+       if (unlikely(atomic_read(&tctx->in_cancel)))
                 io_uring_drop_tctx_refs(current);
  
         trace_io_uring_task_work_run(tctx, count, loops);
@@ -1285,17 +1285,15 @@ static void io_req_local_work_add(struct io_kiocb *req)
  
         percpu_ref_get(&ctx->refs);
  
-       if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
-               percpu_ref_put(&ctx->refs);
-               return;
-       }
+       if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+               goto put_ref;
+
         /* needed for the following wake up */
         smp_mb__after_atomic();
  
-       if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+       if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
                 io_move_task_work_from_local(ctx);
-               percpu_ref_put(&ctx->refs);
-               return;
+               goto put_ref;
         }
  
         if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
@@ -1305,6 +1303,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
  
         if (READ_ONCE(ctx->cq_waiting))
                 wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
+
+put_ref:
         percpu_ref_put(&ctx->refs);
  }
  
@@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
  static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
         __must_hold(&ctx->uring_lock)
  {
-       struct io_wq_work_node *node, *prev;
         struct io_submit_state *state = &ctx->submit_state;
+       struct io_wq_work_node *node;
  
         __io_cq_lock(ctx);
         /* must come first to preserve CQE ordering in failure cases */
         if (state->cqes_count)
                 __io_flush_post_cqes(ctx);
-       wq_list_for_each(node, prev, &state->compl_reqs) {
+       __wq_list_for_each(node, &state->compl_reqs) {
                 struct io_kiocb *req = container_of(node, struct io_kiocb,
                                             comp_list);
  
@@ -1777,7 +1777,7 @@ int io_req_prep_async(struct io_kiocb *req)
         const struct io_issue_def *def = &io_issue_defs[req->opcode];
  
         /* assign early for deferred execution for non-fixed file */
-       if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
+       if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE) && !req->file)
                 req->file = io_file_get_normal(req, req->cqe.fd);
         if (!cdef->prep_async)
                 return 0;
@@ -2789,8 +2789,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
         io_eventfd_unregister(ctx);
         io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
         io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
-       mutex_unlock(&ctx->uring_lock);
         io_destroy_buffers(ctx);
+       mutex_unlock(&ctx->uring_lock);
         if (ctx->sq_creds)
                 put_cred(ctx->sq_creds);
         if (ctx->submitter_task)
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
  
         work = container_of(cb, struct io_tctx_exit, task_work);
         /*
-        * When @in_idle, we're in cancellation and it's racy to remove the
+        * When @in_cancel, we're in cancellation and it's racy to remove the
          * node. It'll be removed by the end of cancellation, just ignore it.
          * tctx can be NULL if the queueing of this task_work raced with
          * work cancelation off the exec path.
          */
-       if (tctx && !atomic_read(&tctx->in_idle))
+       if (tctx && !atomic_read(&tctx->in_cancel))
                 io_uring_del_tctx_node((unsigned long)work->ctx);
         complete(&work->completion);
  }
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
         if (tctx->io_wq)
                 io_wq_exit_start(tctx->io_wq);
  
-       atomic_inc(&tctx->in_idle);
+       atomic_inc(&tctx->in_cancel);
         do {
                 bool loop = false;
  
@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
         if (cancel_all) {
                 /*
                  * We shouldn't run task_works after cancel, so just leave
-                * ->in_idle set for normal exit.
+                * ->in_cancel set for normal exit.
                  */
-               atomic_dec(&tctx->in_idle);
+               atomic_dec(&tctx->in_cancel);
                 /* for exec all current's requests should be gone, kill tctx */
                 __io_uring_free(current);
         }