Merge tag 'io_uring-6.2-2022-12-19' of git://git.kernel.dk/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)
diff --git a/MAINTAINERS b/MAINTAINERS

index b701de08d60217535636b9b58a18243750199443..f61eb221415bd490a4b29945bc284b313562721f 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10878,6 +10878,7 @@ T:      git git://git.kernel.dk/liburing
  F:     io_uring/
  F:     include/linux/io_uring.h
  F:     include/linux/io_uring_types.h
+F:     include/trace/events/io_uring.h
  F:     include/uapi/linux/io_uring.h
  F:     tools/io_uring/
  
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index b521186efa5c75ac0e0d49c63c0a555c032dcf86..ff2bbac1a10f4afbaad0bb14820a0df0ee70893e 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -538,7 +538,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
         } else {
                 atomic_inc(&ev_fd->refs);
                 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
-                       call_rcu(&ev_fd->rcu, io_eventfd_ops);
+                       call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
                 else
                         atomic_dec(&ev_fd->refs);
         }
@@ -572,12 +572,11 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
  
  void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
  {
-       if (ctx->off_timeout_used || ctx->drain_active) {
+       if (ctx->off_timeout_used)
+               io_flush_timeouts(ctx);
+       if (ctx->drain_active) {
                 spin_lock(&ctx->completion_lock);
-               if (ctx->off_timeout_used)
-                       io_flush_timeouts(ctx);
-               if (ctx->drain_active)
-                       io_queue_deferred(ctx);
+               io_queue_deferred(ctx);
                 spin_unlock(&ctx->completion_lock);
         }
         if (ctx->has_evfd)
@@ -597,6 +596,18 @@ static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
                 spin_unlock(&ctx->completion_lock);
  }
  
+static inline void io_cq_lock(struct io_ring_ctx *ctx)
+       __acquires(ctx->completion_lock)
+{
+       spin_lock(&ctx->completion_lock);
+}
+
+static inline void io_cq_unlock(struct io_ring_ctx *ctx)
+       __releases(ctx->completion_lock)
+{
+       spin_unlock(&ctx->completion_lock);
+}
+
  /* keep it inlined for io_submit_flush_completions() */
  static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
         __releases(ctx->completion_lock)
@@ -916,7 +927,7 @@ static void __io_req_complete_post(struct io_kiocb *req)
  
         io_cq_lock(ctx);
         if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(ctx, req);
+               io_fill_cqe_req(ctx, req);
  
         /*
          * If we're the last reference to this request, add to our locked
@@ -1074,9 +1085,9 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
  {
         struct io_ring_ctx *ctx = req->ctx;
  
-       io_cq_lock(ctx);
+       spin_lock(&ctx->completion_lock);
         io_disarm_next(req);
-       io_cq_unlock_post(ctx);
+       spin_unlock(&ctx->completion_lock);
  }
  
  static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
@@ -2470,7 +2481,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
         }
         if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
                 return -ETIME;
-       return 1;
+
+       /*
+        * Run task_work after scheduling. If we got woken because of
+        * task_work being processed, run it now rather than let the caller
+        * do another wait loop.
+        */
+       ret = io_run_task_work_sig(ctx);
+       return ret < 0 ? ret : 1;
  }
  
  /*
@@ -2535,6 +2553,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                 prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
                                                 TASK_INTERRUPTIBLE);
                 ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
+               if (__io_cqring_events_user(ctx) >= min_events)
+                       break;
                 cond_resched();
         } while (ret > 0);
  
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h

index 1b2f0b2cc888c37f3984989ba22a1b9458e71f6a..e9f0d41ebb9960189930e2699669fc206b0b764f 100644 (file)
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -87,17 +87,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
  #define io_for_each_link(pos, head) \
         for (pos = (head); pos; pos = pos->link)
  
-static inline void io_cq_lock(struct io_ring_ctx *ctx)
-       __acquires(ctx->completion_lock)
-{
-       spin_lock(&ctx->completion_lock);
-}
-
-static inline void io_cq_unlock(struct io_ring_ctx *ctx)
-{
-       spin_unlock(&ctx->completion_lock);
-}
-
  void io_cq_unlock_post(struct io_ring_ctx *ctx);
  
  static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
@@ -277,8 +266,7 @@ static inline int io_run_task_work(void)
  
  static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
  {
-       return test_thread_flag(TIF_NOTIFY_SIGNAL) ||
-               !wq_list_empty(&ctx->work_llist);
+       return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
  }
  
  static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
diff --git a/io_uring/net.c b/io_uring/net.c

index 5229976cb5829f8a5944a5bcb34f4175bd2c1bd8..fbc34a7c2743946c7fbabd91513489c8e25a0b25 100644 (file)
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -494,6 +494,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
         if (req->flags & REQ_F_BUFFER_SELECT) {
                 compat_ssize_t clen;
  
+               iomsg->free_iov = NULL;
                 if (msg.msg_iovlen == 0) {
                         sr->len = 0;
                 } else if (msg.msg_iovlen > 1) {
@@ -819,10 +820,10 @@ retry_multishot:
                 goto retry_multishot;
  
         if (mshot_finished) {
-               io_netmsg_recycle(req, issue_flags);
                 /* fast path, check for non-NULL to avoid function call */
                 if (kmsg->free_iov)
                         kfree(kmsg->free_iov);
+               io_netmsg_recycle(req, issue_flags);
                 req->flags &= ~REQ_F_NEED_CLEANUP;
         }
  
diff --git a/io_uring/rw.c b/io_uring/rw.c

index b9cac5706e8da71f7f7b9adaaa54931a2a544bf8..8227af2e1c0f5e0add7d364a8d5be13f62aa23dd 100644 (file)
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1062,7 +1062,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
                         continue;
  
                 req->cqe.flags = io_put_kbuf(req, 0);
-               __io_fill_cqe_req(req->ctx, req);
+               io_fill_cqe_req(req->ctx, req);
         }
  
         if (unlikely(!nr_events))
diff --git a/io_uring/timeout.c b/io_uring/timeout.c

index 5b4bc93fd6e07c83c92a8812c4d9ab7482ee2541..826a51bca3e498cbff5014bca0104a68c5890978 100644 (file)
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -50,7 +50,6 @@ static inline void io_put_req(struct io_kiocb *req)
  }
  
  static bool io_kill_timeout(struct io_kiocb *req, int status)
-       __must_hold(&req->ctx->completion_lock)
         __must_hold(&req->ctx->timeout_lock)
  {
         struct io_timeout_data *io = req->async_data;
@@ -70,12 +69,13 @@ static bool io_kill_timeout(struct io_kiocb *req, int status)
  }
  
  __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
-       __must_hold(&ctx->completion_lock)
  {
-       u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+       u32 seq;
         struct io_timeout *timeout, *tmp;
  
         spin_lock_irq(&ctx->timeout_lock);
+       seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+
         list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
                 struct io_kiocb *req = cmd_to_io_kiocb(timeout);
                 u32 events_needed, events_got;
@@ -622,7 +622,11 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
         struct io_timeout *timeout, *tmp;
         int canceled = 0;
  
-       io_cq_lock(ctx);
+       /*
+        * completion_lock is needed for io_match_task(). Take it first,
+        * before timeout_lock, to keep the lock ordering consistent.
+        */
+       spin_lock(&ctx->completion_lock);
         spin_lock_irq(&ctx->timeout_lock);
         list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
                 struct io_kiocb *req = cmd_to_io_kiocb(timeout);
@@ -632,6 +636,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
                         canceled++;
         }
         spin_unlock_irq(&ctx->timeout_lock);
-       io_cq_unlock_post(ctx);
+       spin_unlock(&ctx->completion_lock);
         return canceled != 0;
  }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 22 Dec 2022 00:28:25 +0000 (16:28 -0800)
MAINTAINERS		patch \| blob \| blame \| history
io_uring/io_uring.c		patch \| blob \| blame \| history
io_uring/io_uring.h		patch \| blob \| blame \| history
io_uring/net.c		patch \| blob \| blame \| history
io_uring/rw.c		patch \| blob \| blame \| history
io_uring/timeout.c		patch \| blob \| blame \| history