io_uring: add absolute mode wait timeouts
author Pavel Begunkov <asml.silence@gmail.com>
Wed, 7 Aug 2024 14:18:13 +0000 (15:18 +0100)
committer Jens Axboe <axboe@kernel.dk>
Sun, 25 Aug 2024 14:27:01 +0000 (08:27 -0600)
In addition to the current relative timeouts for the waiting loop, where the
timespec argument specifies the maximum time to wait, add support for an
absolute mode, in which the value carries a CLOCK_MONOTONIC absolute time
by which control should be returned to the user.

Suggested-by: Lewis Baker <lewissbaker@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/4d5b74d67ada882590b2e42aa3aa7117bbf6b55f.1723039801.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/uapi/linux/io_uring.h
io_uring/io_uring.c

index adc2524fd8e3dca4be2a5967eee3bd1bcebacd9a..6a81f55fcd0d27a01cf38f63db9bafbd93594cab 100644 (file)
@@ -507,6 +507,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_SQ_WAIT           (1U << 2)
 #define IORING_ENTER_EXT_ARG           (1U << 3)
 #define IORING_ENTER_REGISTERED_RING   (1U << 4)
+#define IORING_ENTER_ABS_TIMER         (1U << 5)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
index 9ec07f76ad1941daff85f02a1e8dccf0a4f35494..5282f988744093e100e72080287ac7c5a34bbc52 100644 (file)
@@ -2387,7 +2387,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
  */
-static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
                          const sigset_t __user *sig, size_t sigsz,
                          struct __kernel_timespec __user *uts)
 {
@@ -2416,13 +2416,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 
        if (uts) {
                struct timespec64 ts;
-               ktime_t dt;
 
                if (get_timespec64(&ts, uts))
                        return -EFAULT;
 
-               dt = timespec64_to_ktime(ts);
-               iowq.timeout = ktime_add(dt, ktime_get());
+               iowq.timeout = timespec64_to_ktime(ts);
+               if (!(flags & IORING_ENTER_ABS_TIMER))
+                       iowq.timeout = ktime_add(iowq.timeout, ktime_get());
        }
 
        if (sig) {
@@ -3153,7 +3153,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 
        if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
                               IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
-                              IORING_ENTER_REGISTERED_RING)))
+                              IORING_ENTER_REGISTERED_RING |
+                              IORING_ENTER_ABS_TIMER)))
                return -EINVAL;
 
        /*
@@ -3251,8 +3252,8 @@ iopoll_locked:
                        if (likely(!ret2)) {
                                min_complete = min(min_complete,
                                                   ctx->cq_entries);
-                               ret2 = io_cqring_wait(ctx, min_complete, sig,
-                                                     argsz, ts);
+                               ret2 = io_cqring_wait(ctx, min_complete, flags,
+                                                     sig, argsz, ts);
                        }
                }