Update io_uring API
author Jens Axboe <axboe@kernel.dk>
Thu, 10 Jan 2019 21:22:08 +0000 (14:22 -0700)
committer Jens Axboe <axboe@kernel.dk>
Thu, 10 Jan 2019 21:35:29 +0000 (14:35 -0700)
- Fixed buffers are now available through io_uring_register()
- Various thread/wq options have been removed; that behavior is now automatic
- sqe->index is now sqe->buf_index
- Fixed buffers require flag, not separate opcode

Signed-off-by: Jens Axboe <axboe@kernel.dk>
arch/arch-x86_64.h
engines/io_uring.c
os/io_uring.h
t/io_uring.c

index a5864bab0fe012afab2a454403dc85f073dc7ff2..665c6b0457befa70c2091902665bafd1b184316e 100644 (file)
@@ -2,10 +2,13 @@
 #define ARCH_X86_64_H
 
 #ifndef __NR_sys_io_uring_setup
-#define __NR_sys_io_uring_setup        335
+#define __NR_sys_io_uring_setup                335
 #endif
 #ifndef __NR_sys_io_uring_enter
-#define __NR_sys_io_uring_enter        336
+#define __NR_sys_io_uring_enter                336
+#endif
+#ifndef __NR_sys_io_uring_register
+#define __NR_sys_io_uring_register     337
 #endif
 
 static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
index 77b4686a6d1d6680203039aac0ed79a11573455d..39359af909c7b199677dcadf784778435873b28c 100644 (file)
@@ -73,18 +73,16 @@ struct ioring_options {
        void *pad;
        unsigned int hipri;
        unsigned int fixedbufs;
-       unsigned int sqthread;
-       unsigned int sqthread_set;
-       unsigned int sqthread_poll;
-       unsigned int sqwq;
+       unsigned int sqpoll_set;
+       unsigned int sqpoll_cpu;
 };
 
-static int fio_ioring_sqthread_cb(void *data, unsigned long long *val)
+static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val)
 {
        struct ioring_options *o = data;
 
-       o->sqthread = *val;
-       o->sqthread_set = 1;
+       o->sqpoll_cpu = *val;
+       o->sqpoll_set = 1;
        return 0;
 }
 
@@ -107,30 +105,12 @@ static struct fio_option options[] = {
                .category = FIO_OPT_C_ENGINE,
                .group  = FIO_OPT_G_LIBAIO,
        },
-       {
-               .name   = "sqthread",
-               .lname  = "Use kernel SQ thread on this CPU",
-               .type   = FIO_OPT_INT,
-               .cb     = fio_ioring_sqthread_cb,
-               .help   = "Offload submission to kernel thread",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBAIO,
-       },
        {
                .name   = "sqthread_poll",
                .lname  = "Kernel SQ thread should poll",
-               .type   = FIO_OPT_STR_SET,
-               .off1   = offsetof(struct ioring_options, sqthread_poll),
-               .help   = "Used with sqthread, enables kernel side polling",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBAIO,
-       },
-       {
-               .name   = "sqwq",
-               .lname  = "Offload submission to kernel workqueue",
-               .type   = FIO_OPT_STR_SET,
-               .off1   = offsetof(struct ioring_options, sqwq),
-               .help   = "Offload submission to kernel workqueue",
+               .type   = FIO_OPT_INT,
+               .cb     = fio_ioring_sqpoll_cb,
+               .help   = "Offload submission to kernel thread",
                .category = FIO_OPT_C_ENGINE,
                .group  = FIO_OPT_G_LIBAIO,
        },
@@ -157,21 +137,20 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
        sqe->fd = f->fd;
        sqe->flags = 0;
        sqe->ioprio = 0;
+       sqe->buf_index = 0;
 
        if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+               if (io_u->ddir == DDIR_READ)
+                       sqe->opcode = IORING_OP_READV;
+               else
+                       sqe->opcode = IORING_OP_WRITEV;
+
                if (o->fixedbufs) {
-                       if (io_u->ddir == DDIR_READ)
-                               sqe->opcode = IORING_OP_READ_FIXED;
-                       else
-                               sqe->opcode = IORING_OP_WRITE_FIXED;
+                       sqe->flags |= IOSQE_FIXED_BUFFER;
                        sqe->addr = io_u->xfer_buf;
                        sqe->len = io_u->xfer_buflen;
-                       sqe->index = io_u->index;
+                       sqe->buf_index = io_u->index;
                } else {
-                       if (io_u->ddir == DDIR_READ)
-                               sqe->opcode = IORING_OP_READV;
-                       else
-                               sqe->opcode = IORING_OP_WRITEV;
                        sqe->addr = &ld->iovecs[io_u->index];
                        sqe->len = 1;
                }
@@ -252,7 +231,7 @@ static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
                        continue;
                }
 
-               if (!o->sqthread_poll) {
+               if (!o->sqpoll_set) {
                        r = io_uring_enter(ld, 0, actual_min,
                                                IORING_ENTER_GETEVENTS);
                        if (r < 0) {
@@ -335,9 +314,10 @@ static int fio_ioring_commit(struct thread_data *td)
                return 0;
 
        /* Nothing to do */
-       if (o->sqthread_poll) {
+       if (o->sqpoll_set) {
                struct io_sq_ring *ring = &ld->sq_ring;
 
+               read_barrier();
                if (*ring->flags & IORING_SQ_NEED_WAKEUP)
                        io_uring_enter(ld, ld->queued, 0, 0);
                ld->queued = 0;
@@ -447,7 +427,6 @@ static int fio_ioring_queue_init(struct thread_data *td)
        struct ioring_data *ld = td->io_ops_data;
        struct ioring_options *o = td->eo;
        int depth = td->o.iodepth;
-       struct iovec *vecs = NULL;
        struct io_uring_params p;
        int ret;
 
@@ -455,14 +434,10 @@ static int fio_ioring_queue_init(struct thread_data *td)
 
        if (o->hipri)
                p.flags |= IORING_SETUP_IOPOLL;
-       if (o->sqthread_set) {
-               p.sq_thread_cpu = o->sqthread;
-               p.flags |= IORING_SETUP_SQTHREAD;
-               if (o->sqthread_poll)
-                       p.flags |= IORING_SETUP_SQPOLL;
+       if (o->sqpoll_set) {
+               p.flags |= IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
+               p.sq_thread_cpu = o->sqpoll_cpu;
        }
-       if (o->sqwq)
-               p.flags |= IORING_SETUP_SQWQ;
 
        if (o->fixedbufs) {
                struct rlimit rlim = {
@@ -471,14 +446,26 @@ static int fio_ioring_queue_init(struct thread_data *td)
                };
 
                setrlimit(RLIMIT_MEMLOCK, &rlim);
-               vecs = ld->iovecs;
        }
 
-       ret = syscall(__NR_sys_io_uring_setup, depth, vecs, depth, &p);
+       ret = syscall(__NR_sys_io_uring_setup, depth, &p);
        if (ret < 0)
                return ret;
 
        ld->ring_fd = ret;
+
+       if (o->fixedbufs) {
+               struct io_uring_register_buffers reg = {
+                       .iovecs = ld->iovecs,
+                       .nr_iovecs = depth
+               };
+
+               ret = syscall(__NR_sys_io_uring_register, ld->ring_fd,
+                               IORING_REGISTER_BUFFERS, &reg);
+               if (ret < 0)
+                       return ret;
+       }
+
        return fio_ioring_mmap(ld, &p);
 }
 
index b07bbbb3dd656dc4fbd2f14bc867b11eca0d5b49..613930dbf3253ef744263599383211f91d30995f 100644 (file)
@@ -29,25 +29,27 @@ struct io_uring_sqe {
                __kernel_rwf_t  rw_flags;
                __u32           __resv;
        };
-       __u16   index;          /* index into fixed buffers, if used */
+       __u16   buf_index;      /* index into fixed buffers, if used */
        __u16   __pad2[3];
        __u64   data;           /* data to be passed back at completion time */
 };
 
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_BUFFER     (1 << 0)        /* use fixed buffer */
+
 /*
  * io_uring_setup() flags
  */
 #define IORING_SETUP_IOPOLL    (1 << 0)        /* io_context is polled */
-#define        IORING_SETUP_SQTHREAD   (1 << 1)        /* Use SQ thread */
-#define IORING_SETUP_SQWQ      (1 << 2)        /* Use SQ workqueue */
-#define IORING_SETUP_SQPOLL    (1 << 3)        /* SQ thread polls */
+#define IORING_SETUP_SQPOLL    (1 << 1)        /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF    (1 << 2)        /* sq_thread_cpu is valid */
 
 #define IORING_OP_READV                1
 #define IORING_OP_WRITEV       2
 #define IORING_OP_FSYNC                3
 #define IORING_OP_FDSYNC       4
-#define IORING_OP_READ_FIXED   5
-#define IORING_OP_WRITE_FIXED  6
 
 /*
  * IO completion data structure (Completion Queue Entry)
@@ -114,4 +116,15 @@ struct io_uring_params {
        struct io_cqring_offsets cq_off;
 };
 
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS                0
+#define IORING_UNREGISTER_BUFFERS      1
+
+struct io_uring_register_buffers {
+       struct iovec *iovecs;
+       unsigned nr_iovecs;
+};
+
 #endif
index 76da6b292f03e2d21623b1d2f48f4bb88dfdb149..af20bbf3dad3800d7645ce208aa9f75d4db5a6d9 100644 (file)
@@ -85,13 +85,23 @@ static volatile int finish;
 static int polled = 1;         /* use IO polling */
 static int fixedbufs = 0;      /* use fixed user buffers */
 static int buffered = 0;       /* use buffered IO, not O_DIRECT */
-static int sq_thread = 0;      /* use kernel submission thread */
+static int sq_thread = 0;      /* use kernel submission/poller thread */
 static int sq_thread_cpu = 0;  /* pin above thread to this CPU */
 
-static int io_uring_setup(unsigned entries, struct iovec *iovecs,
-                         unsigned nr_iovecs, struct io_uring_params *p)
+static int io_uring_register_buffers(struct submitter *s)
 {
-       return syscall(__NR_sys_io_uring_setup, entries, iovecs, nr_iovecs, p);
+       struct io_uring_register_buffers reg = {
+               .iovecs = s->iovecs,
+               .nr_iovecs = DEPTH
+       };
+
+       return syscall(__NR_sys_io_uring_register, s->ring_fd,
+                       IORING_REGISTER_BUFFERS, &reg);
+}
+
+static int io_uring_setup(unsigned entries, struct io_uring_params *p)
+{
+       return syscall(__NR_sys_io_uring_setup, entries, p);
 }
 
 static int io_uring_enter(struct submitter *s, unsigned int to_submit,
@@ -121,17 +131,18 @@ static void init_io(struct submitter *s, unsigned index)
        lrand48_r(&s->rand, &r);
        offset = (r % (f->max_blocks - 1)) * BS;
 
+       sqe->flags = 0;
+       sqe->opcode = IORING_OP_READV;
        if (fixedbufs) {
-               sqe->opcode = IORING_OP_READ_FIXED;
                sqe->addr = s->iovecs[index].iov_base;
                sqe->len = BS;
-               sqe->index = index;
+               sqe->buf_index = index;
+               sqe->flags |= IOSQE_FIXED_BUFFER;
        } else {
-               sqe->opcode = IORING_OP_READV;
                sqe->addr = &s->iovecs[index];
                sqe->len = 1;
+               sqe->buf_index = 0;
        }
-       sqe->flags = 0;
        sqe->ioprio = 0;
        sqe->fd = f->fd;
        sqe->off = offset;
@@ -308,30 +319,33 @@ static int setup_ring(struct submitter *s)
        struct io_sq_ring *sring = &s->sq_ring;
        struct io_cq_ring *cring = &s->cq_ring;
        struct io_uring_params p;
+       int ret, fd;
        void *ptr;
-       int fd;
 
        memset(&p, 0, sizeof(p));
 
        if (polled)
                p.flags |= IORING_SETUP_IOPOLL;
-       if (buffered)
-               p.flags |= IORING_SETUP_SQWQ;
-       else if (sq_thread) {
-               p.flags |= IORING_SETUP_SQTHREAD;
+       if (sq_thread) {
+               p.flags |= IORING_SETUP_SQPOLL;
                p.sq_thread_cpu = sq_thread_cpu;
        }
 
-       if (fixedbufs)
-               fd = io_uring_setup(DEPTH, s->iovecs, DEPTH, &p);
-       else
-               fd = io_uring_setup(DEPTH, NULL, 0, &p);
+       fd = io_uring_setup(DEPTH, &p);
        if (fd < 0) {
                perror("io_uring_setup");
                return 1;
        }
-
        s->ring_fd = fd;
+
+       if (fixedbufs) {
+               ret = io_uring_register_buffers(s);
+               if (ret < 0) {
+                       perror("io_uring_register");
+                       return 1;
+               }
+       }
+
        ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
                        IORING_OFF_SQ_RING);