From: Jens Axboe Date: Thu, 10 Jan 2019 21:22:08 +0000 (-0700) Subject: Update io_uring API X-Git-Tag: fio-3.13~40 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=2ea53ca36174ee16e331ecab33cb413799168e26 Update io_uring API - Fixed buffers are now available through io_uring_register() - Various thread/wq options are now dead and automatic instead - sqe->index is now sqe->buf_index - Fixed buffers require flag, not separate opcode Signed-off-by: Jens Axboe --- diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h index a5864bab..665c6b04 100644 --- a/arch/arch-x86_64.h +++ b/arch/arch-x86_64.h @@ -2,10 +2,13 @@ #define ARCH_X86_64_H #ifndef __NR_sys_io_uring_setup -#define __NR_sys_io_uring_setup 335 +#define __NR_sys_io_uring_setup 335 #endif #ifndef __NR_sys_io_uring_enter -#define __NR_sys_io_uring_enter 336 +#define __NR_sys_io_uring_enter 336 +#endif +#ifndef __NR_sys_io_uring_register +#define __NR_sys_io_uring_register 337 #endif static inline void do_cpuid(unsigned int *eax, unsigned int *ebx, diff --git a/engines/io_uring.c b/engines/io_uring.c index 77b4686a..39359af9 100644 --- a/engines/io_uring.c +++ b/engines/io_uring.c @@ -73,18 +73,16 @@ struct ioring_options { void *pad; unsigned int hipri; unsigned int fixedbufs; - unsigned int sqthread; - unsigned int sqthread_set; - unsigned int sqthread_poll; - unsigned int sqwq; + unsigned int sqpoll_set; + unsigned int sqpoll_cpu; }; -static int fio_ioring_sqthread_cb(void *data, unsigned long long *val) +static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val) { struct ioring_options *o = data; - o->sqthread = *val; - o->sqthread_set = 1; + o->sqpoll_cpu = *val; + o->sqpoll_set = 1; return 0; } @@ -107,30 +105,12 @@ static struct fio_option options[] = { .category = FIO_OPT_C_ENGINE, .group = FIO_OPT_G_LIBAIO, }, - { - .name = "sqthread", - .lname = "Use kernel SQ thread on this CPU", - .type = FIO_OPT_INT, - .cb = fio_ioring_sqthread_cb, - .help = "Offload submission to kernel 
thread", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_LIBAIO, - }, { .name = "sqthread_poll", .lname = "Kernel SQ thread should poll", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct ioring_options, sqthread_poll), - .help = "Used with sqthread, enables kernel side polling", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_LIBAIO, - }, - { - .name = "sqwq", - .lname = "Offload submission to kernel workqueue", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct ioring_options, sqwq), - .help = "Offload submission to kernel workqueue", + .type = FIO_OPT_INT, + .cb = fio_ioring_sqpoll_cb, + .help = "Offload submission to kernel thread", .category = FIO_OPT_C_ENGINE, .group = FIO_OPT_G_LIBAIO, }, @@ -157,21 +137,20 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u) sqe->fd = f->fd; sqe->flags = 0; sqe->ioprio = 0; + sqe->buf_index = 0; if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) { + if (io_u->ddir == DDIR_READ) + sqe->opcode = IORING_OP_READV; + else + sqe->opcode = IORING_OP_WRITEV; + if (o->fixedbufs) { - if (io_u->ddir == DDIR_READ) - sqe->opcode = IORING_OP_READ_FIXED; - else - sqe->opcode = IORING_OP_WRITE_FIXED; + sqe->flags |= IOSQE_FIXED_BUFFER; sqe->addr = io_u->xfer_buf; sqe->len = io_u->xfer_buflen; - sqe->index = io_u->index; + sqe->buf_index = io_u->index; } else { - if (io_u->ddir == DDIR_READ) - sqe->opcode = IORING_OP_READV; - else - sqe->opcode = IORING_OP_WRITEV; sqe->addr = &ld->iovecs[io_u->index]; sqe->len = 1; } @@ -252,7 +231,7 @@ static int fio_ioring_getevents(struct thread_data *td, unsigned int min, continue; } - if (!o->sqthread_poll) { + if (!o->sqpoll_set) { r = io_uring_enter(ld, 0, actual_min, IORING_ENTER_GETEVENTS); if (r < 0) { @@ -335,9 +314,10 @@ static int fio_ioring_commit(struct thread_data *td) return 0; /* Nothing to do */ - if (o->sqthread_poll) { + if (o->sqpoll_set) { struct io_sq_ring *ring = &ld->sq_ring; + read_barrier(); if (*ring->flags & IORING_SQ_NEED_WAKEUP) 
io_uring_enter(ld, ld->queued, 0, 0); ld->queued = 0; @@ -447,7 +427,6 @@ static int fio_ioring_queue_init(struct thread_data *td) struct ioring_data *ld = td->io_ops_data; struct ioring_options *o = td->eo; int depth = td->o.iodepth; - struct iovec *vecs = NULL; struct io_uring_params p; int ret; @@ -455,14 +434,10 @@ static int fio_ioring_queue_init(struct thread_data *td) if (o->hipri) p.flags |= IORING_SETUP_IOPOLL; - if (o->sqthread_set) { - p.sq_thread_cpu = o->sqthread; - p.flags |= IORING_SETUP_SQTHREAD; - if (o->sqthread_poll) - p.flags |= IORING_SETUP_SQPOLL; + if (o->sqpoll_set) { + p.flags |= IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF; + p.sq_thread_cpu = o->sqpoll_cpu; } - if (o->sqwq) - p.flags |= IORING_SETUP_SQWQ; if (o->fixedbufs) { struct rlimit rlim = { @@ -471,14 +446,26 @@ static int fio_ioring_queue_init(struct thread_data *td) }; setrlimit(RLIMIT_MEMLOCK, &rlim); - vecs = ld->iovecs; } - ret = syscall(__NR_sys_io_uring_setup, depth, vecs, depth, &p); + ret = syscall(__NR_sys_io_uring_setup, depth, &p); if (ret < 0) return ret; ld->ring_fd = ret; + + if (o->fixedbufs) { + struct io_uring_register_buffers reg = { + .iovecs = ld->iovecs, + .nr_iovecs = depth + }; + + ret = syscall(__NR_sys_io_uring_register, ld->ring_fd, + IORING_REGISTER_BUFFERS, &reg); + if (ret < 0) + return ret; + } + return fio_ioring_mmap(ld, &p); } diff --git a/os/io_uring.h b/os/io_uring.h index b07bbbb3..613930db 100644 --- a/os/io_uring.h +++ b/os/io_uring.h @@ -29,25 +29,27 @@ struct io_uring_sqe { __kernel_rwf_t rw_flags; __u32 __resv; }; - __u16 index; /* index into fixed buffers, if used */ + __u16 buf_index; /* index into fixed buffers, if used */ __u16 __pad2[3]; __u64 data; /* data to be passed back at completion time */ }; +/* + * sqe->flags + */ +#define IOSQE_FIXED_BUFFER (1 << 0) /* use fixed buffer */ + /* * io_uring_setup() flags */ #define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */ -#define IORING_SETUP_SQTHREAD (1 << 1) /* Use SQ thread */ 
-#define IORING_SETUP_SQWQ (1 << 2) /* Use SQ workqueue */ -#define IORING_SETUP_SQPOLL (1 << 3) /* SQ thread polls */ +#define IORING_SETUP_SQPOLL (1 << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1 << 2) /* sq_thread_cpu is valid */ #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 #define IORING_OP_FSYNC 3 #define IORING_OP_FDSYNC 4 -#define IORING_OP_READ_FIXED 5 -#define IORING_OP_WRITE_FIXED 6 /* * IO completion data structure (Completion Queue Entry) @@ -114,4 +116,15 @@ struct io_uring_params { struct io_cqring_offsets cq_off; }; +/* + * io_uring_register(2) opcodes and arguments + */ +#define IORING_REGISTER_BUFFERS 0 +#define IORING_UNREGISTER_BUFFERS 1 + +struct io_uring_register_buffers { + struct iovec *iovecs; + unsigned nr_iovecs; +}; + #endif diff --git a/t/io_uring.c b/t/io_uring.c index 76da6b29..af20bbf3 100644 --- a/t/io_uring.c +++ b/t/io_uring.c @@ -85,13 +85,23 @@ static volatile int finish; static int polled = 1; /* use IO polling */ static int fixedbufs = 0; /* use fixed user buffers */ static int buffered = 0; /* use buffered IO, not O_DIRECT */ -static int sq_thread = 0; /* use kernel submission thread */ +static int sq_thread = 0; /* use kernel submission/poller thread */ static int sq_thread_cpu = 0; /* pin above thread to this CPU */ -static int io_uring_setup(unsigned entries, struct iovec *iovecs, - unsigned nr_iovecs, struct io_uring_params *p) +static int io_uring_register_buffers(struct submitter *s) { - return syscall(__NR_sys_io_uring_setup, entries, iovecs, nr_iovecs, p); + struct io_uring_register_buffers reg = { + .iovecs = s->iovecs, + .nr_iovecs = DEPTH + }; + + return syscall(__NR_sys_io_uring_register, s->ring_fd, + IORING_REGISTER_BUFFERS, &reg); +} + +static int io_uring_setup(unsigned entries, struct io_uring_params *p) +{ + return syscall(__NR_sys_io_uring_setup, entries, p); } static int io_uring_enter(struct submitter *s, unsigned int to_submit, @@ -121,17 +131,18 @@ static void init_io(struct submitter 
*s, unsigned index) lrand48_r(&s->rand, &r); offset = (r % (f->max_blocks - 1)) * BS; + sqe->flags = 0; + sqe->opcode = IORING_OP_READV; if (fixedbufs) { - sqe->opcode = IORING_OP_READ_FIXED; sqe->addr = s->iovecs[index].iov_base; sqe->len = BS; - sqe->index = index; + sqe->buf_index = index; + sqe->flags |= IOSQE_FIXED_BUFFER; } else { - sqe->opcode = IORING_OP_READV; sqe->addr = &s->iovecs[index]; sqe->len = 1; + sqe->buf_index = 0; } - sqe->flags = 0; sqe->ioprio = 0; sqe->fd = f->fd; sqe->off = offset; @@ -308,30 +319,33 @@ static int setup_ring(struct submitter *s) struct io_sq_ring *sring = &s->sq_ring; struct io_cq_ring *cring = &s->cq_ring; struct io_uring_params p; + int ret, fd; void *ptr; - int fd; memset(&p, 0, sizeof(p)); if (polled) p.flags |= IORING_SETUP_IOPOLL; - if (buffered) - p.flags |= IORING_SETUP_SQWQ; - else if (sq_thread) { - p.flags |= IORING_SETUP_SQTHREAD; + if (sq_thread) { + p.flags |= IORING_SETUP_SQPOLL; p.sq_thread_cpu = sq_thread_cpu; } - if (fixedbufs) - fd = io_uring_setup(DEPTH, s->iovecs, DEPTH, &p); - else - fd = io_uring_setup(DEPTH, NULL, 0, &p); + fd = io_uring_setup(DEPTH, &p); if (fd < 0) { perror("io_uring_setup"); return 1; } - s->ring_fd = fd; + + if (fixedbufs) { + ret = io_uring_register_buffers(s); + if (ret < 0) { + perror("io_uring_register"); + return 1; + } + } + ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);