#define ARCH_X86_64_H
#ifndef __NR_sys_io_uring_setup
-#define __NR_sys_io_uring_setup 335
+#define __NR_sys_io_uring_setup 335
#endif
#ifndef __NR_sys_io_uring_enter
-#define __NR_sys_io_uring_enter 336
+#define __NR_sys_io_uring_enter 336
+#endif
+#ifndef __NR_sys_io_uring_register
+#define __NR_sys_io_uring_register 337
#endif
static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
void *pad;
unsigned int hipri;
unsigned int fixedbufs;
- unsigned int sqthread;
- unsigned int sqthread_set;
- unsigned int sqthread_poll;
- unsigned int sqwq;
+ unsigned int sqpoll_set;
+ unsigned int sqpoll_cpu;
};
-static int fio_ioring_sqthread_cb(void *data, unsigned long long *val)
+static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val)
{
struct ioring_options *o = data;
- o->sqthread = *val;
- o->sqthread_set = 1;
+ o->sqpoll_cpu = *val;
+ o->sqpoll_set = 1;
return 0;
}
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_LIBAIO,
},
- {
- .name = "sqthread",
- .lname = "Use kernel SQ thread on this CPU",
- .type = FIO_OPT_INT,
- .cb = fio_ioring_sqthread_cb,
- .help = "Offload submission to kernel thread",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
- },
{
.name = "sqthread_poll",
.lname = "Kernel SQ thread should poll",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct ioring_options, sqthread_poll),
- .help = "Used with sqthread, enables kernel side polling",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
- },
- {
- .name = "sqwq",
- .lname = "Offload submission to kernel workqueue",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct ioring_options, sqwq),
- .help = "Offload submission to kernel workqueue",
+ .type = FIO_OPT_INT,
+ .cb = fio_ioring_sqpoll_cb,
+ .help = "Offload submission to kernel thread",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_LIBAIO,
},
sqe->fd = f->fd;
sqe->flags = 0;
sqe->ioprio = 0;
+ sqe->buf_index = 0;
if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+ if (io_u->ddir == DDIR_READ)
+ sqe->opcode = IORING_OP_READV;
+ else
+ sqe->opcode = IORING_OP_WRITEV;
+
if (o->fixedbufs) {
- if (io_u->ddir == DDIR_READ)
- sqe->opcode = IORING_OP_READ_FIXED;
- else
- sqe->opcode = IORING_OP_WRITE_FIXED;
+ sqe->flags |= IOSQE_FIXED_BUFFER;
sqe->addr = io_u->xfer_buf;
sqe->len = io_u->xfer_buflen;
- sqe->index = io_u->index;
+ sqe->buf_index = io_u->index;
} else {
- if (io_u->ddir == DDIR_READ)
- sqe->opcode = IORING_OP_READV;
- else
- sqe->opcode = IORING_OP_WRITEV;
sqe->addr = &ld->iovecs[io_u->index];
sqe->len = 1;
}
continue;
}
- if (!o->sqthread_poll) {
+ if (!o->sqpoll_set) {
r = io_uring_enter(ld, 0, actual_min,
IORING_ENTER_GETEVENTS);
if (r < 0) {
return 0;
/* Nothing to do */
- if (o->sqthread_poll) {
+ if (o->sqpoll_set) {
struct io_sq_ring *ring = &ld->sq_ring;
+ read_barrier();
if (*ring->flags & IORING_SQ_NEED_WAKEUP)
io_uring_enter(ld, ld->queued, 0, 0);
ld->queued = 0;
struct ioring_data *ld = td->io_ops_data;
struct ioring_options *o = td->eo;
int depth = td->o.iodepth;
- struct iovec *vecs = NULL;
struct io_uring_params p;
int ret;
if (o->hipri)
p.flags |= IORING_SETUP_IOPOLL;
- if (o->sqthread_set) {
- p.sq_thread_cpu = o->sqthread;
- p.flags |= IORING_SETUP_SQTHREAD;
- if (o->sqthread_poll)
- p.flags |= IORING_SETUP_SQPOLL;
+ if (o->sqpoll_set) {
+ p.flags |= IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
+ p.sq_thread_cpu = o->sqpoll_cpu;
}
- if (o->sqwq)
- p.flags |= IORING_SETUP_SQWQ;
if (o->fixedbufs) {
struct rlimit rlim = {
};
setrlimit(RLIMIT_MEMLOCK, &rlim);
- vecs = ld->iovecs;
}
- ret = syscall(__NR_sys_io_uring_setup, depth, vecs, depth, &p);
+ ret = syscall(__NR_sys_io_uring_setup, depth, &p);
if (ret < 0)
return ret;
ld->ring_fd = ret;
+
+ if (o->fixedbufs) {
+ struct io_uring_register_buffers reg = {
+ .iovecs = ld->iovecs,
+ .nr_iovecs = depth
+ };
+
+ ret = syscall(__NR_sys_io_uring_register, ld->ring_fd,
+ IORING_REGISTER_BUFFERS, &reg);
+ if (ret < 0)
+ return ret;
+ }
+
return fio_ioring_mmap(ld, &p);
}
__kernel_rwf_t rw_flags;
__u32 __resv;
};
- __u16 index; /* index into fixed buffers, if used */
+ __u16 buf_index; /* index into fixed buffers, if used */
__u16 __pad2[3];
__u64 data; /* data to be passed back at completion time */
};
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_BUFFER (1 << 0) /* use fixed buffer */
+
/*
* io_uring_setup() flags
*/
#define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */
-#define IORING_SETUP_SQTHREAD (1 << 1) /* Use SQ thread */
-#define IORING_SETUP_SQWQ (1 << 2) /* Use SQ workqueue */
-#define IORING_SETUP_SQPOLL (1 << 3) /* SQ thread polls */
+#define IORING_SETUP_SQPOLL (1 << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF (1 << 2) /* sq_thread_cpu is valid */
#define IORING_OP_READV 1
#define IORING_OP_WRITEV 2
#define IORING_OP_FSYNC 3
#define IORING_OP_FDSYNC 4
-#define IORING_OP_READ_FIXED 5
-#define IORING_OP_WRITE_FIXED 6
/*
* IO completion data structure (Completion Queue Entry)
struct io_cqring_offsets cq_off;
};
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS 0
+#define IORING_UNREGISTER_BUFFERS 1
+
+struct io_uring_register_buffers {
+ struct iovec *iovecs;
+ unsigned nr_iovecs;
+};
+
#endif
static int polled = 1; /* use IO polling */
static int fixedbufs = 0; /* use fixed user buffers */
static int buffered = 0; /* use buffered IO, not O_DIRECT */
-static int sq_thread = 0; /* use kernel submission thread */
+static int sq_thread = 0; /* use kernel submission/poller thread */
static int sq_thread_cpu = 0; /* pin above thread to this CPU */
-static int io_uring_setup(unsigned entries, struct iovec *iovecs,
- unsigned nr_iovecs, struct io_uring_params *p)
+static int io_uring_register_buffers(struct submitter *s)
{
- return syscall(__NR_sys_io_uring_setup, entries, iovecs, nr_iovecs, p);
+ struct io_uring_register_buffers reg = {
+ .iovecs = s->iovecs,
+ .nr_iovecs = DEPTH
+ };
+
+ return syscall(__NR_sys_io_uring_register, s->ring_fd,
+ IORING_REGISTER_BUFFERS, &reg);
+}
+
+static int io_uring_setup(unsigned entries, struct io_uring_params *p)
+{
+ return syscall(__NR_sys_io_uring_setup, entries, p);
}
static int io_uring_enter(struct submitter *s, unsigned int to_submit,
lrand48_r(&s->rand, &r);
offset = (r % (f->max_blocks - 1)) * BS;
+ sqe->flags = 0;
+ sqe->opcode = IORING_OP_READV;
if (fixedbufs) {
- sqe->opcode = IORING_OP_READ_FIXED;
sqe->addr = s->iovecs[index].iov_base;
sqe->len = BS;
- sqe->index = index;
+ sqe->buf_index = index;
+ sqe->flags |= IOSQE_FIXED_BUFFER;
} else {
- sqe->opcode = IORING_OP_READV;
sqe->addr = &s->iovecs[index];
sqe->len = 1;
+ sqe->buf_index = 0;
}
- sqe->flags = 0;
sqe->ioprio = 0;
sqe->fd = f->fd;
sqe->off = offset;
struct io_sq_ring *sring = &s->sq_ring;
struct io_cq_ring *cring = &s->cq_ring;
struct io_uring_params p;
+ int ret, fd;
void *ptr;
- int fd;
memset(&p, 0, sizeof(p));
if (polled)
p.flags |= IORING_SETUP_IOPOLL;
- if (buffered)
- p.flags |= IORING_SETUP_SQWQ;
- else if (sq_thread) {
- p.flags |= IORING_SETUP_SQTHREAD;
+ if (sq_thread) {
+ p.flags |= IORING_SETUP_SQPOLL;
p.sq_thread_cpu = sq_thread_cpu;
}
- if (fixedbufs)
- fd = io_uring_setup(DEPTH, s->iovecs, DEPTH, &p);
- else
- fd = io_uring_setup(DEPTH, NULL, 0, &p);
+ fd = io_uring_setup(DEPTH, &p);
if (fd < 0) {
perror("io_uring_setup");
return 1;
}
-
s->ring_fd = fd;
+
+ if (fixedbufs) {
+ ret = io_uring_register_buffers(s);
+ if (ret < 0) {
+ perror("io_uring_register");
+ return 1;
+ }
+ }
+
ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQ_RING);