unsigned *tail;
unsigned *ring_mask;
unsigned *ring_entries;
- struct io_uring_event *events;
+ struct io_uring_cqe *cqes;
};
struct ioring_mmap {
struct io_u **io_u_index;
struct io_sq_ring sq_ring;
- struct io_uring_iocb *iocbs;
+ struct io_uring_sqe *sqes;
struct iovec *iovecs;
unsigned sq_ring_mask;
struct ioring_data *ld = td->io_ops_data;
struct ioring_options *o = td->eo;
struct fio_file *f = io_u->file;
- struct io_uring_iocb *iocb;
+ struct io_uring_sqe *sqe;
- iocb = &ld->iocbs[io_u->index];
- iocb->fd = f->fd;
- iocb->flags = 0;
- iocb->ioprio = 0;
+ sqe = &ld->sqes[io_u->index];
+ sqe->fd = f->fd;
+ sqe->flags = 0;
+ sqe->ioprio = 0;
if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
- if (io_u->ddir == DDIR_READ) {
- if (o->fixedbufs)
- iocb->opcode = IORING_OP_READ_FIXED;
+ if (o->fixedbufs) {
+ if (io_u->ddir == DDIR_READ)
+ sqe->opcode = IORING_OP_READ_FIXED;
else
- iocb->opcode = IORING_OP_READ;
+ sqe->opcode = IORING_OP_WRITE_FIXED;
+ sqe->addr = io_u->xfer_buf;
+ sqe->len = io_u->xfer_buflen;
} else {
- if (o->fixedbufs)
- iocb->opcode = IORING_OP_WRITE_FIXED;
+ if (io_u->ddir == DDIR_READ)
+ sqe->opcode = IORING_OP_READV;
else
- iocb->opcode = IORING_OP_WRITE;
+ sqe->opcode = IORING_OP_WRITEV;
+ sqe->addr = &ld->iovecs[io_u->index];
+ sqe->len = 1;
}
- iocb->off = io_u->offset;
- iocb->addr = io_u->xfer_buf;
- iocb->len = io_u->xfer_buflen;
+ sqe->off = io_u->offset;
} else if (ddir_sync(io_u->ddir))
- iocb->opcode = IORING_OP_FSYNC;
+ sqe->opcode = IORING_OP_FSYNC;
return 0;
}
static struct io_u *fio_ioring_event(struct thread_data *td, int event)
{
struct ioring_data *ld = td->io_ops_data;
- struct io_uring_event *ev;
+ struct io_uring_cqe *cqe;
struct io_u *io_u;
unsigned index;
index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
- ev = &ld->cq_ring.events[index];
- io_u = ld->io_u_index[ev->index];
+ cqe = &ld->cq_ring.cqes[index];
+ io_u = ld->io_u_index[cqe->index];
- if (ev->res != io_u->xfer_buflen) {
- if (ev->res > io_u->xfer_buflen)
- io_u->error = -ev->res;
+ if (cqe->res != io_u->xfer_buflen) {
+ if (cqe->res > io_u->xfer_buflen)
+ io_u->error = -cqe->res;
else
- io_u->resid = io_u->xfer_buflen - ev->res;
+ io_u->resid = io_u->xfer_buflen - cqe->res;
} else
io_u->error = 0;
if (io_u->ddir == DDIR_READ) {
- if (ev->flags & IOEV_FLAG_CACHEHIT)
+ if (cqe->flags & IOCQE_FLAG_CACHEHIT)
ld->cachehit++;
else
ld->cachemiss++;
sring->array = ptr + p->sq_off.array;
ld->sq_ring_mask = *sring->ring_mask;
- ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_iocb);
- ld->iocbs = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
+ ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
+ ld->sqes = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, ld->ring_fd,
- IORING_OFF_IOCB);
- ld->mmap[1].ptr = ld->iocbs;
+ IORING_OFF_SQES);
+ ld->mmap[1].ptr = ld->sqes;
- ld->mmap[2].len = p->cq_off.events +
- p->cq_entries * sizeof(struct io_uring_event);
+ ld->mmap[2].len = p->cq_off.cqes +
+ p->cq_entries * sizeof(struct io_uring_cqe);
ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, ld->ring_fd,
IORING_OFF_CQ_RING);
cring->tail = ptr + p->cq_off.tail;
cring->ring_mask = ptr + p->cq_off.ring_mask;
cring->ring_entries = ptr + p->cq_off.ring_entries;
- cring->events = ptr + p->cq_off.events;
+ cring->cqes = ptr + p->cq_off.cqes;
ld->cq_ring_mask = *cring->ring_mask;
return 0;
}
};
setrlimit(RLIMIT_MEMLOCK, &rlim);
- p.flags |= IORING_SETUP_FIXEDBUFS;
}
ret = syscall(__NR_sys_io_uring_setup, depth, ld->iovecs, &p);
static int fio_ioring_init(struct thread_data *td)
{
+ struct ioring_options *o = td->eo;
struct ioring_data *ld;
ld = calloc(1, sizeof(*ld));
ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));
ld->io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
- ld->iovecs = calloc(td->o.iodepth, sizeof(struct iovec));
+ if (o->fixedbufs)
+ ld->iovecs = calloc(td->o.iodepth, sizeof(struct iovec));
td->io_ops_data = ld;
return 0;
#include <linux/types.h>
/*
- * IO submission data structure
+ * IO submission data structure (Submission Queue Entry)
*/
-struct io_uring_iocb {
+struct io_uring_sqe {
__u8 opcode;
__u8 flags;
__u16 ioprio;
* io_uring_setup() flags
*/
#define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */
-#define IORING_SETUP_FIXEDBUFS (1 << 1) /* IO buffers are fixed */
-#define IORING_SETUP_SQTHREAD (1 << 2) /* Use SQ thread */
-#define IORING_SETUP_SQWQ (1 << 3) /* Use SQ workqueue */
-#define IORING_SETUP_SQPOLL (1 << 4) /* SQ thread polls */
+#define IORING_SETUP_SQTHREAD (1 << 1) /* Use SQ thread */
+#define IORING_SETUP_SQWQ (1 << 2) /* Use SQ workqueue */
+#define IORING_SETUP_SQPOLL (1 << 3) /* SQ thread polls */
-#define IORING_OP_READ 1
-#define IORING_OP_WRITE 2
+#define IORING_OP_READV 1
+#define IORING_OP_WRITEV 2
#define IORING_OP_FSYNC 3
#define IORING_OP_FDSYNC 4
#define IORING_OP_READ_FIXED 5
#define IORING_OP_WRITE_FIXED 6
/*
- * IO completion data structure
+ * IO completion data structure (Completion Queue Entry)
*/
-struct io_uring_event {
- __u64 index; /* what iocb this event came from */
+struct io_uring_cqe {
+ __u64 index; /* what sqe this event came from */
__s32 res; /* result code for this event */
__u32 flags;
};
/*
- * io_uring_event->flags
+ * io_uring_cqe->flags
 */
-#define IOEV_FLAG_CACHEHIT (1 << 0) /* IO did not hit media */
+#define IOCQE_FLAG_CACHEHIT (1 << 0) /* IO did not hit media */
/*
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL
#define IORING_OFF_CQ_RING 0x8000000ULL
-#define IORING_OFF_IOCB 0x10000000ULL
+#define IORING_OFF_SQES 0x10000000ULL
/*
* Filled with the offset for mmap(2)
__u32 ring_mask;
__u32 ring_entries;
__u32 overflow;
- __u32 events;
+ __u32 cqes;
__u32 resv[4];
};
unsigned *tail;
unsigned *ring_mask;
unsigned *ring_entries;
- struct io_uring_event *events;
+ struct io_uring_cqe *cqes;
};
#define DEPTH 32
int ring_fd;
struct drand48_data rand;
struct io_sq_ring sq_ring;
- struct io_uring_iocb *iocbs;
+ struct io_uring_sqe *sqes;
struct iovec iovecs[DEPTH];
struct io_cq_ring cq_ring;
int inflight;
static struct submitter submitters[1];
static volatile int finish;
-static int polled = 0; /* use IO polling */
+static int polled = 1; /* use IO polling */
static int fixedbufs = 0; /* use fixed user buffers */
-static int buffered = 1; /* use buffered IO, not O_DIRECT */
+static int buffered = 0; /* use buffered IO, not O_DIRECT */
static int sq_thread = 0; /* use kernel submission thread */
static int sq_thread_cpu = 0; /* pin above thread to this CPU */
static void init_io(struct submitter *s, int fd, unsigned index)
{
- struct io_uring_iocb *iocb = &s->iocbs[index];
+ struct io_uring_sqe *sqe = &s->sqes[index];
unsigned long offset;
long r;
lrand48_r(&s->rand, &r);
offset = (r % (s->max_blocks - 1)) * BS;
- if (fixedbufs)
- iocb->opcode = IORING_OP_READ_FIXED;
- else
- iocb->opcode = IORING_OP_READ;
- iocb->flags = 0;
- iocb->ioprio = 0;
- iocb->fd = fd;
- iocb->off = offset;
- iocb->addr = s->iovecs[index].iov_base;
- iocb->len = BS;
+ if (fixedbufs) {
+ sqe->opcode = IORING_OP_READ_FIXED;
+ sqe->addr = s->iovecs[index].iov_base;
+ sqe->len = BS;
+ } else {
+ sqe->opcode = IORING_OP_READV;
+ sqe->addr = &s->iovecs[index];
+ sqe->len = 1;
+ }
+ sqe->flags = 0;
+ sqe->ioprio = 0;
+ sqe->fd = fd;
+ sqe->off = offset;
}
static int prep_more_ios(struct submitter *s, int fd, int max_ios)
} while (prepped < max_ios);
if (*ring->tail != tail) {
- /* order tail store with writes to iocbs above */
+ /* order tail store with writes to sqes above */
barrier();
*ring->tail = tail;
barrier();
static int reap_events(struct submitter *s)
{
struct io_cq_ring *ring = &s->cq_ring;
- struct io_uring_event *ev;
+ struct io_uring_cqe *cqe;
unsigned head, reaped = 0;
head = *ring->head;
barrier();
if (head == *ring->tail)
break;
- ev = &ring->events[head & cq_ring_mask];
- if (ev->res != BS) {
- struct io_uring_iocb *iocb = &s->iocbs[ev->index];
+ cqe = &ring->cqes[head & cq_ring_mask];
+ if (cqe->res != BS) {
+ struct io_uring_sqe *sqe = &s->sqes[cqe->index];
- printf("io: unexpected ret=%d\n", ev->res);
+ printf("io: unexpected ret=%d\n", cqe->res);
printf("offset=%lu, size=%lu\n",
- (unsigned long) iocb->off,
- (unsigned long) iocb->len);
+ (unsigned long) sqe->off,
+ (unsigned long) sqe->len);
return -1;
}
- if (ev->flags & IOEV_FLAG_CACHEHIT)
+ if (cqe->flags & IOCQE_FLAG_CACHEHIT)
s->cachehit++;
else
s->cachemiss++;
if (polled)
p.flags |= IORING_SETUP_IOPOLL;
- if (fixedbufs)
- p.flags |= IORING_SETUP_FIXEDBUFS;
if (buffered)
p.flags |= IORING_SETUP_SQWQ;
else if (sq_thread) {
sring->array = ptr + p.sq_off.array;
sq_ring_mask = *sring->ring_mask;
- s->iocbs = mmap(0, p.sq_entries * sizeof(struct io_uring_iocb),
+ s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
- IORING_OFF_IOCB);
- printf("iocbs ptr = 0x%p\n", s->iocbs);
+ IORING_OFF_SQES);
+ printf("sqes ptr = 0x%p\n", s->sqes);
- ptr = mmap(0, p.cq_off.events + p.cq_entries * sizeof(struct io_uring_event),
+ ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_CQ_RING);
printf("cq_ring ptr = 0x%p\n", ptr);
cring->tail = ptr + p.cq_off.tail;
cring->ring_mask = ptr + p.cq_off.ring_mask;
cring->ring_entries = ptr + p.cq_off.ring_entries;
- cring->events = ptr + p.cq_off.events;
+ cring->cqes = ptr + p.cq_off.cqes;
cq_ring_mask = *cring->ring_mask;
return 0;
}