X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=engines%2Faioring.c;h=4b410cf7b19446c68d94a1093a73de7cae7143d9;hp=cb13b4158b2bc117e995b770e2c8bf21a87cc657;hb=9a2d78b316f18847a007809c5c57737978b73cc3;hpb=771c99012e26af0dc2a0b7e0762e5097534144bd

diff --git a/engines/aioring.c b/engines/aioring.c
index cb13b415..4b410cf7 100644
--- a/engines/aioring.c
+++ b/engines/aioring.c
@@ -17,21 +17,18 @@
 #include "../lib/pow2.h"
 #include "../optgroup.h"
 #include "../lib/memalign.h"
+#include "../lib/fls.h"
 
 #ifdef ARCH_HAVE_AIORING
 
-#ifndef IOCB_FLAG_HIPRI
-#define IOCB_FLAG_HIPRI	(1 << 2)
-#endif
-
 /*
- * io_setup2(2) flags
+ * io_uring_setup(2) flags
  */
-#ifndef IOCTX_FLAG_IOPOLL
-#define IOCTX_FLAG_IOPOLL	(1 << 0)
-#endif
 #ifndef IOCTX_FLAG_SCQRING
-#define IOCTX_FLAG_SCQRING	(1 << 1)
+#define IOCTX_FLAG_SCQRING	(1 << 0)
+#endif
+#ifndef IOCTX_FLAG_IOPOLL
+#define IOCTX_FLAG_IOPOLL	(1 << 1)
 #endif
 #ifndef IOCTX_FLAG_FIXEDBUFS
 #define IOCTX_FLAG_FIXEDBUFS	(1 << 2)
@@ -46,60 +43,89 @@
 #define IOCTX_FLAG_SQPOLL	(1 << 5)
 #endif
 
+#define IORING_OFF_SQ_RING	0ULL
+#define IORING_OFF_CQ_RING	0x8000000ULL
+#define IORING_OFF_IOCB		0x10000000ULL
+
 /*
- * io_ring_enter(2) flags
+ * io_uring_enter(2) flags
  */
-#ifndef IORING_FLAG_SUBMIT
-#define IORING_FLAG_SUBMIT	(1 << 0)
-#endif
-#ifndef IORING_FLAG_GETEVENTS
-#define IORING_FLAG_GETEVENTS	(1 << 1)
+#ifndef IORING_ENTER_GETEVENTS
+#define IORING_ENTER_GETEVENTS	(1 << 0)
 #endif
 
 typedef uint64_t u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
 
+#define IORING_SQ_NEED_WAKEUP	(1 << 0)
+
+#define IOEV_RES2_CACHEHIT	(1 << 0)
+
+struct aio_uring_offsets {
+	u32 head;
+	u32 tail;
+	u32 ring_mask;
+	u32 ring_entries;
+	u32 flags;
+	u32 elems;
+};
+
+struct aio_uring_params {
+	u32 sq_entries;
+	u32 cq_entries;
+	u32 flags;
+	u16 sq_thread_cpu;
+	u16 resv[9];
+	struct aio_uring_offsets sq_off;
+	struct aio_uring_offsets cq_off;
+};
+
 struct aio_sq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-			u16 sq_thread_cpu;
-			u64 iocbs;
-		};
-		u32 pad[16];
-	};
-	u32 array[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	u32 *flags;
+	u32 *array;
 };
 
 struct aio_cq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-		};
-		struct io_event pad;
-	};
-	struct io_event events[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	struct io_event *events;
+};
+
+struct aioring_mmap {
+	void *ptr;
+	size_t len;
 };
 
 struct aioring_data {
-	io_context_t aio_ctx;
+	int ring_fd;
+
 	struct io_u **io_us;
 	struct io_u **io_u_index;
 
-	struct aio_sq_ring *sq_ring;
+	struct aio_sq_ring sq_ring;
 	struct iocb *iocbs;
+	struct iovec *iovecs;
+	unsigned sq_ring_mask;
 
-	struct aio_cq_ring *cq_ring;
+	struct aio_cq_ring cq_ring;
 	struct io_event *events;
+	unsigned cq_ring_mask;
 
 	int queued;
 	int cq_ring_off;
+	unsigned iodepth;
+
+	uint64_t cachehit;
+	uint64_t cachemiss;
+
+	struct aioring_mmap mmap[3];
 };
 
 struct aioring_options {
@@ -173,42 +199,32 @@ static struct fio_option options[] = {
 	},
 };
 
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
+static int io_uring_enter(struct aioring_data *ld, unsigned int to_submit,
			 unsigned int min_complete, unsigned int flags)
 {
-	return syscall(__NR_sys_io_ring_enter, ctx, to_submit, min_complete,
-			flags);
+	return syscall(__NR_sys_io_uring_enter, ld->ring_fd, to_submit,
+			min_complete, flags);
 }
 
 static int fio_aioring_prep(struct thread_data *td, struct io_u *io_u)
 {
 	struct aioring_data *ld = td->io_ops_data;
 	struct fio_file *f = io_u->file;
-	struct aioring_options *o = td->eo;
 	struct iocb *iocb;
 
 	iocb = &ld->iocbs[io_u->index];
 
-	if (io_u->ddir == DDIR_READ) {
-		if (o->fixedbufs) {
-			iocb->aio_fildes = f->fd;
+	if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+		if (io_u->ddir == DDIR_READ)
 			iocb->aio_lio_opcode = IO_CMD_PREAD;
-			iocb->u.c.offset = io_u->offset;
-		} else {
-			io_prep_pread(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
-			if (o->hipri)
-				iocb->u.c.flags |= IOCB_FLAG_HIPRI;
-		}
-	} else if (io_u->ddir == DDIR_WRITE) {
-		if (o->fixedbufs) {
-			iocb->aio_fildes = f->fd;
+		else
 			iocb->aio_lio_opcode = IO_CMD_PWRITE;
-			iocb->u.c.offset = io_u->offset;
-		} else {
-			io_prep_pwrite(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
-			if (o->hipri)
-				iocb->u.c.flags |= IOCB_FLAG_HIPRI;
-		}
+		iocb->aio_reqprio = 0;
+		iocb->aio_fildes = f->fd;
+		iocb->u.c.buf = io_u->xfer_buf;
+		iocb->u.c.nbytes = io_u->xfer_buflen;
+		iocb->u.c.offset = io_u->offset;
+		iocb->u.c.flags = 0;
 	} else if (ddir_sync(io_u->ddir))
 		io_prep_fsync(iocb, f->fd);
 
@@ -221,13 +237,11 @@ static struct io_u *fio_aioring_event(struct thread_data *td, int event)
 	struct aioring_data *ld = td->io_ops_data;
 	struct io_event *ev;
 	struct io_u *io_u;
-	int index;
+	unsigned index;
 
-	index = event + ld->cq_ring_off;
-	if (index >= ld->cq_ring->nr_events)
-		index -= ld->cq_ring->nr_events;
+	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
-	ev = &ld->cq_ring->events[index];
+	ev = &ld->cq_ring.events[index];
 	io_u = ev->data;
 
 	if (ev->res != io_u->xfer_buflen) {
@@ -238,6 +252,13 @@ static struct io_u *fio_aioring_event(struct thread_data *td, int event)
 	} else
 		io_u->error = 0;
 
+	if (io_u->ddir == DDIR_READ) {
+		if (ev->res2 & IOEV_RES2_CACHEHIT)
+			ld->cachehit++;
+		else
+			ld->cachemiss++;
+	}
+
 	return io_u;
 }
 
@@ -245,21 +266,19 @@ static int fio_aioring_cqring_reap(struct thread_data *td, unsigned int events,
				   unsigned int max)
 {
 	struct aioring_data *ld = td->io_ops_data;
-	struct aio_cq_ring *ring = ld->cq_ring;
+	struct aio_cq_ring *ring = &ld->cq_ring;
 	u32 head, reaped = 0;
 
-	head = ring->head;
+	head = *ring->head;
 	do {
 		read_barrier();
-		if (head == ring->tail)
+		if (head == *ring->tail)
 			break;
 		reaped++;
 		head++;
-		if (head == ring->nr_events)
-			head = 0;
 	} while (reaped + events < max);
 
-	ring->head = head;
+	*ring->head = head;
 	write_barrier();
 	return reaped;
 }
@@ -270,10 +289,11 @@ static int fio_aioring_getevents(struct thread_data *td, unsigned int min,
 	struct aioring_data *ld = td->io_ops_data;
 	unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
 	struct aioring_options *o = td->eo;
-	struct aio_cq_ring *ring = ld->cq_ring;
-	int r, events = 0;
+	struct aio_cq_ring *ring = &ld->cq_ring;
+	unsigned events = 0;
+	int r;
 
-	ld->cq_ring_off = ring->head;
+	ld->cq_ring_off = *ring->head;
 	do {
 		r = fio_aioring_cqring_reap(td, events, max);
 		if (r) {
@@ -282,12 +302,12 @@ static int fio_aioring_getevents(struct thread_data *td, unsigned int min,
 		if (!o->sqthread_poll) {
-			r = io_ring_enter(ld->aio_ctx, 0, actual_min,
-						IORING_FLAG_GETEVENTS);
+			r = io_uring_enter(ld, 0, actual_min,
+						IORING_ENTER_GETEVENTS);
 			if (r < 0) {
 				if (errno == EAGAIN)
 					continue;
-				td_verror(td, errno, "io_ring_enter get");
+				td_verror(td, errno, "io_uring_enter");
 				break;
 			}
 		}
 	}
 
@@ -300,12 +320,12 @@ static enum fio_q_status fio_aioring_queue(struct thread_data *td,
					   struct io_u *io_u)
 {
 	struct aioring_data *ld = td->io_ops_data;
-	struct aio_sq_ring *ring = ld->sq_ring;
+	struct aio_sq_ring *ring = &ld->sq_ring;
 	unsigned tail, next_tail;
 
 	fio_ro_check(td, io_u);
 
-	if (ld->queued == td->o.iodepth)
+	if (ld->queued == ld->iodepth)
 		return FIO_Q_BUSY;
 
 	if (io_u->ddir == DDIR_TRIM) {
@@ -318,16 +338,14 @@ static enum fio_q_status fio_aioring_queue(struct thread_data *td,
 		return FIO_Q_COMPLETED;
 	}
 
-	tail = ring->tail;
+	tail = *ring->tail;
 	next_tail = tail + 1;
-	if (next_tail == ring->nr_events)
-		next_tail = 0;
 	read_barrier();
-	if (next_tail == ring->head)
+	if (next_tail == *ring->head)
 		return FIO_Q_BUSY;
 
-	ring->array[tail] = io_u->index;
-	ring->tail = next_tail;
+	ring->array[tail & ld->sq_ring_mask] = io_u->index;
+	*ring->tail = next_tail;
 	write_barrier();
 
 	ld->queued++;
@@ -345,15 +363,14 @@ static void fio_aioring_queued(struct thread_data *td, int start, int nr)
 	fio_gettime(&now, NULL);
 
 	while (nr--) {
-		int index = ld->sq_ring->array[start];
-		struct io_u *io_u = io_u = ld->io_u_index[index];
+		struct aio_sq_ring *ring = &ld->sq_ring;
+		int index = ring->array[start & ld->sq_ring_mask];
+		struct io_u *io_u = ld->io_u_index[index];
 
 		memcpy(&io_u->issue_time, &now, sizeof(now));
 		io_u_queued(td, io_u);
 
 		start++;
-		if (start == ld->sq_ring->nr_events)
-			start = 0;
 	}
 }
 
@@ -368,16 +385,19 @@ static int fio_aioring_commit(struct thread_data *td)
 
 	/* Nothing to do */
 	if (o->sqthread_poll) {
+		struct aio_sq_ring *ring = &ld->sq_ring;
+
+		if (*ring->flags & IORING_SQ_NEED_WAKEUP)
+			io_uring_enter(ld, ld->queued, 0, 0);
 		ld->queued = 0;
 		return 0;
 	}
 
 	do {
-		int start = ld->sq_ring->head;
+		unsigned start = *ld->sq_ring.head;
 		long nr = ld->queued;
 
-		ret = io_ring_enter(ld->aio_ctx, nr, 0, IORING_FLAG_SUBMIT |
-						IORING_FLAG_GETEVENTS);
+		ret = io_uring_enter(ld, nr, 0, IORING_ENTER_GETEVENTS);
 		if (ret > 0) {
 			fio_aioring_queued(td, start, ret);
 			io_u_mark_submit(td, ret);
@@ -396,7 +416,7 @@ static int fio_aioring_commit(struct thread_data *td)
				usleep(1);
				continue;
			}
-			td_verror(td, errno, "io_ring_enter sumit");
+			td_verror(td, errno, "io_uring_enter submit");
			break;
		}
	} while (ld->queued);
@@ -404,19 +424,13 @@
 	return ret;
 }
 
-static size_t aioring_cq_size(struct thread_data *td)
-{
-	return sizeof(struct aio_cq_ring) + 2 * td->o.iodepth * sizeof(struct io_event);
-}
-
-static size_t aioring_sq_iocb(struct thread_data *td)
+static void fio_aioring_unmap(struct aioring_data *ld)
 {
-	return sizeof(struct iocb) * td->o.iodepth;
-}
+	int i;
 
-static size_t aioring_sq_size(struct thread_data *td)
-{
-	return sizeof(struct aio_sq_ring) + td->o.iodepth * sizeof(u32);
+	for (i = 0; i < ARRAY_SIZE(ld->mmap); i++)
+		munmap(ld->mmap[i].ptr, ld->mmap[i].len);
+	close(ld->ring_fd);
 }
 
 static void fio_aioring_cleanup(struct thread_data *td)
@@ -424,8 +438,8 @@
 	struct aioring_data *ld = td->io_ops_data;
 
 	if (ld) {
-		/* Bump depth to match init depth */
-		td->o.iodepth++;
+		td->ts.cachehit += ld->cachehit;
+		td->ts.cachemiss += ld->cachemiss;
 
 		/*
 		 * Work-around to avoid huge RCU stalls at exit time. If we
@@ -434,32 +448,76 @@
 		 * speeding it up a lot.
 		 */
 		if (!(td->flags & TD_F_CHILD))
-			io_destroy(ld->aio_ctx);
+			fio_aioring_unmap(ld);
+
 		free(ld->io_u_index);
 		free(ld->io_us);
-		fio_memfree(ld->sq_ring, aioring_sq_size(td), false);
-		fio_memfree(ld->iocbs, aioring_sq_iocb(td), false);
-		fio_memfree(ld->cq_ring, aioring_cq_size(td), false);
+		free(ld->iovecs);
 		free(ld);
 	}
 }
 
+static int fio_aioring_mmap(struct aioring_data *ld, struct aio_uring_params *p)
+{
+	struct aio_sq_ring *sring = &ld->sq_ring;
+	struct aio_cq_ring *cring = &ld->cq_ring;
+	void *ptr;
+
+	ld->mmap[0].len = p->sq_off.elems + p->sq_entries * sizeof(u32);
+	ptr = mmap(0, ld->mmap[0].len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+			IORING_OFF_SQ_RING);
+	ld->mmap[0].ptr = ptr;
+	sring->head = ptr + p->sq_off.head;
+	sring->tail = ptr + p->sq_off.tail;
+	sring->ring_mask = ptr + p->sq_off.ring_mask;
+	sring->ring_entries = ptr + p->sq_off.ring_entries;
+	sring->flags = ptr + p->sq_off.flags;
+	sring->array = ptr + p->sq_off.elems;
+	ld->sq_ring_mask = *sring->ring_mask;
+
+	ld->mmap[1].len = p->sq_entries * sizeof(struct iocb);
+	ld->iocbs = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+				IORING_OFF_IOCB);
+	ld->mmap[1].ptr = ld->iocbs;
+
+	ld->mmap[2].len = p->cq_off.elems +
+				p->cq_entries * sizeof(struct io_event);
+	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+			IORING_OFF_CQ_RING);
+	ld->mmap[2].ptr = ptr;
+	cring->head = ptr + p->cq_off.head;
+	cring->tail = ptr + p->cq_off.tail;
+	cring->ring_mask = ptr + p->cq_off.ring_mask;
+	cring->ring_entries = ptr + p->cq_off.ring_entries;
+	cring->events = ptr + p->cq_off.elems;
+	ld->cq_ring_mask = *cring->ring_mask;
+	return 0;
+}
+
 static int fio_aioring_queue_init(struct thread_data *td)
 {
 	struct aioring_data *ld = td->io_ops_data;
 	struct aioring_options *o = td->eo;
-	int flags = IOCTX_FLAG_SCQRING;
 	int depth = td->o.iodepth;
+	struct aio_uring_params p;
+	int ret;
+
+	memset(&p, 0, sizeof(p));
+	p.flags = IOCTX_FLAG_SCQRING;
 
 	if (o->hipri)
-		flags |= IOCTX_FLAG_IOPOLL;
+		p.flags |= IOCTX_FLAG_IOPOLL;
 	if (o->sqthread_set) {
-		ld->sq_ring->sq_thread_cpu = o->sqthread;
-		flags |= IOCTX_FLAG_SQTHREAD;
+		p.sq_thread_cpu = o->sqthread;
+		p.flags |= IOCTX_FLAG_SQTHREAD;
 		if (o->sqthread_poll)
-			flags |= IOCTX_FLAG_SQPOLL;
-	} else if (o->sqwq)
-		flags |= IOCTX_FLAG_SQWQ;
+			p.flags |= IOCTX_FLAG_SQPOLL;
+	}
+	if (o->sqwq)
+		p.flags |= IOCTX_FLAG_SQWQ;
 
 	if (o->fixedbufs) {
 		struct rlimit rlim = {
@@ -468,11 +526,15 @@
 		};
 
 		setrlimit(RLIMIT_MEMLOCK, &rlim);
-		flags |= IOCTX_FLAG_FIXEDBUFS;
+		p.flags |= IOCTX_FLAG_FIXEDBUFS;
 	}
 
-	return syscall(__NR_sys_io_setup2, depth, flags,
-			ld->sq_ring, ld->cq_ring, &ld->aio_ctx);
+	ret = syscall(__NR_sys_io_uring_setup, depth, ld->iovecs, &p);
+	if (ret < 0)
+		return ret;
+
+	ld->ring_fd = ret;
+	return fio_aioring_mmap(ld, &p);
 }
 
 static int fio_aioring_post_init(struct thread_data *td)
@@ -480,64 +542,49 @@
 	struct aioring_data *ld = td->io_ops_data;
 	struct aioring_options *o = td->eo;
 	struct io_u *io_u;
-	struct iocb *iocb;
-	int err = 0;
+	int err;
 
 	if (o->fixedbufs) {
 		int i;
 
 		for (i = 0; i < td->o.iodepth; i++) {
-			io_u = ld->io_u_index[i];
-			iocb = &ld->iocbs[i];
-			iocb->u.c.buf = io_u->buf;
-			iocb->u.c.nbytes = td_max_bs(td);
+			struct iovec *iov = &ld->iovecs[i];
 
-			if (o->hipri)
-				iocb->u.c.flags |= IOCB_FLAG_HIPRI;
+			io_u = ld->io_u_index[i];
+			iov->iov_base = io_u->buf;
+			iov->iov_len = td_max_bs(td);
 		}
 	}
 
 	err = fio_aioring_queue_init(td);
 	if (err) {
-		td_verror(td, -err, "io_queue_init");
+		td_verror(td, errno, "io_queue_init");
 		return 1;
 	}
 
-	/* Adjust depth back again */
-	td->o.iodepth--;
 	return 0;
 }
 
+static unsigned roundup_pow2(unsigned depth)
+{
+	return 1UL << __fls(depth - 1);
+}
+
 static int fio_aioring_init(struct thread_data *td)
 {
-	struct aioring_options *o = td->eo;
 	struct aioring_data *ld;
 
-	if (o->sqthread_set && o->sqwq) {
-		log_err("fio: aioring sqthread and sqwq are mutually exclusive\n");
-		return 1;
-	}
-
-	/* ring needs an extra entry, add one to achieve QD set */
-	td->o.iodepth++;
-
 	ld = calloc(1, sizeof(*ld));
+
+	/* ring depth must be a power-of-2 */
+	ld->iodepth = td->o.iodepth;
+	td->o.iodepth = roundup_pow2(td->o.iodepth);
+
+	/* io_u index */
 	ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));
 	ld->io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
 
-	ld->iocbs = fio_memalign(page_size, aioring_sq_iocb(td), false);
-	memset(ld->iocbs, 0, aioring_sq_iocb(td));
-
-	ld->sq_ring = fio_memalign(page_size, aioring_sq_size(td), false);
-	memset(ld->sq_ring, 0, aioring_sq_size(td));
-	ld->sq_ring->nr_events = td->o.iodepth;
-	ld->sq_ring->iocbs = (u64) (uintptr_t) ld->iocbs;
-
-	ld->cq_ring = fio_memalign(page_size, aioring_cq_size(td), false);
-	memset(ld->cq_ring, 0, aioring_cq_size(td));
-	ld->cq_ring->nr_events = td->o.iodepth * 2;
+	ld->iovecs = calloc(td->o.iodepth, sizeof(struct iovec));
 
 	td->io_ops_data = ld;
 	return 0;
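
The most instructive structural change above is the setup path: instead of allocating the rings with fio_memalign() and handing them to io_setup2(), fio_aioring_queue_init() now gets a file descriptor back from io_uring_setup() and fio_aioring_mmap() maps three kernel-owned regions at fixed offsets. Below is a minimal standalone sketch of that sequence, not the engine code itself; it assumes the aio_uring_params/aio_uring_offsets types, the IORING_OFF_* constants and the __NR_sys_io_uring_setup number defined or referenced in the patch, and trims error handling to the setup call:

	#include <string.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Create a ring of 'depth' entries and map its three regions: the
	 * SQ index array, the iocb array and the CQ event array.  Returns
	 * the ring fd, or -1 if io_uring_setup() failed.
	 */
	static int setup_and_map(unsigned depth, struct iovec *iovecs,
				 struct aio_uring_params *p, u32 **sq_array,
				 struct iocb **iocbs, struct io_event **cq_events)
	{
		void *ptr;
		int fd;

		memset(p, 0, sizeof(*p));
		p->flags = IOCTX_FLAG_SCQRING;
		fd = syscall(__NR_sys_io_uring_setup, depth, iovecs, p);
		if (fd < 0)
			return -1;

		/* SQ ring: the kernel reports each field's offset in sq_off */
		ptr = mmap(0, p->sq_off.elems + p->sq_entries * sizeof(u32),
			   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			   fd, IORING_OFF_SQ_RING);
		*sq_array = ptr + p->sq_off.elems;

		/* iocb array: written by userspace, consumed by the kernel */
		*iocbs = mmap(0, p->sq_entries * sizeof(struct iocb),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      fd, IORING_OFF_IOCB);

		/* CQ ring: io_event entries start at cq_off.elems */
		ptr = mmap(0, p->cq_off.elems + p->cq_entries * sizeof(struct io_event),
			   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			   fd, IORING_OFF_CQ_RING);
		*cq_events = ptr + p->cq_off.elems;
		return fd;
	}

The head/tail/mask pointers would be derived the same way from sq_off and cq_off, exactly as fio_aioring_mmap() does in the patch.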
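Note also what happened to the ring indices: head and tail are now free-running 32-bit counters shared with the kernel, and slots are addressed as index & ring_mask, which is why the nr_events wrap-around checks vanish from fio_aioring_queue(), fio_aioring_queued() and fio_aioring_cqring_reap(). A toy single-producer/single-consumer version of the same protocol, with C11 fences standing in for fio's read_barrier()/write_barrier() (hypothetical types, for illustration only):

	#include <stdatomic.h>
	#include <stdint.h>

	struct toy_ring {
		uint32_t *head;		/* advanced by the consumer */
		uint32_t *tail;		/* advanced by the producer */
		uint32_t mask;		/* ring_entries - 1, entries a power of 2 */
		uint32_t *array;	/* iocb indices */
	};

	/* Producer: returns 0 on success, -1 if the ring is full. */
	static int toy_push(struct toy_ring *r, uint32_t idx)
	{
		uint32_t tail = *r->tail;

		atomic_thread_fence(memory_order_acquire);	/* read_barrier() */
		if (tail - *r->head > r->mask)			/* occupied == entries */
			return -1;

		r->array[tail & r->mask] = idx;
		*r->tail = tail + 1;
		atomic_thread_fence(memory_order_release);	/* write_barrier() */
		return 0;
	}

	/* Consumer, mirroring fio_aioring_cqring_reap(): count entries
	 * between head and tail, then publish the new head.
	 */
	static unsigned toy_reap(struct toy_ring *r)
	{
		uint32_t head = *r->head, tail;
		unsigned reaped = 0;

		atomic_thread_fence(memory_order_acquire);	/* read_barrier() */
		tail = *r->tail;
		while (head != tail) {
			/* consume r->array[head & r->mask] here */
			head++;
			reaped++;
		}
		*r->head = head;
		atomic_thread_fence(memory_order_release);	/* write_barrier() */
		return reaped;
	}

Because the counters run free, fullness becomes a subtraction rather than an equality test against a wrapped index, and the power-of-two mask makes the modulo implicit.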
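On the syscall side, the separate submit/getevents flags collapse into one call: io_uring_enter() can submit to_submit iocbs and wait for min_complete completions in a single trip, and when a polling kernel thread owns the SQ ring (IOCTX_FLAG_SQPOLL) no syscall is needed at all unless the thread has flagged itself asleep via IORING_SQ_NEED_WAKEUP. A condensed sketch of the decision the commit/getevents paths above make, assuming the wrapper and flags from the patch and omitting error handling:

	/* Condensed from fio_aioring_commit()/fio_aioring_getevents(); 'ld'
	 * is the engine data set up by the patch.  Sketch only.
	 */
	static void submit_and_wait(struct aioring_data *ld, int sqthread_poll,
				    unsigned int min_complete)
	{
		if (sqthread_poll) {
			/* The kernel thread polls the SQ ring on its own;
			 * only wake it if it marked itself as sleeping.
			 */
			if (*ld->sq_ring.flags & IORING_SQ_NEED_WAKEUP)
				io_uring_enter(ld, ld->queued, 0, 0);
			return;
		}

		/* One trip into the kernel both submits and waits. */
		io_uring_enter(ld, ld->queued, min_complete,
				IORING_ENTER_GETEVENTS);
	}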
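Masked indexing only works when the ring size is a power of two, hence the new roundup_pow2() in fio_aioring_init(): the depth the user asked for is kept in ld->iodepth for the queued == iodepth busy check, while td->o.iodepth is rounded up to size the ring. Assuming fio's __fls() returns the 1-based position of the most significant set bit (so __fls(4) == 3), 1UL << __fls(depth - 1) is exactly that round-up; a portable stand-in to check the behaviour:

	#include <stdio.h>

	/* Portable equivalent of the patch's roundup_pow2(); the fio
	 * version is assumed to rely on a 1-based __fls().
	 */
	static unsigned roundup_pow2(unsigned depth)
	{
		unsigned v = 1;

		while (v < depth)
			v <<= 1;
		return v;
	}

	int main(void)
	{
		/* 4 -> 4, 5 -> 8, 31 -> 32: iodepth=5 gets an 8-entry ring,
		 * but ld->iodepth stays 5 so the submit-side QD is honoured.
		 */
		printf("%u %u %u\n", roundup_pow2(4), roundup_pow2(5),
			roundup_pow2(31));
		return 0;
	}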