t/aio-ring: cleanup the code a bit
[fio.git] / t / aio-ring.c
index c813c4e7f1de1283c49cd0dc80c19b3bb9274bea..1a4fe44b75a9feae40f152540ce952e289d9e111 100644 (file)
@@ -14,6 +14,7 @@
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
+#include <sys/mman.h>
 #include <linux/fs.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <pthread.h>
 #include <sched.h>
 
-#define IOCB_FLAG_HIPRI                (1 << 2)
+#include "../arch/arch.h"
 
-#define IOCTX_FLAG_IOPOLL      (1 << 0)
-#define IOCTX_FLAG_SCQRING     (1 << 1)        /* Use SQ/CQ rings */
+#define IOCTX_FLAG_SCQRING     (1 << 0)        /* Use SQ/CQ rings */
+#define IOCTX_FLAG_IOPOLL      (1 << 1)
 #define IOCTX_FLAG_FIXEDBUFS   (1 << 2)
 #define IOCTX_FLAG_SQTHREAD    (1 << 3)        /* Use SQ thread */
 #define IOCTX_FLAG_SQWQ                (1 << 4)        /* Use SQ wq */
+#define IOCTX_FLAG_SQPOLL      (1 << 5)
 
 #define IOEV_RES2_CACHEHIT     (1 << 0)
 
@@ -40,51 +42,77 @@ typedef uint64_t u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
 
+#define IORING_OFF_SQ_RING     0ULL
+#define IORING_OFF_CQ_RING     0x8000000ULL
+#define IORING_OFF_IOCB                0x10000000ULL
+
+struct aio_sqring_offsets {
+       u32 head;
+       u32 tail;
+       u32 ring_mask;
+       u32 ring_entries;
+       u32 flags;
+       u32 dropped;
+       u32 array;
+       u32 resv[3];
+};
+
+struct aio_cqring_offsets {
+       u32 head;
+       u32 tail;
+       u32 ring_mask;
+       u32 ring_entries;
+       u32 overflow;
+       u32 events;
+       u32 resv[4];
+};
+
+struct aio_uring_params {
+       u32 sq_entries;
+       u32 cq_entries;
+       u32 flags;
+       u16 sq_thread_cpu;
+       u16 resv[9];
+       struct aio_sqring_offsets sq_off;
+       struct aio_cqring_offsets cq_off;
+};
+
 struct aio_sq_ring {
-       union {
-               struct {
-                       u32 head;
-                       u32 tail;
-                       u32 nr_events;
-                       u16 sq_thread_cpu;
-                       u64 iocbs;
-               };
-               u32 pad[16];
-       };
-       u32 array[0];
+       u32 *head;
+       u32 *tail;
+       u32 *ring_mask;
+       u32 *ring_entries;
+       u32 *array;
 };
 
 struct aio_cq_ring {
-       union {
-               struct {
-                       u32 head;
-                       u32 tail;
-                       u32 nr_events;
-               };
-               struct io_event pad;
-       };
-       struct io_event events[0];
+       u32 *head;
+       u32 *tail;
+       u32 *ring_mask;
+       u32 *ring_entries;
+       struct io_event *events;
 };
 
-#define IORING_FLAG_SUBMIT     (1 << 0)
-#define IORING_FLAG_GETEVENTS  (1 << 1)
+#define IORING_ENTER_GETEVENTS (1 << 0)
 
 #define DEPTH                  32
-#define RING_SIZE              (DEPTH + 1)
 
 #define BATCH_SUBMIT           8
 #define BATCH_COMPLETE         8
 
 #define BS                     4096
 
+static unsigned sq_ring_mask, cq_ring_mask;
+
 struct submitter {
        pthread_t thread;
        unsigned long max_blocks;
-       io_context_t ioc;
+       int ring_fd;
        struct drand48_data rand;
-       struct aio_sq_ring *sq_ring;
+       struct aio_sq_ring sq_ring;
        struct iocb *iocbs;
-       struct aio_cq_ring *cq_ring;
+       struct iovec iovecs[DEPTH];
+       struct aio_cq_ring cq_ring;
        int inflight;
        unsigned long reaps;
        unsigned long done;
@@ -97,23 +125,23 @@ struct submitter {
 static struct submitter submitters[1];
 static volatile int finish;
 
-static int polled = 1;         /* use IO polling */
-static int fixedbufs = 1;      /* use fixed user buffers */
-static int buffered = 0;       /* use buffered IO, not O_DIRECT */
+static int polled = 0;         /* use IO polling */
+static int fixedbufs = 0;      /* use fixed user buffers */
+static int buffered = 1;       /* use buffered IO, not O_DIRECT */
 static int sq_thread = 0;      /* use kernel submission thread */
 static int sq_thread_cpu = 0;  /* pin above thread to this CPU */
 
-static int io_setup2(unsigned int nr_events, unsigned int flags,
-                    struct aio_sq_ring *sq_ring, struct aio_cq_ring *cq_ring,
-                    io_context_t *ctx_idp)
+static int io_uring_setup(unsigned entries, struct iovec *iovecs,
+                         struct aio_uring_params *p)
 {
-       return syscall(335, nr_events, flags, sq_ring, cq_ring, ctx_idp);
+       return syscall(__NR_sys_io_uring_setup, entries, iovecs, p);
 }
 
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
-                        unsigned int min_complete, unsigned int flags)
+static int io_uring_enter(struct submitter *s, unsigned int to_submit,
+                         unsigned int min_complete, unsigned int flags)
 {
-       return syscall(336, ctx, to_submit, min_complete, flags);
+       return syscall(__NR_sys_io_uring_enter, s->ring_fd, to_submit,
+                       min_complete, flags);
 }
 
 static int gettid(void)
@@ -121,8 +149,9 @@ static int gettid(void)
        return syscall(__NR_gettid);
 }
 
-static void init_io(struct submitter *s, int fd, struct iocb *iocb)
+static void init_io(struct submitter *s, int fd, unsigned index)
 {
+       struct iocb *iocb = &s->iocbs[index];
        unsigned long offset;
        long r;
 
@@ -131,38 +160,34 @@ static void init_io(struct submitter *s, int fd, struct iocb *iocb)
 
        iocb->aio_fildes = fd;
        iocb->aio_lio_opcode = IO_CMD_PREAD;
+       iocb->u.c.buf = s->iovecs[index].iov_base;
+       iocb->u.c.nbytes = BS;
        iocb->u.c.offset = offset;
-       if (polled)
-               iocb->u.c.flags = IOCB_FLAG_HIPRI;
-       if (!fixedbufs)
-               iocb->u.c.nbytes = BS;
 }
 
 static int prep_more_ios(struct submitter *s, int fd, int max_ios)
 {
-       struct aio_sq_ring *ring = s->sq_ring;
-       u32 tail, next_tail, prepped = 0;
+       struct aio_sq_ring *ring = &s->sq_ring;
+       u32 index, tail, next_tail, prepped = 0;
 
-       next_tail = tail = ring->tail;
+       next_tail = tail = *ring->tail;
        do {
                next_tail++;
-               if (next_tail == ring->nr_events)
-                       next_tail = 0;
-
                barrier();
-               if (next_tail == ring->head)
+               if (next_tail == *ring->head)
                        break;
 
-               init_io(s, fd, &s->iocbs[tail]);
-               s->sq_ring->array[tail] = tail;
+               index = tail & sq_ring_mask;
+               init_io(s, fd, index);
+               ring->array[index] = index;
                prepped++;
                tail = next_tail;
        } while (prepped < max_ios);
 
-       if (ring->tail != tail) {
+       if (*ring->tail != tail) {
                /* order tail store with writes to iocbs above */
                barrier();
-               ring->tail = tail;
+               *ring->tail = tail;
                barrier();
        }
        return prepped;
@@ -192,21 +217,23 @@ static int get_file_size(int fd, unsigned long *blocks)
 
 static int reap_events(struct submitter *s)
 {
-       struct aio_cq_ring *ring = s->cq_ring;
+       struct aio_cq_ring *ring = &s->cq_ring;
        struct io_event *ev;
        u32 head, reaped = 0;
 
-       head = ring->head;
+       head = *ring->head;
        do {
                barrier();
-               if (head == ring->tail)
+               if (head == *ring->tail)
                        break;
-               ev = &ring->events[head];
+               ev = &ring->events[head & cq_ring_mask];
                if (ev->res != BS) {
                        struct iocb *iocb = ev->obj;
 
                        printf("io: unexpected ret=%ld\n", ev->res);
-                       printf("offset=%lu, size=%lu\n", (unsigned long) iocb->u.c.offset, (unsigned long) iocb->u.c.nbytes);
+                       printf("offset=%lu, size=%lu\n",
+                                       (unsigned long) iocb->u.c.offset,
+                                       (unsigned long) iocb->u.c.nbytes);
                        return -1;
                }
                if (ev->res2 & IOEV_RES2_CACHEHIT)
@@ -215,12 +242,10 @@ static int reap_events(struct submitter *s)
                        s->cachemiss++;
                reaped++;
                head++;
-               if (head == ring->nr_events)
-                       head = 0;
        } while (1);
 
        s->inflight -= reaped;
-       ring->head = head;
+       *ring->head = head;
        barrier();
        return reaped;
 }
@@ -245,21 +270,22 @@ static void *submitter_fn(void *data)
                printf("failed getting size of device/file\n");
                goto err;
        }
-       if (!s->max_blocks) {
+       if (s->max_blocks <= 1) {
                printf("Zero file/device size?\n");
                goto err;
        }
-
        s->max_blocks--;
 
        srand48_r(pthread_self(), &s->rand);
 
        prepped = 0;
        do {
-               int to_wait, flags, to_submit, this_reap;
+               int to_wait, to_submit, this_reap, to_prep;
 
-               if (!prepped && s->inflight < DEPTH)
-                       prepped = prep_more_ios(s, fd, min(DEPTH - s->inflight, BATCH_SUBMIT));
+               if (!prepped && s->inflight < DEPTH) {
+                       to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
+                       prepped = prep_more_ios(s, fd, to_prep);
+               }
                s->inflight += prepped;
 submit_more:
                to_submit = prepped;
@@ -269,11 +295,8 @@ submit:
                else
                        to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
 
-               flags = IORING_FLAG_GETEVENTS;
-               if (to_submit)
-                       flags |= IORING_FLAG_SUBMIT;
-
-               ret = io_ring_enter(s->ioc, to_submit, to_wait, flags);
+               ret = io_uring_enter(s, to_submit, to_wait,
+                                       IORING_ENTER_GETEVENTS);
                s->calls++;
 
                this_reap = reap_events(s);
@@ -298,7 +321,7 @@ submit:
                        prepped = 0;
                        continue;
                } else if (ret < 0) {
-                       if ((ret == -1 && errno == EAGAIN) || ret == -EAGAIN) {
+                       if (errno == EAGAIN) {
                                if (s->finish)
                                        break;
                                if (this_reap)
@@ -306,10 +329,7 @@ submit:
                                to_submit = 0;
                                goto submit;
                        }
-                       if (ret == -1)
-                               printf("io_submit: %s\n", strerror(errno));
-                       else
-                               printf("io_submit: %s\n", strerror(-ret));
+                       printf("io_submit: %s\n", strerror(errno));
                        break;
                }
        } while (!s->finish);
@@ -337,15 +357,74 @@ static void arm_sig_int(void)
        sigaction(SIGINT, &act, NULL);
 }
 
+static int setup_ring(struct submitter *s)
+{
+       struct aio_sq_ring *sring = &s->sq_ring;
+       struct aio_cq_ring *cring = &s->cq_ring;
+       struct aio_uring_params p;
+       void *ptr;
+       int fd;
+
+       memset(&p, 0, sizeof(p));
+
+       p.flags = IOCTX_FLAG_SCQRING;
+       if (polled)
+               p.flags |= IOCTX_FLAG_IOPOLL;
+       if (fixedbufs)
+               p.flags |= IOCTX_FLAG_FIXEDBUFS;
+       if (buffered)
+               p.flags |= IOCTX_FLAG_SQWQ;
+       else if (sq_thread) {
+               p.flags |= IOCTX_FLAG_SQTHREAD;
+               p.sq_thread_cpu = sq_thread_cpu;
+       }
+
+       if (fixedbufs)
+               fd = io_uring_setup(DEPTH, s->iovecs, &p);
+       else
+               fd = io_uring_setup(DEPTH, NULL, &p);
+       if (fd < 0) {
+               perror("io_uring_setup");
+               return 1;
+       }
+
+       s->ring_fd = fd;
+       ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(u32),
+                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+                       IORING_OFF_SQ_RING);
+       printf("sq_ring ptr = 0x%p\n", ptr);
+       sring->head = ptr + p.sq_off.head;
+       sring->tail = ptr + p.sq_off.tail;
+       sring->ring_mask = ptr + p.sq_off.ring_mask;
+       sring->ring_entries = ptr + p.sq_off.ring_entries;
+       sring->array = ptr + p.sq_off.array;
+       sq_ring_mask = *sring->ring_mask;
+
+       s->iocbs = mmap(0, p.sq_entries * sizeof(struct iocb),
+                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+                       IORING_OFF_IOCB);
+       printf("iocbs ptr   = 0x%p\n", s->iocbs);
+
+       ptr = mmap(0, p.cq_off.events + p.cq_entries * sizeof(struct io_event),
+                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+                       IORING_OFF_CQ_RING);
+       printf("cq_ring ptr = 0x%p\n", ptr);
+       cring->head = ptr + p.cq_off.head;
+       cring->tail = ptr + p.cq_off.tail;
+       cring->ring_mask = ptr + p.cq_off.ring_mask;
+       cring->ring_entries = ptr + p.cq_off.ring_entries;
+       cring->events = ptr + p.cq_off.events;
+       cq_ring_mask = *cring->ring_mask;
+       return 0;
+}
+
 int main(int argc, char *argv[])
 {
        struct submitter *s = &submitters[0];
        unsigned long done, calls, reap, cache_hit, cache_miss;
-       int flags = 0, err;
-       int j;
-       size_t size;
-       void *p, *ret;
+       int err, i;
        struct rlimit rlim;
+       void *ret;
 
        if (argc < 2) {
                printf("%s: filename\n", argv[0]);
@@ -361,58 +440,24 @@ int main(int argc, char *argv[])
 
        arm_sig_int();
 
-       size = sizeof(struct iocb) * RING_SIZE;
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       memset(p, 0, size);
-       s->iocbs = p;
-
-       size = sizeof(struct aio_sq_ring) + RING_SIZE * sizeof(u32);
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       s->sq_ring = p;
-       memset(p, 0, size);
-       s->sq_ring->nr_events = RING_SIZE;
-       s->sq_ring->iocbs = (u64) s->iocbs;
-
-       /* CQ ring must be twice as big */
-       size = sizeof(struct aio_cq_ring) +
-                       2 * RING_SIZE * sizeof(struct io_event);
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       s->cq_ring = p;
-       memset(p, 0, size);
-       s->cq_ring->nr_events = 2 * RING_SIZE;
+       for (i = 0; i < DEPTH; i++) {
+               void *buf;
 
-       for (j = 0; j < RING_SIZE; j++) {
-               struct iocb *iocb = &s->iocbs[j];
-
-               if (posix_memalign(&iocb->u.c.buf, BS, BS)) {
+               if (posix_memalign(&buf, BS, BS)) {
                        printf("failed alloc\n");
                        return 1;
                }
-               iocb->u.c.nbytes = BS;
-       }
-
-       flags = IOCTX_FLAG_SCQRING;
-       if (polled)
-               flags |= IOCTX_FLAG_IOPOLL;
-       if (fixedbufs)
-               flags |= IOCTX_FLAG_FIXEDBUFS;
-       if (buffered)
-               flags |= IOCTX_FLAG_SQWQ;
-       else if (sq_thread) {
-               flags |= IOCTX_FLAG_SQTHREAD;
-               s->sq_ring->sq_thread_cpu = sq_thread_cpu;
+               s->iovecs[i].iov_base = buf;
+               s->iovecs[i].iov_len = BS;
        }
 
-       err = io_setup2(RING_SIZE, flags, s->sq_ring, s->cq_ring, &s->ioc);
+       err = setup_ring(s);
        if (err) {
-               printf("ctx_init failed: %s, %d\n", strerror(errno), err);
+               printf("ring setup failed: %s, %d\n", strerror(errno), err);
                return 1;
        }
-       printf("polled=%d, fixedbufs=%d, buffered=%d\n", polled, fixedbufs, buffered);
-       printf("  QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, s->sq_ring->nr_events, s->cq_ring->nr_events);
+       printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
+       printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
        strcpy(s->filename, argv[1]);
 
        pthread_create(&s->thread, NULL, submitter_fn, s);
@@ -445,9 +490,9 @@ int main(int argc, char *argv[])
                        rpc = (this_done - done) / (this_call - calls);
                        ipc = (this_reap - reap) / (this_call - calls);
                }
-               printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%d tail=%d), Cachehit=%0.2f%%\n",
+               printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%u tail=%u), Cachehit=%0.2f%%\n",
                                this_done - done, rpc, ipc, s->inflight,
-                               s->cq_ring->head, s->cq_ring->tail, hit);
+                               *s->cq_ring.head, *s->cq_ring.tail, hit);
                done = this_done;
                calls = this_call;
                reap = this_reap;