t/aio-ring: update to newer mmap() API
authorJens Axboe <axboe@kernel.dk>
Fri, 4 Jan 2019 20:27:46 +0000 (13:27 -0700)
committerJens Axboe <axboe@kernel.dk>
Fri, 4 Jan 2019 20:31:22 +0000 (13:31 -0700)
Signed-off-by: Jens Axboe <axboe@kernel.dk>
t/aio-ring.c

index c0c5009e6ca63b68d7c7a5f59a03fdc8ed2c120d..7d8de3528c49149ebab3e5a8aa8eb7fce97d43f3 100644 (file)
@@ -14,6 +14,7 @@
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
+#include <sys/mman.h>
 #include <linux/fs.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <linux/fs.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <pthread.h>
 #include <sched.h>
 
 #include <pthread.h>
 #include <sched.h>
 
-#define IOCTX_FLAG_IOPOLL      (1 << 0)
-#define IOCTX_FLAG_SCQRING     (1 << 1)        /* Use SQ/CQ rings */
+#define IOCTX_FLAG_SCQRING     (1 << 0)        /* Use SQ/CQ rings */
+#define IOCTX_FLAG_IOPOLL      (1 << 1)
 #define IOCTX_FLAG_FIXEDBUFS   (1 << 2)
 #define IOCTX_FLAG_SQTHREAD    (1 << 3)        /* Use SQ thread */
 #define IOCTX_FLAG_SQWQ                (1 << 4)        /* Use SQ wq */
 #define IOCTX_FLAG_FIXEDBUFS   (1 << 2)
 #define IOCTX_FLAG_SQTHREAD    (1 << 3)        /* Use SQ thread */
 #define IOCTX_FLAG_SQWQ                (1 << 4)        /* Use SQ wq */
+#define IOCTX_FLAG_SQPOLL      (1 << 5)
 
 #define IOEV_RES2_CACHEHIT     (1 << 0)
 
 
 #define IOEV_RES2_CACHEHIT     (1 << 0)
 
@@ -38,33 +40,46 @@ typedef uint64_t u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
 
 typedef uint32_t u32;
 typedef uint16_t u16;
 
+#define IORING_OFF_SQ_RING     0ULL
+#define IORING_OFF_CQ_RING     0x8000000ULL
+#define IORING_OFF_IOCB                0x10000000ULL
+
+struct aio_uring_offsets {
+       u32 head;
+       u32 tail;
+       u32 ring_mask;
+       u32 ring_entries;
+       u32 flags;
+       u32 elems;
+};
+
+struct aio_uring_params {
+       u32 sq_entries;
+       u32 cq_entries;
+       u32 flags;
+       u16 sq_thread_cpu;
+       u16 resv[9];
+       struct aio_uring_offsets sq_off;
+       struct aio_uring_offsets cq_off;
+};
+
 struct aio_sq_ring {
 struct aio_sq_ring {
-       union {
-               struct {
-                       u32 head;
-                       u32 tail;
-                       u32 nr_events;
-                       u16 sq_thread_cpu;
-                       u64 iocbs;
-               };
-               u32 pad[16];
-       };
-       u32 array[0];
+       u32 *head;
+       u32 *tail;
+       u32 *ring_mask;
+       u32 *ring_entries;
+       u32 *array;
 };
 
 struct aio_cq_ring {
 };
 
 struct aio_cq_ring {
-       union {
-               struct {
-                       u32 head;
-                       u32 tail;
-                       u32 nr_events;
-               };
-               struct io_event pad;
-       };
-       struct io_event events[0];
+       u32 *head;
+       u32 *tail;
+       u32 *ring_mask;
+       u32 *ring_entries;
+       struct io_event *events;
 };
 
 };
 
-#define IORING_FLAG_GETEVENTS  (1 << 0)
+#define IORING_ENTER_GETEVENTS (1 << 0)
 
 #define DEPTH                  32
 
 
 #define DEPTH                  32
 
@@ -73,17 +88,17 @@ struct aio_cq_ring {
 
 #define BS                     4096
 
 
 #define BS                     4096
 
-static unsigned sq_ring_mask = DEPTH - 1;
-static unsigned cq_ring_mask = (2 * DEPTH) - 1;
+static unsigned sq_ring_mask, cq_ring_mask;
 
 struct submitter {
        pthread_t thread;
        unsigned long max_blocks;
 
 struct submitter {
        pthread_t thread;
        unsigned long max_blocks;
-       io_context_t ioc;
+       int fd;
        struct drand48_data rand;
        struct drand48_data rand;
-       struct aio_sq_ring *sq_ring;
+       struct aio_sq_ring sq_ring;
        struct iocb *iocbs;
        struct iocb *iocbs;
-       struct aio_cq_ring *cq_ring;
+       struct iovec iovecs[DEPTH];
+       struct aio_cq_ring cq_ring;
        int inflight;
        unsigned long reaps;
        unsigned long done;
        int inflight;
        unsigned long reaps;
        unsigned long done;
@@ -96,23 +111,22 @@ struct submitter {
 static struct submitter submitters[1];
 static volatile int finish;
 
 static struct submitter submitters[1];
 static volatile int finish;
 
-static int polled = 1;         /* use IO polling */
-static int fixedbufs = 1;      /* use fixed user buffers */
-static int buffered = 0;       /* use buffered IO, not O_DIRECT */
+static int polled = 0;         /* use IO polling */
+static int fixedbufs = 0;      /* use fixed user buffers */
+static int buffered = 1;       /* use buffered IO, not O_DIRECT */
 static int sq_thread = 0;      /* use kernel submission thread */
 static int sq_thread_cpu = 0;  /* pin above thread to this CPU */
 
 static int sq_thread = 0;      /* use kernel submission thread */
 static int sq_thread_cpu = 0;  /* pin above thread to this CPU */
 
-static int io_setup2(unsigned int nr_events, unsigned int flags,
-                    struct aio_sq_ring *sq_ring, struct aio_cq_ring *cq_ring,
-                    io_context_t *ctx_idp)
+static int io_uring_setup(unsigned entries, struct iovec *iovecs,
+                         struct aio_uring_params *p)
 {
 {
-       return syscall(335, nr_events, flags, sq_ring, cq_ring, ctx_idp);
+       return syscall(335, entries, iovecs, p);
 }
 
 }
 
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
-                        unsigned int min_complete, unsigned int flags)
+static int io_uring_enter(struct submitter *s, unsigned int to_submit,
+                         unsigned int min_complete, unsigned int flags)
 {
 {
-       return syscall(336, ctx, to_submit, min_complete, flags);
+       return syscall(336, s->fd, to_submit, min_complete, flags);
 }
 
 static int gettid(void)
 }
 
 static int gettid(void)
@@ -120,8 +134,9 @@ static int gettid(void)
        return syscall(__NR_gettid);
 }
 
        return syscall(__NR_gettid);
 }
 
-static void init_io(struct submitter *s, int fd, struct iocb *iocb)
+static void init_io(struct submitter *s, int fd, unsigned index)
 {
 {
+       struct iocb *iocb = &s->iocbs[index];
        unsigned long offset;
        long r;
 
        unsigned long offset;
        long r;
 
@@ -130,34 +145,34 @@ static void init_io(struct submitter *s, int fd, struct iocb *iocb)
 
        iocb->aio_fildes = fd;
        iocb->aio_lio_opcode = IO_CMD_PREAD;
 
        iocb->aio_fildes = fd;
        iocb->aio_lio_opcode = IO_CMD_PREAD;
+       iocb->u.c.buf = s->iovecs[index].iov_base;
+       iocb->u.c.nbytes = BS;
        iocb->u.c.offset = offset;
        iocb->u.c.offset = offset;
-       if (!fixedbufs)
-               iocb->u.c.nbytes = BS;
 }
 
 static int prep_more_ios(struct submitter *s, int fd, int max_ios)
 {
 }
 
 static int prep_more_ios(struct submitter *s, int fd, int max_ios)
 {
-       struct aio_sq_ring *ring = s->sq_ring;
+       struct aio_sq_ring *ring = &s->sq_ring;
        u32 index, tail, next_tail, prepped = 0;
 
        u32 index, tail, next_tail, prepped = 0;
 
-       next_tail = tail = ring->tail;
+       next_tail = tail = *ring->tail;
        do {
                next_tail++;
                barrier();
        do {
                next_tail++;
                barrier();
-               if (next_tail == ring->head)
+               if (next_tail == *ring->head)
                        break;
 
                index = tail & sq_ring_mask;
                        break;
 
                index = tail & sq_ring_mask;
-               init_io(s, fd, &s->iocbs[index]);
-               s->sq_ring->array[index] = index;
+               init_io(s, fd, index);
+               ring->array[index] = index;
                prepped++;
                tail = next_tail;
        } while (prepped < max_ios);
 
                prepped++;
                tail = next_tail;
        } while (prepped < max_ios);
 
-       if (ring->tail != tail) {
+       if (*ring->tail != tail) {
                /* order tail store with writes to iocbs above */
                barrier();
                /* order tail store with writes to iocbs above */
                barrier();
-               ring->tail = tail;
+               *ring->tail = tail;
                barrier();
        }
        return prepped;
                barrier();
        }
        return prepped;
@@ -187,14 +202,14 @@ static int get_file_size(int fd, unsigned long *blocks)
 
 static int reap_events(struct submitter *s)
 {
 
 static int reap_events(struct submitter *s)
 {
-       struct aio_cq_ring *ring = s->cq_ring;
+       struct aio_cq_ring *ring = &s->cq_ring;
        struct io_event *ev;
        u32 head, reaped = 0;
 
        struct io_event *ev;
        u32 head, reaped = 0;
 
-       head = ring->head;
+       head = *ring->head;
        do {
                barrier();
        do {
                barrier();
-               if (head == ring->tail)
+               if (head == *ring->tail)
                        break;
                ev = &ring->events[head & cq_ring_mask];
                if (ev->res != BS) {
                        break;
                ev = &ring->events[head & cq_ring_mask];
                if (ev->res != BS) {
@@ -213,7 +228,7 @@ static int reap_events(struct submitter *s)
        } while (1);
 
        s->inflight -= reaped;
        } while (1);
 
        s->inflight -= reaped;
-       ring->head = head;
+       *ring->head = head;
        barrier();
        return reaped;
 }
        barrier();
        return reaped;
 }
@@ -262,8 +277,7 @@ submit:
                else
                        to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
 
                else
                        to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
 
-               ret = io_ring_enter(s->ioc, to_submit, to_wait,
-                                       IORING_FLAG_GETEVENTS);
+               ret = io_uring_enter(s, to_submit, to_wait, IORING_ENTER_GETEVENTS);
                s->calls++;
 
                this_reap = reap_events(s);
                s->calls++;
 
                this_reap = reap_events(s);
@@ -327,15 +341,74 @@ static void arm_sig_int(void)
        sigaction(SIGINT, &act, NULL);
 }
 
        sigaction(SIGINT, &act, NULL);
 }
 
+static int setup_ring(struct submitter *s)
+{
+       struct aio_sq_ring *sring = &s->sq_ring;
+       struct aio_cq_ring *cring = &s->cq_ring;
+       struct aio_uring_params p;
+       void *ptr;
+       int fd;
+
+       memset(&p, 0, sizeof(p));
+
+       p.flags = IOCTX_FLAG_SCQRING;
+       if (polled)
+               p.flags |= IOCTX_FLAG_IOPOLL;
+       if (fixedbufs)
+               p.flags |= IOCTX_FLAG_FIXEDBUFS;
+       if (buffered)
+               p.flags |= IOCTX_FLAG_SQWQ;
+       else if (sq_thread) {
+               p.flags |= IOCTX_FLAG_SQTHREAD;
+               p.sq_thread_cpu = sq_thread_cpu;
+       }
+
+       if (fixedbufs)
+               fd = io_uring_setup(DEPTH, s->iovecs, &p);
+       else
+               fd = io_uring_setup(DEPTH, NULL, &p);
+       if (fd < 0) {
+               perror("io_uring_setup");
+               return 1;
+       }
+
+       s->fd = fd;
+
+       ptr = mmap(0, p.sq_off.elems + p.sq_entries * sizeof(u32),
+                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                       fd, IORING_OFF_SQ_RING);
+       printf("sq_ring ptr = 0x%p\n", ptr);
+       sring->head = ptr + p.sq_off.head;
+       sring->tail = ptr + p.sq_off.tail;
+       sring->ring_mask = ptr + p.sq_off.ring_mask;
+       sring->ring_entries = ptr + p.sq_off.ring_entries;
+       sring->array = ptr + p.sq_off.elems;
+       sq_ring_mask = *sring->ring_mask;
+
+       s->iocbs = mmap(0, p.sq_entries * sizeof(struct iocb), PROT_READ | PROT_WRITE,
+                       MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_IOCB);
+       printf("iocbs ptr   = 0x%p\n", s->iocbs);
+
+       ptr = mmap(0, p.cq_off.elems + p.cq_entries * sizeof(struct io_event),
+                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                       fd, IORING_OFF_CQ_RING);
+       printf("cq_ring ptr = 0x%p\n", ptr);
+       cring->head = ptr + p.cq_off.head;
+       cring->tail = ptr + p.cq_off.tail;
+       cring->ring_mask = ptr + p.cq_off.ring_mask;
+       cring->ring_entries = ptr + p.cq_off.ring_entries;
+       cring->events = ptr + p.cq_off.elems;
+       cq_ring_mask = *cring->ring_mask;
+       return 0;
+}
+
 int main(int argc, char *argv[])
 {
        struct submitter *s = &submitters[0];
        unsigned long done, calls, reap, cache_hit, cache_miss;
 int main(int argc, char *argv[])
 {
        struct submitter *s = &submitters[0];
        unsigned long done, calls, reap, cache_hit, cache_miss;
-       int flags = 0, err;
-       int j;
-       size_t size;
-       void *p, *ret;
+       int err, i;
        struct rlimit rlim;
        struct rlimit rlim;
+       void *ret;
 
        if (argc < 2) {
                printf("%s: filename\n", argv[0]);
 
        if (argc < 2) {
                printf("%s: filename\n", argv[0]);
@@ -351,58 +424,24 @@ int main(int argc, char *argv[])
 
        arm_sig_int();
 
 
        arm_sig_int();
 
-       size = sizeof(struct iocb) * DEPTH;
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       memset(p, 0, size);
-       s->iocbs = p;
-
-       size = sizeof(struct aio_sq_ring) + DEPTH * sizeof(u32);
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       s->sq_ring = p;
-       memset(p, 0, size);
-       s->sq_ring->nr_events = DEPTH;
-       s->sq_ring->iocbs = (u64) s->iocbs;
-
-       /* CQ ring must be twice as big */
-       size = sizeof(struct aio_cq_ring) +
-                       2 * DEPTH * sizeof(struct io_event);
-       if (posix_memalign(&p, 4096, size))
-               return 1;
-       s->cq_ring = p;
-       memset(p, 0, size);
-       s->cq_ring->nr_events = 2 * DEPTH;
-
-       for (j = 0; j < DEPTH; j++) {
-               struct iocb *iocb = &s->iocbs[j];
+       for (i = 0; i < DEPTH; i++) {
+               void *buf;
 
 
-               if (posix_memalign(&iocb->u.c.buf, BS, BS)) {
+               if (posix_memalign(&buf, BS, BS)) {
                        printf("failed alloc\n");
                        return 1;
                }
                        printf("failed alloc\n");
                        return 1;
                }
-               iocb->u.c.nbytes = BS;
-       }
-
-       flags = IOCTX_FLAG_SCQRING;
-       if (polled)
-               flags |= IOCTX_FLAG_IOPOLL;
-       if (fixedbufs)
-               flags |= IOCTX_FLAG_FIXEDBUFS;
-       if (buffered)
-               flags |= IOCTX_FLAG_SQWQ;
-       else if (sq_thread) {
-               flags |= IOCTX_FLAG_SQTHREAD;
-               s->sq_ring->sq_thread_cpu = sq_thread_cpu;
+               s->iovecs[i].iov_base = buf;
+               s->iovecs[i].iov_len = BS;
        }
 
        }
 
-       err = io_setup2(DEPTH, flags, s->sq_ring, s->cq_ring, &s->ioc);
+       err = setup_ring(s);
        if (err) {
        if (err) {
-               printf("ctx_init failed: %s, %d\n", strerror(errno), err);
+               printf("ring setup failed: %s, %d\n", strerror(errno), err);
                return 1;
        }
                return 1;
        }
-       printf("polled=%d, fixedbufs=%d, buffered=%d\n", polled, fixedbufs, buffered);
-       printf("  QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, s->sq_ring->nr_events, s->cq_ring->nr_events);
+       printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
+       printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
        strcpy(s->filename, argv[1]);
 
        pthread_create(&s->thread, NULL, submitter_fn, s);
        strcpy(s->filename, argv[1]);
 
        pthread_create(&s->thread, NULL, submitter_fn, s);
@@ -437,7 +476,7 @@ int main(int argc, char *argv[])
                }
                printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%u tail=%u), Cachehit=%0.2f%%\n",
                                this_done - done, rpc, ipc, s->inflight,
                }
                printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%u tail=%u), Cachehit=%0.2f%%\n",
                                this_done - done, rpc, ipc, s->inflight,
-                               s->cq_ring->head, s->cq_ring->tail, hit);
+                               *s->cq_ring.head, *s->cq_ring.tail, hit);
                done = this_done;
                calls = this_call;
                reap = this_reap;
                done = this_done;
                calls = this_call;
                reap = this_reap;