#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/resource.h>
+#include <sys/mman.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
-#define IOCTX_FLAG_IOPOLL (1 << 0)
-#define IOCTX_FLAG_SCQRING (1 << 1) /* Use SQ/CQ rings */
+#define IOCTX_FLAG_SCQRING (1 << 0) /* Use SQ/CQ rings */
+#define IOCTX_FLAG_IOPOLL (1 << 1)
#define IOCTX_FLAG_FIXEDBUFS (1 << 2)
#define IOCTX_FLAG_SQTHREAD (1 << 3) /* Use SQ thread */
#define IOCTX_FLAG_SQWQ (1 << 4) /* Use SQ wq */
+#define IOCTX_FLAG_SQPOLL (1 << 5)
#define IOEV_RES2_CACHEHIT (1 << 0)
typedef uint32_t u32;
typedef uint16_t u16;
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_IOCB 0x10000000ULL
+
+struct aio_uring_offsets {
+ u32 head;
+ u32 tail;
+ u32 ring_mask;
+ u32 ring_entries;
+ u32 flags;
+ u32 elems;
+};
+
+struct aio_uring_params {
+ u32 sq_entries;
+ u32 cq_entries;
+ u32 flags;
+ u16 sq_thread_cpu;
+ u16 resv[9];
+ struct aio_uring_offsets sq_off;
+ struct aio_uring_offsets cq_off;
+};
+
struct aio_sq_ring {
- union {
- struct {
- u32 head;
- u32 tail;
- u32 nr_events;
- u16 sq_thread_cpu;
- u64 iocbs;
- };
- u32 pad[16];
- };
- u32 array[0];
+ u32 *head;
+ u32 *tail;
+ u32 *ring_mask;
+ u32 *ring_entries;
+ u32 *array;
};
struct aio_cq_ring {
- union {
- struct {
- u32 head;
- u32 tail;
- u32 nr_events;
- };
- struct io_event pad;
- };
- struct io_event events[0];
+ u32 *head;
+ u32 *tail;
+ u32 *ring_mask;
+ u32 *ring_entries;
+ struct io_event *events;
};
-#define IORING_FLAG_GETEVENTS (1 << 0)
+#define IORING_ENTER_GETEVENTS (1 << 0)
#define DEPTH 32
#define BS 4096
-static unsigned sq_ring_mask = DEPTH - 1;
-static unsigned cq_ring_mask = (2 * DEPTH) - 1;
+static unsigned sq_ring_mask, cq_ring_mask;
struct submitter {
pthread_t thread;
unsigned long max_blocks;
- io_context_t ioc;
+ int fd;
struct drand48_data rand;
- struct aio_sq_ring *sq_ring;
+ struct aio_sq_ring sq_ring;
struct iocb *iocbs;
- struct aio_cq_ring *cq_ring;
+ struct iovec iovecs[DEPTH];
+ struct aio_cq_ring cq_ring;
int inflight;
unsigned long reaps;
unsigned long done;
static struct submitter submitters[1];
static volatile int finish;
-static int polled = 1; /* use IO polling */
-static int fixedbufs = 1; /* use fixed user buffers */
-static int buffered = 0; /* use buffered IO, not O_DIRECT */
+static int polled = 0; /* use IO polling */
+static int fixedbufs = 0; /* use fixed user buffers */
+static int buffered = 1; /* use buffered IO, not O_DIRECT */
static int sq_thread = 0; /* use kernel submission thread */
static int sq_thread_cpu = 0; /* pin above thread to this CPU */
-static int io_setup2(unsigned int nr_events, unsigned int flags,
- struct aio_sq_ring *sq_ring, struct aio_cq_ring *cq_ring,
- io_context_t *ctx_idp)
+static int io_uring_setup(unsigned entries, struct iovec *iovecs,
+ struct aio_uring_params *p)
{
- return syscall(335, nr_events, flags, sq_ring, cq_ring, ctx_idp);
+ return syscall(335, entries, iovecs, p);
}
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
- unsigned int min_complete, unsigned int flags)
+static int io_uring_enter(struct submitter *s, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags)
{
- return syscall(336, ctx, to_submit, min_complete, flags);
+ return syscall(336, s->fd, to_submit, min_complete, flags);
}
static int gettid(void)
return syscall(__NR_gettid);
}
-static void init_io(struct submitter *s, int fd, struct iocb *iocb)
+static void init_io(struct submitter *s, int fd, unsigned index)
{
+ struct iocb *iocb = &s->iocbs[index];
unsigned long offset;
long r;
iocb->aio_fildes = fd;
iocb->aio_lio_opcode = IO_CMD_PREAD;
+ iocb->u.c.buf = s->iovecs[index].iov_base;
+ iocb->u.c.nbytes = BS;
iocb->u.c.offset = offset;
- if (!fixedbufs)
- iocb->u.c.nbytes = BS;
}
static int prep_more_ios(struct submitter *s, int fd, int max_ios)
{
- struct aio_sq_ring *ring = s->sq_ring;
+ struct aio_sq_ring *ring = &s->sq_ring;
u32 index, tail, next_tail, prepped = 0;
- next_tail = tail = ring->tail;
+ next_tail = tail = *ring->tail;
do {
next_tail++;
barrier();
- if (next_tail == ring->head)
+ if (next_tail == *ring->head)
break;
index = tail & sq_ring_mask;
- init_io(s, fd, &s->iocbs[index]);
- s->sq_ring->array[index] = index;
+ init_io(s, fd, index);
+ ring->array[index] = index;
prepped++;
tail = next_tail;
} while (prepped < max_ios);
- if (ring->tail != tail) {
+ if (*ring->tail != tail) {
/* order tail store with writes to iocbs above */
barrier();
- ring->tail = tail;
+ *ring->tail = tail;
barrier();
}
return prepped;
static int reap_events(struct submitter *s)
{
- struct aio_cq_ring *ring = s->cq_ring;
+ struct aio_cq_ring *ring = &s->cq_ring;
struct io_event *ev;
u32 head, reaped = 0;
- head = ring->head;
+ head = *ring->head;
do {
barrier();
- if (head == ring->tail)
+ if (head == *ring->tail)
break;
ev = &ring->events[head & cq_ring_mask];
if (ev->res != BS) {
} while (1);
s->inflight -= reaped;
- ring->head = head;
+ *ring->head = head;
barrier();
return reaped;
}
else
to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
- ret = io_ring_enter(s->ioc, to_submit, to_wait,
- IORING_FLAG_GETEVENTS);
+ ret = io_uring_enter(s, to_submit, to_wait, IORING_ENTER_GETEVENTS);
s->calls++;
this_reap = reap_events(s);
sigaction(SIGINT, &act, NULL);
}
+static int setup_ring(struct submitter *s)
+{
+ struct aio_sq_ring *sring = &s->sq_ring;
+ struct aio_cq_ring *cring = &s->cq_ring;
+ struct aio_uring_params p;
+ void *ptr;
+ int fd;
+
+ memset(&p, 0, sizeof(p));
+
+ p.flags = IOCTX_FLAG_SCQRING;
+ if (polled)
+ p.flags |= IOCTX_FLAG_IOPOLL;
+ if (fixedbufs)
+ p.flags |= IOCTX_FLAG_FIXEDBUFS;
+ if (buffered)
+ p.flags |= IOCTX_FLAG_SQWQ;
+ else if (sq_thread) {
+ p.flags |= IOCTX_FLAG_SQTHREAD;
+ p.sq_thread_cpu = sq_thread_cpu;
+ }
+
+ if (fixedbufs)
+ fd = io_uring_setup(DEPTH, s->iovecs, &p);
+ else
+ fd = io_uring_setup(DEPTH, NULL, &p);
+ if (fd < 0) {
+ perror("io_uring_setup");
+ return 1;
+ }
+
+ s->fd = fd;
+
+ ptr = mmap(0, p.sq_off.elems + p.sq_entries * sizeof(u32),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ fd, IORING_OFF_SQ_RING);
+ printf("sq_ring ptr = 0x%p\n", ptr);
+ sring->head = ptr + p.sq_off.head;
+ sring->tail = ptr + p.sq_off.tail;
+ sring->ring_mask = ptr + p.sq_off.ring_mask;
+ sring->ring_entries = ptr + p.sq_off.ring_entries;
+ sring->array = ptr + p.sq_off.elems;
+ sq_ring_mask = *sring->ring_mask;
+
+ s->iocbs = mmap(0, p.sq_entries * sizeof(struct iocb), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_IOCB);
+ printf("iocbs ptr = 0x%p\n", s->iocbs);
+
+ ptr = mmap(0, p.cq_off.elems + p.cq_entries * sizeof(struct io_event),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ fd, IORING_OFF_CQ_RING);
+ printf("cq_ring ptr = 0x%p\n", ptr);
+ cring->head = ptr + p.cq_off.head;
+ cring->tail = ptr + p.cq_off.tail;
+ cring->ring_mask = ptr + p.cq_off.ring_mask;
+ cring->ring_entries = ptr + p.cq_off.ring_entries;
+ cring->events = ptr + p.cq_off.elems;
+ cq_ring_mask = *cring->ring_mask;
+ return 0;
+}
+
int main(int argc, char *argv[])
{
struct submitter *s = &submitters[0];
unsigned long done, calls, reap, cache_hit, cache_miss;
- int flags = 0, err;
- int j;
- size_t size;
- void *p, *ret;
+ int err, i;
struct rlimit rlim;
+ void *ret;
if (argc < 2) {
printf("%s: filename\n", argv[0]);
arm_sig_int();
- size = sizeof(struct iocb) * DEPTH;
- if (posix_memalign(&p, 4096, size))
- return 1;
- memset(p, 0, size);
- s->iocbs = p;
-
- size = sizeof(struct aio_sq_ring) + DEPTH * sizeof(u32);
- if (posix_memalign(&p, 4096, size))
- return 1;
- s->sq_ring = p;
- memset(p, 0, size);
- s->sq_ring->nr_events = DEPTH;
- s->sq_ring->iocbs = (u64) s->iocbs;
-
- /* CQ ring must be twice as big */
- size = sizeof(struct aio_cq_ring) +
- 2 * DEPTH * sizeof(struct io_event);
- if (posix_memalign(&p, 4096, size))
- return 1;
- s->cq_ring = p;
- memset(p, 0, size);
- s->cq_ring->nr_events = 2 * DEPTH;
-
- for (j = 0; j < DEPTH; j++) {
- struct iocb *iocb = &s->iocbs[j];
+ for (i = 0; i < DEPTH; i++) {
+ void *buf;
- if (posix_memalign(&iocb->u.c.buf, BS, BS)) {
+ if (posix_memalign(&buf, BS, BS)) {
printf("failed alloc\n");
return 1;
}
- iocb->u.c.nbytes = BS;
- }
-
- flags = IOCTX_FLAG_SCQRING;
- if (polled)
- flags |= IOCTX_FLAG_IOPOLL;
- if (fixedbufs)
- flags |= IOCTX_FLAG_FIXEDBUFS;
- if (buffered)
- flags |= IOCTX_FLAG_SQWQ;
- else if (sq_thread) {
- flags |= IOCTX_FLAG_SQTHREAD;
- s->sq_ring->sq_thread_cpu = sq_thread_cpu;
+ s->iovecs[i].iov_base = buf;
+ s->iovecs[i].iov_len = BS;
}
- err = io_setup2(DEPTH, flags, s->sq_ring, s->cq_ring, &s->ioc);
+ err = setup_ring(s);
if (err) {
- printf("ctx_init failed: %s, %d\n", strerror(errno), err);
+ printf("ring setup failed: %s, %d\n", strerror(errno), err);
return 1;
}
- printf("polled=%d, fixedbufs=%d, buffered=%d\n", polled, fixedbufs, buffered);
- printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, s->sq_ring->nr_events, s->cq_ring->nr_events);
+ printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
+ printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
strcpy(s->filename, argv[1]);
pthread_create(&s->thread, NULL, submitter_fn, s);
}
printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%u tail=%u), Cachehit=%0.2f%%\n",
this_done - done, rpc, ipc, s->inflight,
- s->cq_ring->head, s->cq_ring->tail, hit);
+ *s->cq_ring.head, *s->cq_ring.tail, hit);
done = this_done;
calls = this_call;
reap = this_reap;