#include "../arch/arch.h"
#include "../lib/types.h"
#include "../lib/roundup.h"
+#include "../lib/rand.h"
#include "../minmax.h"
#include "../os/linux/io_uring.h"
struct file {
unsigned long max_blocks;
+ unsigned long max_size;
+ unsigned long cur_off;
unsigned pending_ios;
int real_fd;
int fixed_fd;
__s32 *fds;
+ struct taus258_state rand_state;
+
unsigned long *clock_batch;
int clock_index;
unsigned long *plat;
static struct submitter *submitter;
static volatile int finish;
+static int stats_running; /* set by the reporting loop after its first one-second sleep when stats is on; gates tagging IOs with the clock batch index */
+static unsigned long max_iops; /* highest per-interval IOPS seen so far; printed by do_finish() */
static int depth = DEPTH;
static int batch_submit = BATCH_SUBMIT;
static int bs = BS;
static int polled = 1; /* use IO polling */
static int fixedbufs = 1; /* use fixed user buffers */
+static int dma_map; /* pre-map DMA buffers */
static int register_files = 1; /* use fixed files */
static int buffered = 0; /* use buffered IO, not O_DIRECT */
static int sq_thread_poll = 0; /* use kernel submission/poller thread */
static int nthreads = 1;
static int stats = 0; /* generate IO stats */
static int aio = 0; /* use libaio */
+static int runtime = 0; /* runtime in seconds, 0 = unlimited */
+static int random_io = 1; /* random or sequential IO */
static unsigned long tsc_rate;
80.0, 90.0, 95.0, 99.0, 99.5, 99.9, 99.95, 99.99 };
static int plist_len = 17;
+/*
+ * Fallback definitions, used only when the included io_uring header does
+ * not already define IORING_REGISTER_MAP_BUFFERS.
+ * NOTE(review): the opcode value and the struct layout must match what the
+ * running kernel expects - confirm against the kernel headers in use.
+ */
+#ifndef IORING_REGISTER_MAP_BUFFERS
+#define IORING_REGISTER_MAP_BUFFERS 20
+struct io_uring_map_buffers {
+ __s32 fd; /* file descriptor the mapping is requested for */
+ __u32 buf_start; /* presumably first registered-buffer index - TODO confirm */
+ __u32 buf_end; /* presumably end of the buffer index range; this file passes the IO depth */
+ __u32 flags; /* left zero by this file */
+ __u64 rsvd[2]; /* left zero by this file */
+};
+#endif
+
static unsigned long cycles_to_nsec(unsigned long cycles)
{
uint64_t val;
unsigned long cycles;
unsigned int pidx;
- cycles = get_cpu_clock();
- cycles -= s->clock_batch[clock_index];
- pidx = plat_val_to_idx(cycles);
- s->plat[pidx] += nr;
+ if (!s->finish && clock_index) {
+ cycles = get_cpu_clock();
+ cycles -= s->clock_batch[clock_index];
+ pidx = plat_val_to_idx(cycles);
+ s->plat[pidx] += nr;
+ }
#endif
}
+/*
+ * Issue IORING_REGISTER_MAP_BUFFERS on the submitter's ring for its first
+ * file, covering buffer indices up to the configured IO depth.
+ *
+ * Returns 0 without doing anything in no-op mode, -1 (after printing a
+ * message) when the submitter has more than one file, and otherwise the
+ * raw result of the io_uring_register syscall.
+ */
+static int io_uring_map_buffers(struct submitter *s)
+{
+	struct io_uring_map_buffers map = { 0 };
+
+	/* Nothing to map when only no-op requests are issued. */
+	if (do_nop)
+		return 0;
+
+	/* The request carries a single fd, so only one file is supported. */
+	if (s->nr_files > 1) {
+		fprintf(stderr, "Can't map buffers with multiple files\n");
+		return -1;
+	}
+
+	map.fd = s->files[0].real_fd;
+	map.buf_end = depth;
+
+	return syscall(__NR_io_uring_register, s->ring_fd,
+			IORING_REGISTER_MAP_BUFFERS, &map, 1);
+}
+
+
static int io_uring_register_buffers(struct submitter *s)
{
if (do_nop)
+/*
+ * Thin wrapper around the io_uring_setup syscall.
+ *
+ * Modifies *p before the call: IORING_SETUP_CQSIZE is OR-ed into p->flags
+ * and p->cq_entries is set to 'entries'. Returns the syscall's result
+ * unchanged.
+ */
static int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
+ /*
+ * Clamp CQ ring size at our SQ ring size, we don't need more entries
+ * than that.
+ */
+ p->flags |= IORING_SETUP_CQSIZE;
+ p->cq_entries = entries;
+
return syscall(__NR_io_uring_setup, entries, p);
}
}
f->pending_ios++;
- r = lrand48();
- offset = (r % (f->max_blocks - 1)) * bs;
+ if (random_io) {
+ r = __rand64(&s->rand_state);
+ offset = (r % (f->max_blocks - 1)) * bs;
+ } else {
+ offset = f->cur_off;
+ f->cur_off += bs;
+ if (f->cur_off + bs > f->max_size)
+ f->cur_off = 0;
+ }
if (register_files) {
sqe->flags = IOSQE_FIXED_FILE;
sqe->ioprio = 0;
sqe->off = offset;
sqe->user_data = (unsigned long) f->fileno;
- if (stats)
+ if (stats && stats_running)
sqe->user_data |= ((unsigned long)s->clock_index << 32);
}
return -1;
f->max_blocks = bytes / bs;
+ f->max_size = bytes;
return 0;
} else if (S_ISREG(st.st_mode)) {
f->max_blocks = st.st_size / bs;
+ f->max_size = st.st_size;
return 0;
}
int i, nr_batch;
s->tid = gettid();
- printf("submitter=%d\n", s->tid);
+ printf("submitter=%d, tid=%d\n", s->index, s->tid);
+ __init_rand64(&s->rand_state, pthread_self());
srand48(pthread_self());
for (i = 0; i < MAX_FDS; i++)
if (stats) {
nr_batch = roundup_pow2(depth / batch_submit);
+ if (nr_batch < 2)
+ nr_batch = 2;
s->clock_batch = calloc(nr_batch, sizeof(unsigned long));
- s->clock_index = 0;
+ s->clock_index = 1;
s->plat = calloc(PLAT_NR, sizeof(unsigned long));
} else {
s->iovecs[index].iov_len, offset);
data = f->fileno;
- if (stats)
+ if (stats && stats_running)
data |= ((unsigned long) s->clock_index << 32);
iocb->data = (void *) (uintptr_t) data;
index++;
}
prepped = 0;
- if (to_wait) {
+ while (to_wait) {
int r;
- do {
- s->calls++;
- r = io_getevents(s->aio_ctx, to_wait, to_wait, events, NULL);
- if (r < 0) {
- perror("io_getevents");
- break;
- } else if (r != to_wait) {
- printf("r=%d, wait=%d\n", r, to_wait);
- break;
- }
- r = reap_events_aio(s, events, r);
- s->reaps += r;
- to_wait -= r;
- } while (to_wait);
+ s->calls++;
+ r = io_getevents(s->aio_ctx, to_wait, to_wait, events, NULL);
+ if (r < 0) {
+ perror("io_getevents");
+ break;
+ } else if (r != to_wait) {
+ printf("r=%d, wait=%d\n", r, to_wait);
+ break;
+ }
+ r = reap_events_aio(s, events, r);
+ s->reaps += r;
+ to_wait -= r;
}
} while (!s->finish);
return ret;
}
-static void sig_int(int sig)
+/*
+ * Stop the run: print why we are exiting ('reason' is inserted into the
+ * message), flag every submitter to finish, report the highest IOPS value
+ * recorded in max_iops (in thousands once it exceeds 100000, nothing when
+ * it is still zero), then set the global finish flag.
+ *
+ * NOTE(review): this is also reached from the sig_int() signal handler,
+ * and printf() is not on the async-signal-safe list.
+ */
+static void do_finish(const char *reason)
{
int j;
-
- printf("Exiting on signal %d\n", sig);
+ printf("Exiting on %s\n", reason);
for (j = 0; j < nthreads; j++) {
struct submitter *s = get_submitter(j);
s->finish = 1;
}
+ if (max_iops > 100000)
+ printf("Maximum IOPS=%luK\n", max_iops / 1000);
+ else if (max_iops)
+ printf("Maximum IOPS=%lu\n", max_iops);
finish = 1;
}
+/* Signal handler: hand off to the common shutdown path. */
+static void sig_int(int sig)
+{
+	(void) sig;	/* the signal number is not used */
+
+	do_finish("signal");
+}
+
static void arm_sig_int(void)
{
struct sigaction act;
perror("io_uring_register_buffers");
return 1;
}
+
+ if (dma_map) {
+ ret = io_uring_map_buffers(s);
+ if (ret < 0) {
+ perror("io_uring_map_buffers");
+ return 1;
+ }
+ }
}
if (register_files) {
ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQ_RING);
- printf("sq_ring ptr = 0x%p\n", ptr);
sring->head = ptr + p.sq_off.head;
sring->tail = ptr + p.sq_off.tail;
sring->ring_mask = ptr + p.sq_off.ring_mask;
s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQES);
- printf("sqes ptr = 0x%p\n", s->sqes);
ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_CQ_RING);
- printf("cq_ring ptr = 0x%p\n", ptr);
cring->head = ptr + p.cq_off.head;
cring->tail = ptr + p.cq_off.tail;
cring->ring_mask = ptr + p.cq_off.ring_mask;
+/*
+ * Print the option summary with the current defaults, then exit with
+ * 'status'. 'argv' is the program name shown on the first line (callers
+ * pass argv[0]).
+ *
+ * Every conversion in the format string must pair up, in order, with one
+ * argument in the list below: the -c line consumes BATCH_COMPLETE, and
+ * random_io / aio follow the order of the -R and -a lines.
+ */
static void usage(char *argv, int status)
{
+ char runtime_str[16];
+ snprintf(runtime_str, sizeof(runtime_str), "%d", runtime);
printf("%s [options] -- [filenames]\n"
" -d <int> : IO Depth, default %d\n"
" -s <int> : Batch submit, default %d\n"
+ " -c <int> : Batch complete, default %d\n"
" -b <int> : Block size, default %d\n"
" -p <bool> : Polled IO, default %d\n"
" -B <bool> : Fixed buffers, default %d\n"
+ " -D <bool> : DMA map fixed buffers, default %d\n"
" -F <bool> : Register files, default %d\n"
" -n <int> : Number of threads, default %d\n"
" -O <bool> : Use O_DIRECT, default %d\n"
" -N <bool> : Perform just no-op requests, default %d\n"
" -t <bool> : Track IO latencies, default %d\n"
" -T <int> : TSC rate in HZ\n"
+ " -r <int> : Runtime in seconds, default %s\n"
+ " -R <bool> : Use random IO, default %d\n"
" -a <bool> : Use legacy aio, default %d\n",
argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
- fixedbufs, register_files, nthreads, !buffered, do_nop, stats, aio);
+ fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
+ stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio);
exit(status);
}
if (!do_nop && argc < 2)
usage(argv[0], 1);
- while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:h?")) != -1) {
+ while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:h?")) != -1) {
switch (opt) {
case 'a':
aio = !!atoi(optarg);
tsc_rate = strtoul(optarg, NULL, 10);
write_tsc_rate();
break;
+ case 'r':
+ runtime = atoi(optarg);
+ break;
+ case 'D':
+ dma_map = !!atoi(optarg);
+ break;
+ case 'R':
+ random_io = !!atoi(optarg);
+ break;
case 'h':
case '?':
default:
batch_complete = depth;
if (batch_submit > depth)
batch_submit = depth;
+ if (!fixedbufs && dma_map)
+ dma_map = 0;
submitter = calloc(nthreads, sizeof(*submitter) +
depth * sizeof(struct iovec));
}
}
s = get_submitter(0);
- printf("polled=%d, fixedbufs=%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, register_files, buffered, depth);
+ printf("polled=%d, fixedbufs=%d/%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, dma_map, register_files, buffered, depth);
if (!aio)
printf("Engine=io_uring, sq_ring=%d, cq_ring=%d\n", *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
-#ifdef CONFIG_LIBAIO
else
- printf("Engine=aio, ctx=%p\n", &s->aio_ctx);
-#endif
+ printf("Engine=aio\n");
for (j = 0; j < nthreads; j++) {
s = get_submitter(j);
unsigned long iops, bw;
sleep(1);
+ if (runtime && !--runtime)
+ do_finish("timeout");
+
+ /* don't print a partial run if we are finishing (signal or timeout) */
+ if (finish)
+ break;
+
+ /* one second into the run, enable stats */
+ if (stats)
+ stats_running = 1;
+
for (j = 0; j < nthreads; j++) {
+ s = get_submitter(j);
this_done += s->done;
this_call += s->calls;
this_reap += s->reaps;
bw = iops * (bs / 1048576);
else
bw = iops / (1048576 / bs);
- printf("IOPS=%lu, ", iops);
+ if (iops > 100000)
+ printf("IOPS=%luK, ", iops / 1000);
+ else
+ printf("IOPS=%lu, ", iops);
+ max_iops = max(max_iops, iops);
if (!do_nop)
printf("BW=%luMiB/s, ", bw);
printf("IOS/call=%ld/%ld, inflight=(%s)\n", rpc, ipc, fdepths);