Merge branch 'dedupe_and_compression' of https://github.com/bardavid/fio
[fio.git] / t / io_uring.c
index 2ae65bc19bb1dc487bbe176274743f6eb87e0fe6..7bf215c7188a6269e8a910749c7300c64055660a 100644 (file)
@@ -28,6 +28,7 @@
 #include "../arch/arch.h"
 #include "../lib/types.h"
 #include "../lib/roundup.h"
+#include "../lib/rand.h"
 #include "../minmax.h"
 #include "../os/linux/io_uring.h"
 
@@ -59,6 +60,8 @@ static unsigned sq_ring_mask, cq_ring_mask;
 
 struct file {
        unsigned long max_blocks;
+       unsigned long max_size;
+       unsigned long cur_off;
        unsigned pending_ios;
        int real_fd;
        int fixed_fd;
@@ -86,6 +89,8 @@ struct submitter {
 
        __s32 *fds;
 
+       struct taus258_state rand_state;
+
        unsigned long *clock_batch;
        int clock_index;
        unsigned long *plat;
@@ -103,6 +108,7 @@ struct submitter {
 static struct submitter *submitter;
 static volatile int finish;
 static int stats_running;
+static unsigned long max_iops;
 
 static int depth = DEPTH;
 static int batch_submit = BATCH_SUBMIT;
@@ -110,6 +116,7 @@ static int batch_complete = BATCH_COMPLETE;
 static int bs = BS;
 static int polled = 1;         /* use IO polling */
 static int fixedbufs = 1;      /* use fixed user buffers */
+static int dma_map;            /* pre-map DMA buffers */
 static int register_files = 1; /* use fixed files */
 static int buffered = 0;       /* use buffered IO, not O_DIRECT */
 static int sq_thread_poll = 0; /* use kernel submission/poller thread */
@@ -118,6 +125,8 @@ static int do_nop = 0;              /* no-op SQ ring commands */
 static int nthreads = 1;
 static int stats = 0;          /* generate IO stats */
 static int aio = 0;            /* use libaio */
+static int runtime = 0;                /* runtime in seconds, 0 = unlimited */
+static int random_io = 1;      /* random or sequential IO */
 
 static unsigned long tsc_rate;
 
@@ -129,6 +138,17 @@ static float plist[] = { 1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0,
                        80.0, 90.0, 95.0, 99.0, 99.5, 99.9, 99.95, 99.99 };
 static int plist_len = 17;
 
+#ifndef IORING_REGISTER_MAP_BUFFERS
+#define IORING_REGISTER_MAP_BUFFERS    20
+struct io_uring_map_buffers {
+       __s32   fd;
+       __u32   buf_start;
+       __u32   buf_end;
+       __u32   flags;
+       __u64   rsvd[2];
+};
+#endif
+
 static unsigned long cycles_to_nsec(unsigned long cycles)
 {
        uint64_t val;
@@ -309,13 +329,33 @@ static void add_stat(struct submitter *s, int clock_index, int nr)
        unsigned long cycles;
        unsigned int pidx;
 
-       cycles = get_cpu_clock();
-       cycles -= s->clock_batch[clock_index];
-       pidx = plat_val_to_idx(cycles);
-       s->plat[pidx] += nr;
+       if (!s->finish && clock_index) {
+               cycles = get_cpu_clock();
+               cycles -= s->clock_batch[clock_index];
+               pidx = plat_val_to_idx(cycles);
+               s->plat[pidx] += nr;
+       }
 #endif
 }
 
+static int io_uring_map_buffers(struct submitter *s)
+{
+       struct io_uring_map_buffers map = {
+               .fd             = s->files[0].real_fd,
+               .buf_end        = depth,
+       };
+
+       if (do_nop)
+               return 0;
+       if (s->nr_files > 1) {
+               fprintf(stderr, "Can't map buffers with multiple files\n");
+               return -1;
+       }
+
+       return syscall(__NR_io_uring_register, s->ring_fd,
+                       IORING_REGISTER_MAP_BUFFERS, &map, 1);
+}
+
 static int io_uring_register_buffers(struct submitter *s)
 {
        if (do_nop)
@@ -344,6 +384,13 @@ static int io_uring_register_files(struct submitter *s)
 
 static int io_uring_setup(unsigned entries, struct io_uring_params *p)
 {
+       /*
+        * Clamp CQ ring size at our SQ ring size, we don't need more entries
+        * than that.
+        */
+       p->flags |= IORING_SETUP_CQSIZE;
+       p->cq_entries = entries;
+
        return syscall(__NR_io_uring_setup, entries, p);
 }
 
@@ -414,8 +461,15 @@ static void init_io(struct submitter *s, unsigned index)
        }
        f->pending_ios++;
 
-       r = lrand48();
-       offset = (r % (f->max_blocks - 1)) * bs;
+       if (random_io) {
+               r = __rand64(&s->rand_state);
+               offset = (r % (f->max_blocks - 1)) * bs;
+       } else {
+               offset = f->cur_off;
+               f->cur_off += bs;
+               if (f->cur_off + bs > f->max_size)
+                       f->cur_off = 0;
+       }
 
        if (register_files) {
                sqe->flags = IOSQE_FIXED_FILE;
@@ -483,9 +537,11 @@ static int get_file_size(struct file *f)
                        return -1;
 
                f->max_blocks = bytes / bs;
+               f->max_size = bytes;
                return 0;
        } else if (S_ISREG(st.st_mode)) {
                f->max_blocks = st.st_size / bs;
+               f->max_size = st.st_size;
                return 0;
        }
 
@@ -528,8 +584,8 @@ static int reap_events_uring(struct submitter *s)
                                        stat_nr = 0;
                                }
                                last_idx = clock_index;
-                       } else if (clock_index)
-                               stat_nr++;
+                       }
+                       stat_nr++;
                }
                reaped++;
                head++;
@@ -550,8 +606,9 @@ static int submitter_init(struct submitter *s)
        int i, nr_batch;
 
        s->tid = gettid();
-       printf("submitter=%d\n", s->tid);
+       printf("submitter=%d, tid=%d\n", s->index, s->tid);
 
+       __init_rand64(&s->rand_state, pthread_self());
        srand48(pthread_self());
 
        for (i = 0; i < MAX_FDS; i++)
@@ -559,6 +616,8 @@ static int submitter_init(struct submitter *s)
 
        if (stats) {
                nr_batch = roundup_pow2(depth / batch_submit);
+               if (nr_batch < 2)
+                       nr_batch = 2;
                s->clock_batch = calloc(nr_batch, sizeof(unsigned long));
                s->clock_index = 1;
 
@@ -634,8 +693,8 @@ static int reap_events_aio(struct submitter *s, struct io_event *events, int evs
                                        stat_nr = 0;
                                }
                                last_idx = clock_index;
-                       } else if (clock_index)
-                               stat_nr++;
+                       }
+                       stat_nr++;
                }
                reaped++;
                evs--;
@@ -699,23 +758,21 @@ static void *submitter_aio_fn(void *data)
                }
                prepped = 0;
 
-               if (to_wait) {
+               while (to_wait) {
                        int r;
 
-                       do {
-                               s->calls++;
-                               r = io_getevents(s->aio_ctx, to_wait, to_wait, events, NULL);
-                               if (r < 0) {
-                                       perror("io_getevents");
-                                       break;
-                               } else if (r != to_wait) {
-                                       printf("r=%d, wait=%d\n", r, to_wait);
-                                       break;
-                               }
-                               r = reap_events_aio(s, events, r);
-                               s->reaps += r;
-                               to_wait -= r;
-                       } while (to_wait);
+                       s->calls++;
+                       r = io_getevents(s->aio_ctx, to_wait, to_wait, events, NULL);
+                       if (r < 0) {
+                               perror("io_getevents");
+                               break;
+                       } else if (r != to_wait) {
+                               printf("r=%d, wait=%d\n", r, to_wait);
+                               break;
+                       }
+                       r = reap_events_aio(s, events, r);
+                       s->reaps += r;
+                       to_wait -= r;
                }
        } while (!s->finish);
 
@@ -841,18 +898,26 @@ static struct submitter *get_submitter(int offset)
        return ret;
 }
 
-static void sig_int(int sig)
+static void do_finish(const char *reason)
 {
        int j;
-
-       printf("Exiting on signal %d\n", sig);
+       printf("Exiting on %s\n", reason);
        for (j = 0; j < nthreads; j++) {
                struct submitter *s = get_submitter(j);
                s->finish = 1;
        }
+       if (max_iops > 100000)
+               printf("Maximum IOPS=%luK\n", max_iops / 1000);
+       else if (max_iops)
+               printf("Maximum IOPS=%lu\n", max_iops);
        finish = 1;
 }
 
+static void sig_int(int sig)
+{
+       do_finish("signal");
+}
+
 static void arm_sig_int(void)
 {
        struct sigaction act;
@@ -938,6 +1003,14 @@ static int setup_ring(struct submitter *s)
                        perror("io_uring_register_buffers");
                        return 1;
                }
+
+               if (dma_map) {
+                       ret = io_uring_map_buffers(s);
+                       if (ret < 0) {
+                               perror("io_uring_map_buffers");
+                               return 1;
+                       }
+               }
        }
 
        if (register_files) {
@@ -951,7 +1024,6 @@ static int setup_ring(struct submitter *s)
        ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
                        IORING_OFF_SQ_RING);
-       printf("sq_ring ptr = 0x%p\n", ptr);
        sring->head = ptr + p.sq_off.head;
        sring->tail = ptr + p.sq_off.tail;
        sring->ring_mask = ptr + p.sq_off.ring_mask;
@@ -963,12 +1035,10 @@ static int setup_ring(struct submitter *s)
        s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
                        IORING_OFF_SQES);
-       printf("sqes ptr    = 0x%p\n", s->sqes);
 
        ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
                        IORING_OFF_CQ_RING);
-       printf("cq_ring ptr = 0x%p\n", ptr);
        cring->head = ptr + p.cq_off.head;
        cring->tail = ptr + p.cq_off.tail;
        cring->ring_mask = ptr + p.cq_off.ring_mask;
@@ -1003,6 +1073,8 @@ static void file_depths(char *buf)
 
 static void usage(char *argv, int status)
 {
+       char runtime_str[16];
+       snprintf(runtime_str, sizeof(runtime_str), "%d", runtime);
        printf("%s [options] -- [filenames]\n"
                " -d <int>  : IO Depth, default %d\n"
                " -s <int>  : Batch submit, default %d\n"
@@ -1010,15 +1082,19 @@ static void usage(char *argv, int status)
                " -b <int>  : Block size, default %d\n"
                " -p <bool> : Polled IO, default %d\n"
                " -B <bool> : Fixed buffers, default %d\n"
+               " -D <bool> : DMA map fixed buffers, default %d\n"
                " -F <bool> : Register files, default %d\n"
                " -n <int>  : Number of threads, default %d\n"
                " -O <bool> : Use O_DIRECT, default %d\n"
                " -N <bool> : Perform just no-op requests, default %d\n"
                " -t <bool> : Track IO latencies, default %d\n"
                " -T <int>  : TSC rate in HZ\n"
+               " -r <int>  : Runtime in seconds, default %s\n"
+               " -R <bool> : Use random IO, default %d\n"
                " -a <bool> : Use legacy aio, default %d\n",
                argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
-               fixedbufs, register_files, nthreads, !buffered, do_nop, stats, aio);
+               fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
+               stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio);
        exit(status);
 }
 
@@ -1078,7 +1154,7 @@ int main(int argc, char *argv[])
        if (!do_nop && argc < 2)
                usage(argv[0], 1);
 
-       while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:h?")) != -1) {
+       while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:h?")) != -1) {
                switch (opt) {
                case 'a':
                        aio = !!atoi(optarg);
@@ -1136,6 +1212,15 @@ int main(int argc, char *argv[])
                        tsc_rate = strtoul(optarg, NULL, 10);
                        write_tsc_rate();
                        break;
+               case 'r':
+                       runtime = atoi(optarg);
+                       break;
+               case 'D':
+                       dma_map = !!atoi(optarg);
+                       break;
+               case 'R':
+                       random_io = !!atoi(optarg);
+                       break;
                case 'h':
                case '?':
                default:
@@ -1151,6 +1236,8 @@ int main(int argc, char *argv[])
                batch_complete = depth;
        if (batch_submit > depth)
                batch_submit = depth;
+       if (!fixedbufs && dma_map)
+               dma_map = 0;
 
        submitter = calloc(nthreads, sizeof(*submitter) +
                                depth * sizeof(struct iovec));
@@ -1250,13 +1337,11 @@ int main(int argc, char *argv[])
                }
        }
        s = get_submitter(0);
-       printf("polled=%d, fixedbufs=%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, register_files, buffered, depth);
+       printf("polled=%d, fixedbufs=%d/%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, dma_map, register_files, buffered, depth);
        if (!aio)
                printf("Engine=io_uring, sq_ring=%d, cq_ring=%d\n", *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
-#ifdef CONFIG_LIBAIO
        else
-               printf("Engine=aio, ctx=%p\n", &s->aio_ctx);
-#endif
+               printf("Engine=aio\n");
 
        for (j = 0; j < nthreads; j++) {
                s = get_submitter(j);
@@ -1278,6 +1363,8 @@ int main(int argc, char *argv[])
                unsigned long iops, bw;
 
                sleep(1);
+               if (runtime && !--runtime)
+                       do_finish("timeout");
 
                /* don't print partial run, if interrupted by signal */
                if (finish)
@@ -1288,6 +1375,7 @@ int main(int argc, char *argv[])
                        stats_running = 1;
 
                for (j = 0; j < nthreads; j++) {
+                       s = get_submitter(j);
                        this_done += s->done;
                        this_call += s->calls;
                        this_reap += s->reaps;
@@ -1303,7 +1391,11 @@ int main(int argc, char *argv[])
                        bw = iops * (bs / 1048576);
                else
                        bw = iops / (1048576 / bs);
-               printf("IOPS=%lu, ", iops);
+               if (iops > 100000)
+                       printf("IOPS=%luK, ", iops / 1000);
+               else
+                       printf("IOPS=%lu, ", iops);
+               max_iops = max(max_iops, iops);
                if (!do_nop)
                        printf("BW=%luMiB/s, ", bw);
                printf("IOS/call=%ld/%ld, inflight=(%s)\n", rpc, ipc, fdepths);