Update to newer io_uring API
author: Jens Axboe <axboe@kernel.dk>
Thu, 10 Jan 2019 16:39:14 +0000 (09:39 -0700)
committer: Jens Axboe <axboe@kernel.dk>
Thu, 10 Jan 2019 16:39:14 +0000 (09:39 -0700)
Signed-off-by: Jens Axboe <axboe@kernel.dk>
engines/io_uring.c
os/io_uring.h
t/io_uring.c

index 06355e9c18263b896f053a75b13d342d249ec3a1..475ead79cb8613e1b0c9ef6d823394338ccf134b 100644 (file)
@@ -166,6 +166,7 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
                                sqe->opcode = IORING_OP_WRITE_FIXED;
                        sqe->addr = io_u->xfer_buf;
                        sqe->len = io_u->xfer_buflen;
                                sqe->opcode = IORING_OP_WRITE_FIXED;
                        sqe->addr = io_u->xfer_buf;
                        sqe->len = io_u->xfer_buflen;
+                       sqe->index = io_u->index;
                } else {
                        if (io_u->ddir == DDIR_READ)
                                sqe->opcode = IORING_OP_READV;
                } else {
                        if (io_u->ddir == DDIR_READ)
                                sqe->opcode = IORING_OP_READV;
@@ -178,6 +179,7 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
        } else if (ddir_sync(io_u->ddir))
                sqe->opcode = IORING_OP_FSYNC;
 
        } else if (ddir_sync(io_u->ddir))
                sqe->opcode = IORING_OP_FSYNC;
 
+       sqe->data = (unsigned long) io_u;
        return 0;
 }
 
        return 0;
 }
 
@@ -191,7 +193,7 @@ static struct io_u *fio_ioring_event(struct thread_data *td, int event)
        index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
        cqe = &ld->cq_ring.cqes[index];
        index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
        cqe = &ld->cq_ring.cqes[index];
-       io_u = ld->io_u_index[cqe->index];
+       io_u = (struct io_u *) cqe->data;
 
        if (cqe->res != io_u->xfer_buflen) {
                if (cqe->res > io_u->xfer_buflen)
 
        if (cqe->res != io_u->xfer_buflen) {
                if (cqe->res > io_u->xfer_buflen)
index 20e4c22e040d6023bef78e64cafca8694c629733..b07bbbb3dd656dc4fbd2f14bc867b11eca0d5b49 100644 (file)
  * IO submission data structure (Submission Queue Entry)
  */
 struct io_uring_sqe {
  * IO submission data structure (Submission Queue Entry)
  */
 struct io_uring_sqe {
-       __u8    opcode;
-       __u8    flags;
-       __u16   ioprio;
-       __s32   fd;
-       __u64   off;
+       __u8    opcode;         /* type of operation for this sqe */
+       __u8    flags;          /* as of now unused */
+       __u16   ioprio;         /* ioprio for the request */
+       __s32   fd;             /* file descriptor to do IO on */
+       __u64   off;            /* offset into file */
        union {
        union {
-               void    *addr;
+               void    *addr;  /* buffer or iovecs */
                __u64   __pad;
        };
                __u64   __pad;
        };
-       __u32   len;
+       __u32   len;            /* buffer size or number of iovecs */
        union {
                __kernel_rwf_t  rw_flags;
                __u32           __resv;
        };
        union {
                __kernel_rwf_t  rw_flags;
                __u32           __resv;
        };
+       __u16   index;          /* index into fixed buffers, if used */
+       __u16   __pad2[3];
+       __u64   data;           /* data to be passed back at completion time */
 };
 
 /*
 };
 
 /*
@@ -50,7 +53,7 @@ struct io_uring_sqe {
  * IO completion data structure (Completion Queue Entry)
  */
 struct io_uring_cqe {
  * IO completion data structure (Completion Queue Entry)
  */
 struct io_uring_cqe {
-       __u64   index;          /* what sqe this event came from */
+       __u64   data;           /* sqe->data submission passed back */
        __s32   res;            /* result code for this event */
        __u32   flags;
 };
        __s32   res;            /* result code for this event */
        __u32   flags;
 };
index 3edc87c663bddc625ed67422bd7896f63bc49308..92227c748accc2aee19a7e3f6d19e60a3e22b928 100644 (file)
@@ -51,11 +51,17 @@ struct io_cq_ring {
 
 #define BS                     4096
 
 
 #define BS                     4096
 
+#define MAX_FDS                        16
+
 static unsigned sq_ring_mask, cq_ring_mask;
 
 static unsigned sq_ring_mask, cq_ring_mask;
 
+struct file {
+       unsigned long max_blocks;
+       int fd;
+};
+
 struct submitter {
        pthread_t thread;
 struct submitter {
        pthread_t thread;
-       unsigned long max_blocks;
        int ring_fd;
        struct drand48_data rand;
        struct io_sq_ring sq_ring;
        int ring_fd;
        struct drand48_data rand;
        struct io_sq_ring sq_ring;
@@ -68,7 +74,9 @@ struct submitter {
        unsigned long calls;
        unsigned long cachehit, cachemiss;
        volatile int finish;
        unsigned long calls;
        unsigned long cachehit, cachemiss;
        volatile int finish;
-       char filename[128];
+       struct file files[MAX_FDS];
+       unsigned nr_files;
+       unsigned cur_file;
 };
 
 static struct submitter submitters[1];
 };
 
 static struct submitter submitters[1];
@@ -98,19 +106,26 @@ static int gettid(void)
        return syscall(__NR_gettid);
 }
 
        return syscall(__NR_gettid);
 }
 
-static void init_io(struct submitter *s, int fd, unsigned index)
+static void init_io(struct submitter *s, unsigned index)
 {
        struct io_uring_sqe *sqe = &s->sqes[index];
        unsigned long offset;
 {
        struct io_uring_sqe *sqe = &s->sqes[index];
        unsigned long offset;
+       struct file *f;
        long r;
 
        long r;
 
+       f = &s->files[s->cur_file];
+       s->cur_file++;
+       if (s->cur_file == s->nr_files)
+               s->cur_file = 0;
+
        lrand48_r(&s->rand, &r);
        lrand48_r(&s->rand, &r);
-       offset = (r % (s->max_blocks - 1)) * BS;
+       offset = (r % (f->max_blocks - 1)) * BS;
 
        if (fixedbufs) {
                sqe->opcode = IORING_OP_READ_FIXED;
                sqe->addr = s->iovecs[index].iov_base;
                sqe->len = BS;
 
        if (fixedbufs) {
                sqe->opcode = IORING_OP_READ_FIXED;
                sqe->addr = s->iovecs[index].iov_base;
                sqe->len = BS;
+               sqe->index = index;
        } else {
                sqe->opcode = IORING_OP_READV;
                sqe->addr = &s->iovecs[index];
        } else {
                sqe->opcode = IORING_OP_READV;
                sqe->addr = &s->iovecs[index];
@@ -118,11 +133,11 @@ static void init_io(struct submitter *s, int fd, unsigned index)
        }
        sqe->flags = 0;
        sqe->ioprio = 0;
        }
        sqe->flags = 0;
        sqe->ioprio = 0;
-       sqe->fd = fd;
+       sqe->fd = f->fd;
        sqe->off = offset;
 }
 
        sqe->off = offset;
 }
 
-static int prep_more_ios(struct submitter *s, int fd, int max_ios)
+static int prep_more_ios(struct submitter *s, int max_ios)
 {
        struct io_sq_ring *ring = &s->sq_ring;
        unsigned index, tail, next_tail, prepped = 0;
 {
        struct io_sq_ring *ring = &s->sq_ring;
        unsigned index, tail, next_tail, prepped = 0;
@@ -135,7 +150,7 @@ static int prep_more_ios(struct submitter *s, int fd, int max_ios)
                        break;
 
                index = tail & sq_ring_mask;
                        break;
 
                index = tail & sq_ring_mask;
-               init_io(s, fd, index);
+               init_io(s, index);
                ring->array[index] = index;
                prepped++;
                tail = next_tail;
                ring->array[index] = index;
                prepped++;
                tail = next_tail;
@@ -150,22 +165,22 @@ static int prep_more_ios(struct submitter *s, int fd, int max_ios)
        return prepped;
 }
 
        return prepped;
 }
 
-static int get_file_size(int fd, unsigned long *blocks)
+static int get_file_size(struct file *f)
 {
        struct stat st;
 
 {
        struct stat st;
 
-       if (fstat(fd, &st) < 0)
+       if (fstat(f->fd, &st) < 0)
                return -1;
        if (S_ISBLK(st.st_mode)) {
                unsigned long long bytes;
 
                return -1;
        if (S_ISBLK(st.st_mode)) {
                unsigned long long bytes;
 
-               if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+               if (ioctl(f->fd, BLKGETSIZE64, &bytes) != 0)
                        return -1;
 
                        return -1;
 
-               *blocks = bytes / BS;
+               f->max_blocks = bytes / BS;
                return 0;
        } else if (S_ISREG(st.st_mode)) {
                return 0;
        } else if (S_ISREG(st.st_mode)) {
-               *blocks = st.st_size / BS;
+               f->max_blocks = st.st_size / BS;
                return 0;
        }
 
                return 0;
        }
 
@@ -185,12 +200,7 @@ static int reap_events(struct submitter *s)
                        break;
                cqe = &ring->cqes[head & cq_ring_mask];
                if (cqe->res != BS) {
                        break;
                cqe = &ring->cqes[head & cq_ring_mask];
                if (cqe->res != BS) {
-                       struct io_uring_sqe *sqe = &s->sqes[cqe->index];
-
                        printf("io: unexpected ret=%d\n", cqe->res);
                        printf("io: unexpected ret=%d\n", cqe->res);
-                       printf("offset=%lu, size=%lu\n",
-                                       (unsigned long) sqe->off,
-                                       (unsigned long) sqe->len);
                        return -1;
                }
                if (cqe->flags & IOCQE_FLAG_CACHEHIT)
                        return -1;
                }
                if (cqe->flags & IOCQE_FLAG_CACHEHIT)
@@ -210,29 +220,10 @@ static int reap_events(struct submitter *s)
 static void *submitter_fn(void *data)
 {
        struct submitter *s = data;
 static void *submitter_fn(void *data)
 {
        struct submitter *s = data;
-       int fd, ret, prepped, flags;
+       int ret, prepped;
 
        printf("submitter=%d\n", gettid());
 
 
        printf("submitter=%d\n", gettid());
 
-       flags = O_RDONLY;
-       if (!buffered)
-               flags |= O_DIRECT;
-       fd = open(s->filename, flags);
-       if (fd < 0) {
-               perror("open");
-               goto done;
-       }
-
-       if (get_file_size(fd, &s->max_blocks)) {
-               printf("failed getting size of device/file\n");
-               goto err;
-       }
-       if (s->max_blocks <= 1) {
-               printf("Zero file/device size?\n");
-               goto err;
-       }
-       s->max_blocks--;
-
        srand48_r(pthread_self(), &s->rand);
 
        prepped = 0;
        srand48_r(pthread_self(), &s->rand);
 
        prepped = 0;
@@ -241,7 +232,7 @@ static void *submitter_fn(void *data)
 
                if (!prepped && s->inflight < DEPTH) {
                        to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
 
                if (!prepped && s->inflight < DEPTH) {
                        to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
-                       prepped = prep_more_ios(s, fd, to_prep);
+                       prepped = prep_more_ios(s, to_prep);
                }
                s->inflight += prepped;
 submit_more:
                }
                s->inflight += prepped;
 submit_more:
@@ -290,9 +281,7 @@ submit:
                        break;
                }
        } while (!s->finish);
                        break;
                }
        } while (!s->finish);
-err:
-       close(fd);
-done:
+
        finish = 1;
        return NULL;
 }
        finish = 1;
        return NULL;
 }
@@ -376,7 +365,7 @@ int main(int argc, char *argv[])
 {
        struct submitter *s = &submitters[0];
        unsigned long done, calls, reap, cache_hit, cache_miss;
 {
        struct submitter *s = &submitters[0];
        unsigned long done, calls, reap, cache_hit, cache_miss;
-       int err, i;
+       int err, i, flags, fd;
        struct rlimit rlim;
        void *ret;
 
        struct rlimit rlim;
        void *ret;
 
@@ -385,6 +374,35 @@ int main(int argc, char *argv[])
                return 1;
        }
 
                return 1;
        }
 
+       flags = O_RDONLY;
+       if (!buffered)
+               flags |= O_DIRECT;
+
+       i = 1;
+       while (i < argc) {
+               struct file *f = &s->files[s->nr_files];
+
+               fd = open(argv[i], flags);
+               if (fd < 0) {
+                       perror("open");
+                       return 1;
+               }
+               f->fd = fd;
+               if (get_file_size(f)) {
+                       printf("failed getting size of device/file\n");
+                       return 1;
+               }
+               if (f->max_blocks <= 1) {
+                       printf("Zero file/device size?\n");
+                       return 1;
+               }
+               f->max_blocks--;
+
+               printf("Added file %s\n", argv[i]);
+               s->nr_files++;
+               i++;
+       }
+
        rlim.rlim_cur = RLIM_INFINITY;
        rlim.rlim_max = RLIM_INFINITY;
        if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
        rlim.rlim_cur = RLIM_INFINITY;
        rlim.rlim_max = RLIM_INFINITY;
        if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
@@ -412,7 +430,6 @@ int main(int argc, char *argv[])
        }
        printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
        printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
        }
        printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
        printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
-       strcpy(s->filename, argv[1]);
 
        pthread_create(&s->thread, NULL, submitter_fn, s);
 
 
        pthread_create(&s->thread, NULL, submitter_fn, s);