Add binject IO engine
author Jens Axboe <jaxboe@fusionio.com>
Tue, 7 Sep 2010 11:28:58 +0000 (13:28 +0200)
committer Jens Axboe <jaxboe@fusionio.com>
Tue, 7 Sep 2010 11:28:58 +0000 (13:28 +0200)
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Makefile
engines/binject.c [new file with mode: 0644]
ioengine.h
options.c
os/binject.h [new file with mode: 0644]
os/os-linux.h

index 9fec137d91f41391d8f23fffa17fc25957a45f26..13baee837bf4d0ff06b07038f4c88fded5f5ab2a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -34,6 +34,7 @@ OBJS += engines/null.o
 OBJS += engines/net.o
 OBJS += engines/syslet-rw.o
 OBJS += engines/guasi.o
+OBJS += engines/binject.o
 
 OBJS += profiles/tiobench.o
 
diff --git a/engines/binject.c b/engines/binject.c
new file mode 100644 (file)
index 0000000..3f663bd
--- /dev/null
@@ -0,0 +1,295 @@
+/*
+ * binject engine
+ *
+ * IO engine that uses the Linux binject interface to directly inject
+ * bio's to block devices.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/poll.h>
+
+#include "../fio.h"
+
+#ifdef FIO_HAVE_BINJECT
+
+/*
+ * Per-thread engine state. Allocated in fio_binject_init(), stored in
+ * td->io_ops->data and released by fio_binject_cleanup().
+ */
+struct binject_data {
+       struct b_user_cmd *cmds;        /* buffer completions are read() into; iodepth entries */
+       struct io_u **events;           /* io_u's collected by getevents; iodepth entries */
+       struct pollfd *pfds;            /* poll() descriptors; nr_files entries */
+       int *fd_flags;                  /* F_GETFL flags saved while O_NONBLOCK is forced; nr_files entries */
+       unsigned int bs;                /* transfer length granularity checked in prep (set to 512 there) */
+};
+
+/*
+ * Reset the b_user_cmd embedded in 'io_u' and fill in every field that
+ * does not depend on the data direction: the magic, the data buffer, its
+ * length, the device offset, the request flags and a back-pointer to the
+ * io_u itself (carried in ->usr_ptr). ->type is left zeroed for the caller
+ * to set. 'bd' is not used.
+ */
+static void binject_buc_init(struct binject_data *bd, struct io_u *io_u)
+{
+       struct b_user_cmd *cmd = &io_u->buc;
+
+       memset(cmd, 0, sizeof(struct b_user_cmd));
+       binject_buc_set_magic(cmd);
+
+       cmd->usr_ptr = (unsigned long) io_u;
+       cmd->offset = io_u->offset;
+       cmd->len = io_u->xfer_buflen;
+       cmd->buf = (unsigned long) io_u->xfer_buf;
+       cmd->flags = B_FLAG_UNPLUG | B_FLAG_NOIDLE;
+
+       /* every command built here is expected to carry a data buffer */
+       assert(cmd->buf);
+}
+
+/*
+ * Scan the first 'fds' entries of 'pfds' and report whether any of them
+ * has POLLIN set in ->revents. Returns 1 on the first hit, 0 if none.
+ */
+static int pollin_events(struct pollfd *pfds, int fds)
+{
+       int idx = 0;
+
+       while (idx < fds) {
+               if (pfds[idx].revents & POLLIN)
+                       return 1;
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Reap completions from the binject devices.
+ *
+ * Completed commands are read() back from every file into bd->cmds, and
+ * the io_u that was stored in ->usr_ptr is recorded in bd->events[] for
+ * fio_binject_event(). The loop keeps going until 'max' events have been
+ * collected. With min == 0 the descriptors are switched to O_NONBLOCK for
+ * the duration of the call and poll() is skipped; otherwise poll() is used
+ * to wait for POLLIN before reading.
+ *
+ * Returns the number of events reaped, or a negative errno value if
+ * read() fails or poll() fails before anything was reaped. 't' is unused.
+ */
+static int fio_binject_getevents(struct thread_data *td, unsigned int min,
+                             unsigned int max, struct timespec fio_unused *t)
+{
+       struct binject_data *bd = td->io_ops->data;
+       int left = max, ret, r = 0, ev_index = 0;
+       void *buf = bd->cmds;
+       unsigned int i, events;
+       struct fio_file *f;
+
+       /*
+        * Fill in the file descriptors
+        */
+       for_each_file(td, f, i) {
+               /*
+                * don't block for min events == 0
+                */
+               if (!min) {
+                       bd->fd_flags[i] = fcntl(f->fd, F_GETFL);
+                       fcntl(f->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK);
+               }
+               bd->pfds[i].fd = f->fd;
+               bd->pfds[i].events = POLLIN;
+       }
+
+       while (left) {
+               /* byte cursor into bd->cmds; char * keeps the arithmetic standard C */
+               char *p;
+
+               do {
+                       if (!min)
+                               break;
+
+                       ret = poll(bd->pfds, td->o.nr_files, -1);
+                       if (ret < 0) {
+                               if (!r)
+                                       r = -errno;
+                               td_verror(td, errno, "poll");
+                               break;
+                       } else if (!ret)
+                               continue;
+
+                       if (pollin_events(bd->pfds, td->o.nr_files))
+                               break;
+               } while (1);
+
+               if (r < 0)
+                       break;
+
+re_read:
+               p = buf;
+               events = 0;
+               for_each_file(td, f, i) {
+                       /*
+                        * Only ask for what still fits: 'p' has already
+                        * advanced past the commands read from earlier
+                        * files in this pass, so sizing every read for
+                        * the full 'left' could run past the end of
+                        * bd->cmds and push 'left' below zero.
+                        */
+                       unsigned int room = left - events;
+
+                       if (!room)
+                               break;
+
+                       ret = read(f->fd, p, room * sizeof(struct b_user_cmd));
+                       if (ret < 0) {
+                               if (errno == EAGAIN)
+                                       continue;
+                               r = -errno;
+                               td_verror(td, errno, "read");
+                               break;
+                       } else if (ret) {
+                               p += ret;
+                               events += ret / sizeof(struct b_user_cmd);
+                       }
+               }
+
+               if (r < 0)
+                       break;
+               if (!events) {
+                       /* nothing completed yet, back off briefly and retry */
+                       usleep(1000);
+                       goto re_read;
+               }
+
+               left -= events;
+               r += events;
+
+               /* each pass fills bd->cmds from the start, so index from 'buf' */
+               for (i = 0; i < events; i++) {
+                       struct b_user_cmd *buc = (struct b_user_cmd *) buf + i;
+
+                       bd->events[ev_index] = (struct io_u *) buc->usr_ptr;
+                       ev_index++;
+               }
+       }
+
+       /* put back the descriptor flags saved before forcing O_NONBLOCK */
+       if (!min) {
+               for_each_file(td, f, i)
+                       fcntl(f->fd, F_SETFL, bd->fd_flags[i]);
+       }
+
+       if (r > 0)
+               assert(ev_index == r);
+
+       return r;
+}
+
+/*
+ * Submit one command by write()ing the io_u's embedded b_user_cmd to the
+ * descriptor of the file it targets. A negative write() result is returned
+ * unchanged (errno is left for the caller to inspect); any other result
+ * yields FIO_Q_QUEUED. 'td' is not used.
+ */
+static int fio_binject_doio(struct thread_data *td, struct io_u *io_u)
+{
+       int fd = io_u->file->fd;
+       int written;
+
+       written = write(fd, &io_u->buc, sizeof(io_u->buc));
+
+       return written < 0 ? written : FIO_Q_QUEUED;
+}
+
+/*
+ * Build the binject command for 'io_u' before it is queued.
+ *
+ * The transfer length must be a multiple of bd->bs (fixed at 512 here);
+ * otherwise an error is logged and EINVAL is returned. On success the
+ * embedded b_user_cmd is initialised, its type is set from the data
+ * direction (read, write or trim) and 0 is returned. Any other direction
+ * trips an assert.
+ */
+static int fio_binject_prep(struct thread_data *td, struct io_u *io_u)
+{
+       struct binject_data *bd = td->io_ops->data;
+       struct b_user_cmd *buc = &io_u->buc;
+
+       bd->bs = 512;
+
+       /* bs is a power of two, so the mask test checks for a multiple */
+       if (io_u->xfer_buflen & (bd->bs - 1)) {
+               log_err("read/write not sector aligned\n");
+               return EINVAL;
+       }
+
+       if (io_u->ddir == DDIR_READ) {
+               binject_buc_init(bd, io_u);
+               buc->type = B_TYPE_READ;
+       } else if (io_u->ddir == DDIR_WRITE) {
+               /*
+                * Use the plain write type so the payload in xfer_buf,
+                * which binject_buc_init() passes along and asserts is
+                * present, is what gets written.
+                * NOTE(review): B_TYPE_WRITEZERO was used here before;
+                * going by its name it writes zeroes instead of the
+                * buffer contents. Confirm against the binject driver.
+                */
+               binject_buc_init(bd, io_u);
+               buc->type = B_TYPE_WRITE;
+       } else if (io_u->ddir == DDIR_TRIM) {
+               binject_buc_init(bd, io_u);
+               buc->type = B_TYPE_DISCARD;
+       } else {
+               assert(0);
+       }
+
+       return 0;
+}
+
+/*
+ * Queue 'io_u' for execution: call fio_ro_check(), submit the command
+ * through fio_binject_doio() and, if submission failed, record errno in
+ * io_u->error. When the io_u carries an error it is reported on the thread
+ * with td_verror() and FIO_Q_COMPLETED is returned; otherwise the
+ * submission result is returned as is.
+ */
+static int fio_binject_queue(struct thread_data *td, struct io_u *io_u)
+{
+       int status;
+
+       fio_ro_check(td, io_u);
+
+       status = fio_binject_doio(td, io_u);
+       if (status < 0)
+               io_u->error = errno;
+
+       if (!io_u->error)
+               return status;
+
+       td_verror(td, io_u->error, "xfer");
+       return FIO_Q_COMPLETED;
+}
+
+/*
+ * Look up slot 'event' in the array of io_u's filled in by
+ * fio_binject_getevents() and return it. No bounds check is done.
+ */
+static struct io_u *fio_binject_event(struct thread_data *td, int event)
+{
+       struct binject_data *bd = td->io_ops->data;
+       struct io_u **reaped = bd->events;
+
+       return reaped[event];
+}
+
+/*
+ * Release the engine state hanging off td->io_ops->data, if any: the four
+ * per-thread arrays first, then the container itself.
+ */
+static void fio_binject_cleanup(struct thread_data *td)
+{
+       struct binject_data *state = td->io_ops->data;
+
+       if (!state)
+               return;
+
+       free(state->pfds);
+       free(state->fd_flags);
+       free(state->cmds);
+       free(state->events);
+       free(state);
+}
+
+/*
+ * Allocate the per-thread engine state and attach it to td->io_ops->data.
+ *
+ * The command buffer and the event array get td->o.iodepth entries each;
+ * the pollfd and saved-flags arrays get td->o.nr_files entries each. All
+ * of it starts out zeroed.
+ *
+ * Returns 0 on success. If any allocation fails everything is released
+ * again, td->io_ops->data is left untouched and 1 is returned (the same
+ * failure value the stub init in this file uses).
+ *
+ * NOTE(review): assumes iodepth and nr_files are non-zero; calloc() is
+ * allowed to return NULL for a zero-sized request.
+ */
+static int fio_binject_init(struct thread_data *td)
+{
+       struct binject_data *bd;
+
+       bd = calloc(1, sizeof(*bd));
+       if (!bd)
+               return 1;
+
+       bd->cmds = calloc(td->o.iodepth, sizeof(*bd->cmds));
+       bd->events = calloc(td->o.iodepth, sizeof(*bd->events));
+       bd->pfds = calloc(td->o.nr_files, sizeof(*bd->pfds));
+       bd->fd_flags = calloc(td->o.nr_files, sizeof(*bd->fd_flags));
+
+       if (!bd->cmds || !bd->events || !bd->pfds || !bd->fd_flags) {
+               /* free(NULL) is a no-op, so partial failure needs no special casing */
+               free(bd->fd_flags);
+               free(bd->pfds);
+               free(bd->events);
+               free(bd->cmds);
+               free(bd);
+               return 1;
+       }
+
+       td->io_ops->data = bd;
+       return 0;
+}
+
+/*
+ * Engine descriptor used when FIO_HAVE_BINJECT is defined. It wires the
+ * binject handlers defined above together with fio's generic file
+ * open/close/size helpers, and is registered by fio_binject_register().
+ */
+static struct ioengine_ops ioengine = {
+       .name           = "binject",
+       .version        = FIO_IOOPS_VERSION,
+       .init           = fio_binject_init,
+       .prep           = fio_binject_prep,
+       .queue          = fio_binject_queue,
+       .getevents      = fio_binject_getevents,
+       .event          = fio_binject_event,
+       .cleanup        = fio_binject_cleanup,
+       .open_file      = generic_open_file,
+       .close_file     = generic_close_file,
+       .get_file_size  = generic_get_file_size,
+       .flags          = FIO_RAWIO,
+};
+
+#else /* FIO_HAVE_BINJECT */
+
+/*
+ * When we have a proper configure system in place, we simply wont build
+ * and install this io engine. For now install a crippled version that
+ * just complains and fails to load.
+ */
+static int fio_binject_init(struct thread_data fio_unused *td)
+{
+       /* tell the user why the engine cannot be used, then fail the init */
+       fputs("fio: ioengine binject not available\n", stderr);
+
+       return 1;
+}
+
+/*
+ * Engine descriptor used when FIO_HAVE_BINJECT is not defined: only the
+ * name, version and the always-failing init hook are filled in.
+ */
+static struct ioengine_ops ioengine = {
+       .name           = "binject",
+       .version        = FIO_IOOPS_VERSION,
+       .init           = fio_binject_init,
+};
+
+#endif
+
+/*
+ * Hand whichever 'ioengine' descriptor was compiled in (real or stub) to
+ * register_ioengine().
+ * NOTE(review): fio_init is defined outside this file; presumably it
+ * marks the function to run at program start-up - confirm.
+ */
+static void fio_init fio_binject_register(void)
+{
+       register_ioengine(&ioengine);
+}
+
+/*
+ * Pass the compiled-in 'ioengine' descriptor to unregister_ioengine().
+ * NOTE(review): fio_exit is defined outside this file; presumably it
+ * marks the function to run at program exit - confirm.
+ */
+static void fio_exit fio_binject_unregister(void)
+{
+       unregister_ioengine(&ioengine);
+}
index f6238f83a6a64139fcd941c086ab94dbf6a6e1b5..344cdbf939a3e55fed9c7274133355e058f2a0d8 100644 (file)
@@ -31,6 +31,9 @@ struct io_u {
 #endif
 #ifdef FIO_HAVE_SOLARISAIO
                aio_result_t resultp;
+#endif
+#ifdef FIO_HAVE_BINJECT
+               struct b_user_cmd buc;
 #endif
                void *mmap_data;
        };
index e255e94f78cd552f35f53e0de9080bd04f94cf27..bdf358204197b7ad1ed40bf6c2a749c2403c229c 100644 (file)
--- a/options.c
+++ b/options.c
@@ -973,6 +973,11 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                          { .ival = "guasi",
                            .help = "GUASI IO engine",
                          },
+#endif
+#ifdef FIO_HAVE_BINJECT
+                         { .ival = "binject",
+                           .help = "binject direct inject block engine",
+                         },
 #endif
                          { .ival = "external",
                            .help = "Load external engine (append name)",
diff --git a/os/binject.h b/os/binject.h
new file mode 100644 (file)
index 0000000..1f358f3
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef BINJECT_H
+#define BINJECT_H
+
+#include <linux/types.h>
+
+/*
+ * Layout of b_user_cmd.magic as built by binject_buc_set_magic():
+ * BINJECT_MAGIC sits above BINJECT_MAGIC_SHIFT bits, BINJECT_VER occupies
+ * the low bits, and BINJECT_VER_MASK selects those low bits.
+ */
+#define BINJECT_MAGIC          0x89
+#define BINJECT_VER            0x01
+#define BINJECT_MAGIC_SHIFT    8
+#define BINJECT_VER_MASK       ((1 << BINJECT_MAGIC_SHIFT) - 1)
+
+/*
+ * One binject command. The fio engine write()s a filled-in command to the
+ * device to submit it and read()s commands back to collect completions.
+ * The per-field notes say which side fills each field in.
+ */
+struct b_user_cmd {
+       __u16 magic;    /* INPUT */
+       __u16 type;     /* INPUT */
+       __u32 error;    /* OUTPUT */
+       __u32 flags;    /* INPUT */
+       __u32 len;      /* INPUT */
+       __u64 offset;   /* INPUT */
+       __u64 buf;      /* INPUT */
+       __u64 usr_ptr;  /* PASSED THROUGH */
+       __u64 nsec;     /* OUTPUT */
+};
+
+/*
+ * Not referenced by the fio engine added in this change.
+ * NOTE(review): presumably the argument block for a binject control
+ * ioctl that ties a file descriptor to a device minor - confirm against
+ * the driver.
+ */
+struct b_ioctl_cmd {
+       int fd;
+       int minor;
+};
+
+/*
+ * Values for b_user_cmd.type. B_TYPE_NR is one past the last valid type.
+ */
+enum {
+       B_TYPE_READ             = 0,
+       B_TYPE_WRITE,
+       B_TYPE_DISCARD,
+       B_TYPE_READVOID,
+       B_TYPE_WRITEZERO,
+       B_TYPE_NR
+};
+
+/*
+ * Flags for b_user_cmd.flags. The __B_FLAG_* names are bit numbers
+ * (__B_FLAG_NR is the count of defined bits); the B_FLAG_* names are the
+ * matching single-bit masks to OR together.
+ */
+enum {
+       __B_FLAG_SYNC   = 0,
+       __B_FLAG_UNPLUG,
+       __B_FLAG_NOIDLE,
+       __B_FLAG_BARRIER,
+       __B_FLAG_META,
+       __B_FLAG_RAHEAD,
+       __B_FLAG_FAILFAST_DEV,
+       __B_FLAG_FAILFAST_TRANSPORT,
+       __B_FLAG_FAILFAST_DRIVER,
+       __B_FLAG_NR,
+
+       B_FLAG_SYNC                     = 1 << __B_FLAG_SYNC,
+       B_FLAG_UNPLUG                   = 1 << __B_FLAG_UNPLUG,
+       B_FLAG_NOIDLE                   = 1 << __B_FLAG_NOIDLE,
+       B_FLAG_BARRIER                  = 1 << __B_FLAG_BARRIER,
+       B_FLAG_META                     = 1 << __B_FLAG_META,
+       B_FLAG_RAHEAD                   = 1 << __B_FLAG_RAHEAD,
+       B_FLAG_FAILFAST_DEV             = 1 << __B_FLAG_FAILFAST_DEV,
+       B_FLAG_FAILFAST_TRANSPORT       = 1 << __B_FLAG_FAILFAST_TRANSPORT,
+       B_FLAG_FAILFAST_DRIVER          = 1 << __B_FLAG_FAILFAST_DRIVER,
+};
+
+/*
+ * Stamp 'buc' with the interface signature: BINJECT_MAGIC shifted up by
+ * BINJECT_MAGIC_SHIFT, combined with BINJECT_VER in the low bits.
+ */
+static inline void binject_buc_set_magic(struct b_user_cmd *buc)
+{
+       const __u16 signature = (BINJECT_MAGIC << BINJECT_MAGIC_SHIFT) | BINJECT_VER;
+
+       buc->magic = signature;
+}
+
+#endif
index f7154a4af2c16e32ea9e51989f22e4e7af9a8d21..20f2a94ef4d06783aa006e202a37644ce64f7a1e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/major.h>
 
 #include "indirect.h"
+#include "binject.h"
 
 #define FIO_HAVE_LIBAIO
 #define FIO_HAVE_POSIXAIO
@@ -37,6 +38,7 @@
 #define FIO_HAVE_FDATASYNC
 #define FIO_HAVE_FS_STAT
 #define FIO_HAVE_TRIM
+#define FIO_HAVE_BINJECT
 
 #ifdef SYNC_FILE_RANGE_WAIT_BEFORE
 #define FIO_HAVE_SYNC_FILE_RANGE