Initial support for explicit write barriers
[fio.git] / engines / binject.c
index a1b0181ce7d2b48e7cf9b7fefbb4db8867a8d00c..44a3796c6782f30e69a3dae9b081fdebccadc7a9 100644 (file)
@@ -22,7 +22,12 @@ struct binject_data {
        struct io_u **events;
        struct pollfd *pfds;
        int *fd_flags;
+};
+
+struct binject_file {  /* per-file engine state, stored in f->file_data */
        unsigned int bs;        /* sector size from BLKSSZGET; transfer lengths must be a multiple of it */
+       int minor;      /* minor returned by the map ioctl; names /dev/binject<minor>, -1 until mapped */
+       int fd; /* fd of the opened /dev/binject<minor> node, -1 when not open */
 };
 
 static void binject_buc_init(struct binject_data *bd, struct io_u *io_u)
@@ -52,6 +57,37 @@ static int pollin_events(struct pollfd *pfds, int fds)
        return 0;
 }
 
+/*
+ * Read completed commands from each file's binject device into 'p'.
+ *
+ * 'left' is treated as the total number of struct b_user_cmd entries that
+ * may be stored at 'p'; the sum of all reads in one pass never exceeds it.
+ *
+ * Returns the number of commands read. On a read() failure other than
+ * EAGAIN, *err is set to -errno and the count gathered so far is returned.
+ * If a full pass yields no commands and *err is zero, sleeps 1ms and
+ * retries.
+ */
+static unsigned int binject_read_commands(struct thread_data *td, void *p,
+                                         int left, int *err)
+{
+       struct binject_file *bf;
+       struct fio_file *f;
+       int i, ret, events;
+
+       /* no room to store anything: don't spin forever on empty reads */
+       if (left <= 0)
+               return 0;
+
+one_more:
+       events = 0;
+       for_each_file(td, f, i) {
+               /*
+                * Only request what still fits. Asking every file for
+                * 'left' commands could return more than 'left' in total
+                * and write past the end of the caller's buffer.
+                */
+               if (events >= left)
+                       break;
+
+               bf = f->file_data;
+               ret = read(bf->fd, p,
+                          (left - events) * sizeof(struct b_user_cmd));
+               if (ret < 0) {
+                       /* non-blocking fd with nothing pending: try next file */
+                       if (errno == EAGAIN)
+                               continue;
+                       *err = -errno;
+                       td_verror(td, errno, "read");
+                       break;
+               } else if (ret) {
+                       p += ret;
+                       events += ret / sizeof(struct b_user_cmd);
+               }
+       }
+
+       /*
+        * NOTE(review): *err is the caller's running result, so a positive
+        * value passed in also ends the retry loop here - confirm intended.
+        */
+       if (*err || events)
+               return events;
+
+       usleep(1000);
+       goto one_more;
+}
+
 static int fio_binject_getevents(struct thread_data *td, unsigned int min,
                              unsigned int max, struct timespec fio_unused *t)
 {
@@ -60,29 +96,27 @@ static int fio_binject_getevents(struct thread_data *td, unsigned int min,
        void *buf = bd->cmds;
        unsigned int i, events;
        struct fio_file *f;
+       struct binject_file *bf;
 
        /*
         * Fill in the file descriptors
         */
        for_each_file(td, f, i) {
+               bf = f->file_data;
+
                /*
                 * don't block for min events == 0
                 */
                if (!min) {
-                       bd->fd_flags[i] = fcntl(f->fd, F_GETFL);
-                       fcntl(f->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK);
+                       bd->fd_flags[i] = fcntl(bf->fd, F_GETFL);
+                       fcntl(bf->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK);
                }
-               bd->pfds[i].fd = f->fd;
+               bd->pfds[i].fd = bf->fd;
                bd->pfds[i].events = POLLIN;
        }
 
        while (left) {
-               void *p;
-
-               do {
-                       if (!min)
-                               break;
-
+               while (!min) {
                        ret = poll(bd->pfds, td->o.nr_files, -1);
                        if (ret < 0) {
                                if (!r)
@@ -94,34 +128,15 @@ static int fio_binject_getevents(struct thread_data *td, unsigned int min,
 
                        if (pollin_events(bd->pfds, td->o.nr_files))
                                break;
-               } while (1);
+               }
 
                if (r < 0)
                        break;
 
-re_read:
-               p = buf;
-               events = 0;
-               for_each_file(td, f, i) {
-                       ret = read(f->fd, p, left * sizeof(struct b_user_cmd));
-                       if (ret < 0) {
-                               if (errno == EAGAIN)
-                                       continue;
-                               r = -errno;
-                               td_verror(td, errno, "read");
-                               break;
-                       } else if (ret) {
-                               p += ret;
-                               events += ret / sizeof(struct b_user_cmd);
-                       }
-               }
+               events = binject_read_commands(td, buf, left, &r);
 
                if (r < 0)
                        break;
-               if (!events) {
-                       usleep(1000);
-                       goto re_read;
-               }
 
                left -= events;
                r += events;
@@ -135,8 +150,10 @@ re_read:
        }
 
        if (!min) {
-               for_each_file(td, f, i)
-                       fcntl(f->fd, F_SETFL, bd->fd_flags[i]);
+               for_each_file(td, f, i) {
+                       bf = f->file_data;
+                       fcntl(bf->fd, F_SETFL, bd->fd_flags[i]);
+               }
        }
 
        if (r > 0)
@@ -148,10 +165,10 @@ re_read:
 static int fio_binject_doio(struct thread_data *td, struct io_u *io_u)
 {
        struct b_user_cmd *buc = &io_u->buc;
-       struct fio_file *f = io_u->file;
+       struct binject_file *bf = io_u->file->file_data;
        int ret;
 
-       ret = write(f->fd, buc, sizeof(*buc));
+       ret = write(bf->fd, buc, sizeof(*buc));
        if (ret < 0)
                return ret;
 
@@ -162,10 +179,9 @@ static int fio_binject_prep(struct thread_data *td, struct io_u *io_u)
 {
        struct binject_data *bd = td->io_ops->data;
        struct b_user_cmd *buc = &io_u->buc;
+       struct binject_file *bf = io_u->file->file_data;
 
-       bd->bs = 512;
-
-       if (io_u->xfer_buflen & (bd->bs - 1)) {
+       if (io_u->xfer_buflen & (bf->bs - 1)) {
                log_err("read/write not sector aligned\n");
                return EINVAL;
        }
@@ -175,7 +191,10 @@ static int fio_binject_prep(struct thread_data *td, struct io_u *io_u)
                buc->type = B_TYPE_READ;
        } else if (io_u->ddir == DDIR_WRITE) {
                binject_buc_init(bd, io_u);
-               buc->type = B_TYPE_WRITE;
+               if (io_u->flags & IO_U_F_BARRIER)
+                       buc->type = B_TYPE_WRITEBARRIER;
+               else
+                       buc->type = B_TYPE_WRITE;
        } else if (io_u->ddir == DDIR_TRIM) {
                binject_buc_init(bd, io_u);
                buc->type = B_TYPE_DISCARD;
@@ -212,6 +231,137 @@ static struct io_u *fio_binject_event(struct thread_data *td, int event)
        return bd->events[event];
 }
 
+/*
+ * Close the per-file binject node (if open) and ask /dev/binject-ctl to
+ * drop the mapping for bf->minor. Failures are recorded with td_verror().
+ */
+static void binject_unmap_dev(struct thread_data *td, struct binject_file *bf)
+{
+       struct b_ioctl_cmd bic;
+       int ctl_fd;
+
+       if (bf->fd >= 0) {
+               close(bf->fd);
+               bf->fd = -1;
+       }
+
+       ctl_fd = open("/dev/binject-ctl", O_RDWR);
+       if (ctl_fd < 0) {
+               td_verror(td, errno, "open binject-ctl");
+               return;
+       }
+
+       bic.minor = bf->minor;
+
+       /* ioctl request 1 on the control node is the unmap operation */
+       if (ioctl(ctl_fd, 1, &bic) < 0)
+               td_verror(td, errno, "binject dev unmap");
+
+       /* the control fd is closed whether or not the ioctl succeeded */
+       close(ctl_fd);
+}
+
+/*
+ * Map the block device open on 'fd' to a binject device through
+ * /dev/binject-ctl, wait for the /dev/binject<minor> node to show up and
+ * open it into bf->fd.
+ *
+ * Returns 0 on success, 1 on failure. If the node never appears or cannot
+ * be opened, the mapping is torn down again with binject_unmap_dev().
+ */
+static int binject_map_dev(struct thread_data *td, struct binject_file *bf,
+                          int fd)
+{
+       struct b_ioctl_cmd bic;
+       char name[80];
+       struct stat sb;
+       int ctl_fd, tries, found = 0;
+
+       ctl_fd = open("/dev/binject-ctl", O_RDWR);
+       if (ctl_fd < 0) {
+               td_verror(td, errno, "binject ctl open");
+               return 1;
+       }
+
+       /* ioctl request 0 on the control node is the map operation */
+       bic.fd = fd;
+       if (ioctl(ctl_fd, 0, &bic) < 0) {
+               td_verror(td, errno, "binject dev map");
+               close(ctl_fd);
+               return 1;
+       }
+
+       bf->minor = bic.minor;
+       sprintf(name, "/dev/binject%u", bf->minor);
+
+       /*
+        * Wait for udev to create the node: up to 100 checks, 10ms apart
+        */
+       for (tries = 0; tries < 100; tries++) {
+               if (!stat(name, &sb)) {
+                       found = 1;
+                       break;
+               }
+
+               usleep(10000);
+       }
+
+       close(ctl_fd);
+
+       if (!found) {
+               log_err("fio: timed out waiting for binject dev\n");
+               binject_unmap_dev(td, bf);
+               return 1;
+       }
+
+       bf->fd = open(name, O_RDWR);
+       if (bf->fd < 0) {
+               td_verror(td, errno, "binject dev open");
+               binject_unmap_dev(td, bf);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * close_file hook: undo the binject mapping for this file, release its
+ * per-file state and close the underlying file. A file with no per-file
+ * state attached is left untouched and 0 is returned.
+ */
+static int fio_binject_close_file(struct thread_data *td, struct fio_file *f)
+{
+       struct binject_file *bf = f->file_data;
+
+       if (!bf)
+               return 0;
+
+       binject_unmap_dev(td, bf);
+       free(bf);
+       f->file_data = NULL;
+
+       return generic_close_file(td, f);
+}
+
+/*
+ * open_file hook: open the underlying file, require it to be a block
+ * device, record its sector size and map it to a binject device.
+ *
+ * Returns 0 on success and 1 on failure. On every failure path the
+ * underlying file is closed again and no per-file state is left attached
+ * to 'f'.
+ */
+static int fio_binject_open_file(struct thread_data *td, struct fio_file *f)
+{
+       struct binject_file *bf;
+       unsigned int bs;
+
+       if (generic_open_file(td, f))
+               return 1;
+
+       if (f->filetype != FIO_TYPE_BD) {
+               log_err("fio: binject only works with block devices\n");
+               goto err_close;
+       }
+       if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
+               td_verror(td, errno, "BLKSSZGET");
+               goto err_close;
+       }
+
+       bf = malloc(sizeof(*bf));
+       if (!bf) {
+               /* previously dereferenced unchecked */
+               td_verror(td, ENOMEM, "malloc");
+               goto err_close;
+       }
+
+       bf->bs = bs;
+       bf->minor = bf->fd = -1;
+       f->file_data = bf;
+
+       if (binject_map_dev(td, bf, f->fd)) {
+               /*
+                * Don't leak the state or leave it attached to a closed
+                * file, where a later close would try to unmap and close
+                * it a second time.
+                */
+               f->file_data = NULL;
+               free(bf);
+               goto err_close;
+       }
+
+       return 0;
+
+err_close:
+       generic_close_file(td, f);
+       return 1;
+}
+
 static void fio_binject_cleanup(struct thread_data *td)
 {
        struct binject_data *bd = td->io_ops->data;
@@ -257,10 +407,10 @@ static struct ioengine_ops ioengine = {
        .getevents      = fio_binject_getevents,
        .event          = fio_binject_event,
        .cleanup        = fio_binject_cleanup,
-       .open_file      = generic_open_file,
-       .close_file     = generic_close_file,
+       .open_file      = fio_binject_open_file,
+       .close_file     = fio_binject_close_file,
        .get_file_size  = generic_get_file_size,
-       .flags          = FIO_RAWIO,
+       .flags          = FIO_RAWIO | FIO_BARRIER,
 };
 
 #else /* FIO_HAVE_BINJECT */
@@ -272,7 +422,7 @@ static struct ioengine_ops ioengine = {
  */
 static int fio_binject_init(struct thread_data fio_unused *td)
 {
-       fprintf(stderr, "fio: ioengine binject not available\n");
+       log_err("fio: ioengine binject not available\n");
        return 1;       /* stub for builds without FIO_HAVE_BINJECT: always returns 1 (presumably init failure - confirm) */
 }