engines/sg: add cmdp and dxferp for trims to sg error string
[fio.git] / engines / sg.c
index 862cd609b45f5870ed2d53e2ef9901feff75ae76..800f47370ed31f6dd7a8f2756556060022488de5 100644 (file)
 /*
- * scsi generic sg v3 io engine
+ * sg engine
+ *
+ * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
+ *
+ * This ioengine can operate in two modes:
+ *     sync    with block devices (/dev/sdX) or
+ *             with character devices (/dev/sgY) with direct=1 or sync=1
+ *     async   with character devices with direct=0 and sync=0
+ *
+ * What value does queue() return for the different cases?
+ *                             queue() return value
+ * In sync mode:
+ *  /dev/sdX           RWT     FIO_Q_COMPLETED
+ *  /dev/sgY           RWT     FIO_Q_COMPLETED
+ *   with direct=1 or sync=1
+ *
+ * In async mode:
+ *  /dev/sgY           RWT     FIO_Q_QUEUED
+ *   direct=0 and sync=0
+ *
+ * Because FIO_SYNCIO is set for this ioengine, td_io_queue() will fill in
+ * issue_time *before* each IO is sent to queue().
+ *
+ * Where are the IO counting functions called for the different cases?
+ *
+ * In sync mode:
+ *  /dev/sdX (commit==NULL)
+ *   RWT
+ *    io_u_mark_depth()                        called in td_io_queue()
+ *    io_u_mark_submit/complete()      called in td_io_queue()
+ *    issue_time                       set in td_io_queue()
+ *
+ *  /dev/sgY with direct=1 or sync=1 (commit does nothing)
+ *   RWT
+ *    io_u_mark_depth()                        called in td_io_queue()
+ *    io_u_mark_submit/complete()      called in queue()
+ *    issue_time                       set in td_io_queue()
+ *  
+ * In async mode:
+ *  /dev/sgY with direct=0 and sync=0
+ *   RW: read and write operations are submitted in queue()
+ *    io_u_mark_depth()                        called in td_io_commit()
+ *    io_u_mark_submit()               called in queue()
+ *    issue_time                       set in td_io_queue()
+ *   T: trim operations are queued in queue() and submitted in commit()
+ *    io_u_mark_depth()                        called in td_io_commit()
+ *    io_u_mark_submit()               called in commit()
+ *    issue_time                       set in commit()
  *
  */
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
-#include <assert.h>
-#include <sys/poll.h>
+#include <poll.h>
 
 #include "../fio.h"
-#include "../os.h"
+#include "../optgroup.h"
 
 #ifdef FIO_HAVE_SGIO
 
+enum { /* values the sg_write_mode option stores in sg_options.write_mode */
+       FIO_SG_WRITE            = 1, /* "write": standard SCSI WRITE commands */
+       FIO_SG_WRITE_VERIFY     = 2, /* "verify": SCSI WRITE AND VERIFY commands */
+       FIO_SG_WRITE_SAME       = 3 /* "same": SCSI WRITE SAME commands */
+};
+
+struct sg_options { /* engine options; fio_sgio_prep() reads them through td->eo */
+       void *pad; /* NOTE(review): not referenced in the visible code; presumably a placeholder fio expects as first member - confirm */
+       unsigned int readfua; /* nonzero: OR 0x08 into CDB byte 1 of reads */
+       unsigned int writefua; /* nonzero: OR 0x08 into CDB byte 1 of plain writes */
+       unsigned int write_mode; /* one of the FIO_SG_WRITE* values */
+};
+
+static struct fio_option options[] = { /* option table for the sg engine; each .off1 points into struct sg_options */
+       {
+               .name   = "readfua",
+               .lname  = "sg engine read fua flag support",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct sg_options, readfua),
+               .help   = "Set FUA flag (force unit access) for all Read operations",
+               .def    = "0",
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_SG,
+       },
+       {
+               .name   = "writefua",
+               .lname  = "sg engine write fua flag support",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct sg_options, writefua),
+               .help   = "Set FUA flag (force unit access) for all Write operations",
+               .def    = "0",
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_SG,
+       },
+       {
+               .name   = "sg_write_mode",
+               .lname  = "specify sg write mode",
+               .type   = FIO_OPT_STR,
+               .off1   = offsetof(struct sg_options, write_mode),
+               .help   = "Specify SCSI WRITE mode",
+               .def    = "write",
+               .posval = { /* accepted strings and the FIO_SG_WRITE* value each one selects */
+                         { .ival = "write",
+                           .oval = FIO_SG_WRITE,
+                           .help = "Issue standard SCSI WRITE commands",
+                         },
+                         { .ival = "verify",
+                           .oval = FIO_SG_WRITE_VERIFY,
+                           .help = "Issue SCSI WRITE AND VERIFY commands",
+                         },
+                         { .ival = "same",
+                           .oval = FIO_SG_WRITE_SAME,
+                           .help = "Issue SCSI WRITE SAME commands",
+                         },
+               },
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_SG,
+       },
+       {
+               .name   = NULL, /* last entry carries no name */
+       },
+};
+
+#define MAX_10B_LBA  0xFFFFFFFFULL // LBAs below this get 10-byte CDBs, all others 16-byte CDBs
+#define SCSI_TIMEOUT_MS 30000   // value stored in sg_io_hdr.timeout (30 seconds); currently no method to override
+#define MAX_SB 64               // size in bytes of the sense buffer kept with each command
+
 struct sgio_cmd {
-       unsigned char cdb[10];
+       unsigned char cdb[16];      // command descriptor block; sized for 16 byte commands
+       unsigned char sb[MAX_SB];   // sense buffer; sgio_hdr_init() points hdr->sbp at it
        int nr;
 };
 
+struct sgio_trim { /* one pending UNMAP command and the trim io_us folded into it */
+       char *unmap_param; /* UNMAP parameter buffer: 8 header bytes, then 16 bytes per range */
+       unsigned int unmap_range_count; /* number of ranges (io_us) currently in this queue */
+       struct io_u **trim_io_us; /* the queued io_us; entry 0 owns the sg header that gets submitted */
+};
+
 struct sgio_data {
        struct sgio_cmd *cmds;
        struct io_u **events;
+       struct pollfd *pfds; /* one poll entry per file, filled in by fio_sgio_getevents() */
+       int *fd_flags; /* per-file value returned by fio_set_fd_nonblocking(), or -1; restored with fcntl(F_SETFL) */
+       void *sgbuf; /* buffer fio_sgio_getevents() reads completed sg_io_hdr records into */
        unsigned int bs;
+       int type_checked; /* NOTE(review): not referenced in the visible part of the file */
+       struct sgio_trim **trim_queues; /* trim queues, indexed by io_u->index */
+       int current_queue; /* index of the trim queue being filled, -1 when none is open */
+       unsigned int *trim_queue_map; /* io_u->index -> index of the trim queue holding that io_u */
 };
 
+/*
+ * Report whether the job asked for unbuffered operation, i.e. whether
+ * either the odirect or the sync_io thread option is set.
+ */
+static inline bool sgio_unbuffered(struct thread_data *td)
+{
+       if (td->o.odirect)
+               return true;
+
+       return td->o.sync_io ? true : false;
+}
+
 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
                          struct io_u *io_u, int fs)
 {
@@ -36,8 +168,11 @@ static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
        hdr->interface_id = 'S';
        hdr->cmdp = sc->cdb;
        hdr->cmd_len = sizeof(sc->cdb);
+       hdr->sbp = sc->sb;
+       hdr->mx_sb_len = sizeof(sc->sb);
        hdr->pack_id = io_u->index;
        hdr->usr_ptr = io_u;
+       hdr->timeout = SCSI_TIMEOUT_MS;
 
        if (fs) {
                hdr->dxferp = io_u->xfer_buf;
@@ -45,286 +180,1022 @@ static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
        }
 }
 
-static int fio_sgio_ioctl_getevents(struct thread_data *td, int fio_unused min,
-                                   int max, struct timespec fio_unused *t)
+static int pollin_events(struct pollfd *pfds, int fds) /* 1 if any of pfds[0..fds-1] has POLLIN set in revents, else 0 */
 {
-       assert(max <= 1);
+       int i;
 
-       /*
-        * we can only have one finished io_u for sync io, since the depth
-        * is always 1
-        */
-       if (list_empty(&td->io_u_busylist))
-               return 0;
+       for (i = 0; i < fds; i++)
+               if (pfds[i].revents & POLLIN)
+                       return 1; /* first readable descriptor is enough */
 
-       return 1;
+       return 0;
 }
 
+static int sg_fd_read(int fd, void *data, size_t size) /* read size bytes from fd into data; returns 0, an errno value, or EAGAIN on a short read */
+{
+       int err = 0;
+
+       while (size) {
+               ssize_t ret;
+
+               ret = read(fd, data, size);
+               if (ret < 0) {
+                       if (errno == EAGAIN || errno == EINTR)
+                               continue; /* retry the read immediately */
+                       err = errno;
+                       break;
+               } else if (!ret)
+                       break; /* read() returned 0: stop with size still nonzero */
+               else {
+                       data += ret; /* advance past the bytes already received */
+                       size -= ret;
+               }
+       }
+
+       if (err)
+               return err;
+       if (size)
+               return EAGAIN; /* loop ended before all bytes arrived */
 
-static int fio_sgio_getevents(struct thread_data *td, int min, int max,
-                             struct timespec fio_unused *t)
+       return 0;
+}
+
+static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
+                             unsigned int max,
+                             const struct timespec fio_unused *t) /* reap completions into sd->events[]; returns r (io_u count, or negative on error) */
 {
-       struct fio_file *f = &td->files[0];
-       struct sgio_data *sd = td->io_ops->data;
-       struct pollfd pfd = { .fd = f->fd, .events = POLLIN };
-       void *buf = malloc(max * sizeof(struct sg_io_hdr));
-       int left = max, ret, events, i, r = 0, fl = 0;
+       struct sgio_data *sd = td->io_ops_data;
+       int left = max, eventNum, ret, r = 0, trims = 0; /* trims: extra events[] slots consumed by multi-range unmaps */
+       void *buf = sd->sgbuf;
+       unsigned int i, j, events;
+       struct fio_file *f;
+       struct io_u *io_u;
 
        /*
-        * don't block for !events
+        * Fill in the file descriptors
         */
-       if (!min) {
-               fl = fcntl(f->fd, F_GETFL);
-               fcntl(f->fd, F_SETFL, fl | O_NONBLOCK);
+       for_each_file(td, f, i) {
+               /*
+                * don't block for min events == 0
+                */
+               if (!min)
+                       sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg"); /* result is kept so the flags can be restored on exit */
+               else
+                       sd->fd_flags[i] = -1; /* -1: nothing to restore for this file */
+
+               sd->pfds[i].fd = f->fd;
+               sd->pfds[i].events = POLLIN;
        }
 
-       while (left) {
+       /*
+       ** There are two counters here:
+       **  - number of SCSI commands completed
+       **  - number of io_us completed
+       **
+       ** These are the same with reads and writes, but
+       ** could differ with trim/unmap commands because
+       ** a single unmap can include multiple io_us
+       */
+
+       while (left > 0) { /* left counts io_us still wanted, starting at max */
+               char *p;
+
+               dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left);
+
                do {
                        if (!min)
                                break;
-                       poll(&pfd, 1, -1);
-                       if (pfd.revents & POLLIN)
+
+                       ret = poll(sd->pfds, td->o.nr_files, -1); /* timeout -1: wait indefinitely */
+                       if (ret < 0) {
+                               if (!r)
+                                       r = -errno; /* only report the failure if nothing was reaped yet */
+                               td_verror(td, errno, "poll");
                                break;
-               } while (1);
+                       } else if (!ret)
+                               continue;
 
-               ret = read(f->fd, buf, left * sizeof(struct sg_io_hdr));
-               if (ret < 0) {
-                       if (errno == EAGAIN)
+                       if (pollin_events(sd->pfds, td->o.nr_files))
                                break;
-                       td_verror(td, errno);
-                       r = -1;
+               } while (1);
+
+               if (r < 0)
                        break;
-               } else if (!ret)
+
+re_read:
+               p = buf; /* headers are read back-to-back into sd->sgbuf */
+               events = 0;
+               for_each_file(td, f, i) {
+                       for (eventNum = 0; eventNum < left; eventNum++) {
+                               ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr)); /* one completed header per call */
+                               dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret);
+                               if (ret) {
+                                       r = -ret;
+                                       td_verror(td, r, "sg_read"); /* NOTE(review): r is negative here while other td_verror calls pass a positive errno - confirm */
+                                       break; /* leaves only the inner per-file loop */
+                               }
+                               io_u = ((struct sg_io_hdr *)p)->usr_ptr; /* usr_ptr was set to the io_u by sgio_hdr_init() */
+                               if (io_u->ddir == DDIR_TRIM) {
+                                       events += sd->trim_queues[io_u->index]->unmap_range_count; /* one unmap header stands for all of its ranges */
+                                       eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1;
+                               } else
+                                       events++;
+
+                               p += sizeof(struct sg_io_hdr);
+                               dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left);
+                       }
+               }
+
+               if (r < 0 && !events)
                        break;
+               if (!events) {
+                       usleep(1000); /* nothing read: wait 1 ms and try again */
+                       goto re_read;
+               }
 
-               events = ret / sizeof(struct sg_io_hdr);
                left -= events;
                r += events;
 
                for (i = 0; i < events; i++) {
                        struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
-
-                       sd->events[i] = hdr->usr_ptr;
+                       sd->events[i + trims] = hdr->usr_ptr; /* slot is shifted by the extra trim io_us already stored */
+                       io_u = (struct io_u *)(hdr->usr_ptr);
+
+                       if (hdr->info & SG_INFO_CHECK) {
+                               /* record if an io error occurred, ignore resid */
+                               memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
+                               sd->events[i + trims]->error = EIO;
+                       }
+
+                       if (io_u->ddir == DDIR_TRIM) {
+                               struct sgio_trim *st = sd->trim_queues[io_u->index];
+                               assert(st->trim_io_us[0] == io_u);
+                               dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index);
+                               dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims);
+                               for (j = 1; j < st->unmap_range_count; j++) { /* entry 0 was stored above; add the remaining io_us of this unmap */
+                                       ++trims;
+                                       sd->events[i + trims] = st->trim_io_us[j];
+                                       dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims);
+                                       if (hdr->info & SG_INFO_CHECK) {
+                                               /* record if an io error occurred, ignore resid */
+                                               memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr));
+                                               sd->events[i + trims]->error = EIO;
+                                       }
+                               }
+                               events -= st->unmap_range_count - 1; /* the loop bound counts headers, not io_us */
+                               st->unmap_range_count = 0; /* mark the queue empty again */
+                       }
                }
        }
 
-       if (!min)
-               fcntl(f->fd, F_SETFL, fl);
+       if (!min) {
+               for_each_file(td, f, i) {
+                       if (sd->fd_flags[i] == -1)
+                               continue;
+
+                       if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0) /* put back the flags saved at entry */
+                               log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
+               }
+       }
 
-       free(buf);
        return r;
 }
 
-static int fio_sgio_ioctl_doio(struct thread_data *td,
-                              struct fio_file *f, struct io_u *io_u)
+static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td,
+                                            struct fio_file *f,
+                                            struct io_u *io_u) /* run io_u's sg header through the SG_IO ioctl on f->fd */
 {
-       struct sgio_data *sd = td->io_ops->data;
+       struct sgio_data *sd = td->io_ops_data;
        struct sg_io_hdr *hdr = &io_u->hdr;
+       int ret;
 
        sd->events[0] = io_u;
 
-       return ioctl(f->fd, SG_IO, hdr);
+       ret = ioctl(f->fd, SG_IO, hdr);
+       if (ret < 0)
+               return ret; /* negative ioctl result is handed back unchanged */
+
+       /* record if an io error occurred */
+       if (hdr->info & SG_INFO_CHECK)
+               io_u->error = EIO;
+
+       return FIO_Q_COMPLETED;
 }
 
-static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int sync)
+static enum fio_q_status fio_sgio_rw_doio(struct fio_file *f,
+                                         struct io_u *io_u, int do_sync) /* submit io_u's sg header with write(); with do_sync also read the completion back */
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
        int ret;
 
        ret = write(f->fd, hdr, sizeof(*hdr));
        if (ret < 0)
-               return errno;
+               return ret; /* negative result; the caller picks up errno */
 
-       if (sync) {
+       if (do_sync) {
                ret = read(f->fd, hdr, sizeof(*hdr));
                if (ret < 0)
-                       return errno;
+                       return ret;
+
+               /* record if an io error occurred */
+               if (hdr->info & SG_INFO_CHECK)
+                       io_u->error = EIO;
+
+               return FIO_Q_COMPLETED; /* completion was read back in this call */
        }
 
-       return 0;
+       return FIO_Q_QUEUED; /* submitted only; nothing was read back here */
 }
 
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
+static enum fio_q_status fio_sgio_doio(struct thread_data *td,
+                                      struct io_u *io_u, int do_sync) /* issue io_u via the ioctl path or the write()/read() path, chosen by file type */
 {
        struct fio_file *f = io_u->file;
+       enum fio_q_status ret;
 
-       if (td->filetype == FIO_TYPE_BD)
-               return fio_sgio_ioctl_doio(td, f, io_u);
+       if (f->filetype == FIO_TYPE_BLOCK) { /* block files use the SG_IO ioctl */
+               ret = fio_sgio_ioctl_doio(td, f, io_u);
+               td_verror(td, io_u->error, __func__);
+       } else { /* every other file type uses write()/read() on the fd */
+               ret = fio_sgio_rw_doio(f, io_u, do_sync);
+               if (do_sync)
+                       td_verror(td, io_u->error, __func__);
+       }
 
-       return fio_sgio_rw_doio(f, io_u, sync);
+       return ret;
+}
+
+/*
+ * Store the starting LBA and the block count in the CDB at hdr->cmdp.
+ * The opcode in byte 0 is left to the caller.
+ *
+ * lba < MAX_10B_LBA: 10-byte layout, big-endian 32-bit LBA in bytes 2-5
+ * and 16-bit block count in bytes 7-8.
+ * Otherwise: 16-byte layout, big-endian 64-bit LBA in bytes 2-9 and
+ * 32-bit block count in bytes 10-13.
+ */
+static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
+                           unsigned long long nr_blocks)
+{
+       if (lba < MAX_10B_LBA) {
+               hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
+               hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
+               hdr->cmdp[4] = (unsigned char) ((lba >>  8) & 0xff);
+               hdr->cmdp[5] = (unsigned char) (lba & 0xff);
+               hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
+       } else {
+               hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
+               hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
+               hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
+               hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
+               hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
+               hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
+               hdr->cmdp[8] = (unsigned char) ((lba >>  8) & 0xff);
+               hdr->cmdp[9] = (unsigned char) (lba & 0xff);
+               /*
+                * The block count is a 4-byte field, so its first byte is
+                * bits 31..24 (a shift by 32 would drop those bits).
+                */
+               hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 24) & 0xff);
+               hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
+               hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
+       }
 }
 
 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
-       struct sgio_data *sd = td->io_ops->data;
-       int nr_blocks, lba;
+       struct sg_options *o = td->eo;
+       struct sgio_data *sd = td->io_ops_data;
+       unsigned long long nr_blocks, lba;
+       int offset;
 
        if (io_u->xfer_buflen & (sd->bs - 1)) {
                log_err("read/write not sector aligned\n");
                return EINVAL;
        }
 
+       nr_blocks = io_u->xfer_buflen / sd->bs;
+       lba = io_u->offset / sd->bs;
+
        if (io_u->ddir == DDIR_READ) {
                sgio_hdr_init(sd, hdr, io_u, 1);
 
                hdr->dxfer_direction = SG_DXFER_FROM_DEV;
-               hdr->cmdp[0] = 0x28;
+               if (lba < MAX_10B_LBA)
+                       hdr->cmdp[0] = 0x28; // read(10)
+               else
+                       hdr->cmdp[0] = 0x88; // read(16)
+
+               if (o->readfua)
+                       hdr->cmdp[1] |= 0x08;
+
+               fio_sgio_rw_lba(hdr, lba, nr_blocks);
+
        } else if (io_u->ddir == DDIR_WRITE) {
                sgio_hdr_init(sd, hdr, io_u, 1);
 
                hdr->dxfer_direction = SG_DXFER_TO_DEV;
-               hdr->cmdp[0] = 0x2a;
-       } else {
-               sgio_hdr_init(sd, hdr, io_u, 0);
+               switch(o->write_mode) {
+               case FIO_SG_WRITE:
+                       if (lba < MAX_10B_LBA)
+                               hdr->cmdp[0] = 0x2a; // write(10)
+                       else
+                               hdr->cmdp[0] = 0x8a; // write(16)
+                       if (o->writefua)
+                               hdr->cmdp[1] |= 0x08;
+                       break;
+               case FIO_SG_WRITE_VERIFY:
+                       if (lba < MAX_10B_LBA)
+                               hdr->cmdp[0] = 0x2e; // write and verify(10)
+                       else
+                               hdr->cmdp[0] = 0x8e; // write and verify(16)
+                       break;
+                       // BYTCHK is disabled by virtue of the memset in sgio_hdr_init
+               case FIO_SG_WRITE_SAME:
+                       hdr->dxfer_len = sd->bs;
+                       if (lba < MAX_10B_LBA)
+                               hdr->cmdp[0] = 0x41; // write same(10)
+                       else
+                               hdr->cmdp[0] = 0x93; // write same(16)
+                       break;
+               };
 
-               hdr->dxfer_direction = SG_DXFER_NONE;
-               hdr->cmdp[0] = 0x35;
-       }
+               fio_sgio_rw_lba(hdr, lba, nr_blocks);
 
-       if (hdr->dxfer_direction != SG_DXFER_NONE) {
-               nr_blocks = io_u->xfer_buflen / sd->bs;
-               lba = io_u->offset / sd->bs;
-               hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
-               hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
-               hdr->cmdp[4] = (unsigned char) ((lba >>  8) & 0xff);
-               hdr->cmdp[5] = (unsigned char) (lba & 0xff);
-               hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
-               hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
-       }
+       } else if (io_u->ddir == DDIR_TRIM) {
+               struct sgio_trim *st;
+
+               if (sd->current_queue == -1) {
+                       sgio_hdr_init(sd, hdr, io_u, 0);
+
+                       hdr->cmd_len = 10;
+                       hdr->dxfer_direction = SG_DXFER_TO_DEV;
+                       hdr->cmdp[0] = 0x42; // unmap
+                       sd->current_queue = io_u->index;
+                       st = sd->trim_queues[sd->current_queue];
+                       hdr->dxferp = st->unmap_param;
+                       assert(sd->trim_queues[io_u->index]->unmap_range_count == 0);
+                       dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index);
+               }
+               else
+                       st = sd->trim_queues[sd->current_queue];
+
+               dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue);
+               st->trim_io_us[st->unmap_range_count] = io_u;
+               sd->trim_queue_map[io_u->index] = sd->current_queue;
+
+               offset = 8 + 16 * st->unmap_range_count;
+               st->unmap_param[offset] = (unsigned char) ((lba >> 56) & 0xff);
+               st->unmap_param[offset+1] = (unsigned char) ((lba >> 48) & 0xff);
+               st->unmap_param[offset+2] = (unsigned char) ((lba >> 40) & 0xff);
+               st->unmap_param[offset+3] = (unsigned char) ((lba >> 32) & 0xff);
+               st->unmap_param[offset+4] = (unsigned char) ((lba >> 24) & 0xff);
+               st->unmap_param[offset+5] = (unsigned char) ((lba >> 16) & 0xff);
+               st->unmap_param[offset+6] = (unsigned char) ((lba >>  8) & 0xff);
+               st->unmap_param[offset+7] = (unsigned char) (lba & 0xff);
+               st->unmap_param[offset+8] = (unsigned char) ((nr_blocks >> 32) & 0xff);
+               st->unmap_param[offset+9] = (unsigned char) ((nr_blocks >> 16) & 0xff);
+               st->unmap_param[offset+10] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               st->unmap_param[offset+11] = (unsigned char) (nr_blocks & 0xff);
+
+               st->unmap_range_count++;
+
+       } else if (ddir_sync(io_u->ddir)) {
+               sgio_hdr_init(sd, hdr, io_u, 0);
+               hdr->dxfer_direction = SG_DXFER_NONE;
+               if (lba < MAX_10B_LBA)
+                       hdr->cmdp[0] = 0x35; // synccache(10)
+               else
+                       hdr->cmdp[0] = 0x91; // synccache(16)
+       } else
+               assert(0);
 
        return 0;
 }
 
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+/*
+ * Finish an UNMAP command once the number of queued ranges is known.
+ * Each range takes 16 bytes and the parameter list starts with an 8-byte
+ * header, so the lengths written here all derive from
+ * 16 * st->unmap_range_count.
+ */
+static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st)
+{
+       const unsigned int desc_len = 16 * st->unmap_range_count;
+       const unsigned int list_len = desc_len + 8;
+       const unsigned int data_len = desc_len + 6;
+
+       /* whole parameter list: transfer size and CDB bytes 7-8 */
+       hdr->dxfer_len = list_len;
+       hdr->cmdp[7] = (unsigned char) ((list_len >> 8) & 0xff);
+       hdr->cmdp[8] = (unsigned char) (list_len & 0xff);
+
+       /* parameter list header bytes 0-1: descriptor bytes plus 6 */
+       st->unmap_param[0] = (unsigned char) ((data_len >> 8) & 0xff);
+       st->unmap_param[1] = (unsigned char) (data_len & 0xff);
+
+       /* parameter list header bytes 2-3: descriptor bytes alone */
+       st->unmap_param[2] = (unsigned char) ((desc_len >> 8) & 0xff);
+       st->unmap_param[3] = (unsigned char) (desc_len & 0xff);
+}
+
+static enum fio_q_status fio_sgio_queue(struct thread_data *td,
+                                       struct io_u *io_u) /* submit io_u, or for async trims just leave it in the open trim queue */
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
-       int ret;
+       struct sgio_data *sd = td->io_ops_data;
+       int ret, do_sync = 0;
+
+       fio_ro_check(td, io_u);
+
+       if (sgio_unbuffered(td) || ddir_sync(io_u->ddir))
+               do_sync = 1; /* direct/sync jobs and sync operations are waited for in this call */
+
+       if (io_u->ddir == DDIR_TRIM) {
+               if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) {
+                       struct sgio_trim *st = sd->trim_queues[sd->current_queue];
+
+                       /* finish cdb setup for unmap because we are
+                       ** doing unmap commands synchronously */
+                       assert(st->unmap_range_count == 1);
+                       assert(io_u == st->trim_io_us[0]);
+                       hdr = &io_u->hdr;
+
+                       fio_sgio_unmap_setup(hdr, st);
+
+                       st->unmap_range_count = 0; /* queue is consumed by this single-range unmap */
+                       sd->current_queue = -1;
+               } else
+                       /* queue up trim ranges and submit in commit() */
+                       return FIO_Q_QUEUED;
+       }
 
-       ret = fio_sgio_doio(td, io_u, io_u->ddir == DDIR_SYNC);
+       ret = fio_sgio_doio(td, io_u, do_sync);
 
        if (ret < 0)
                io_u->error = errno;
        else if (hdr->status) {
                io_u->resid = hdr->resid;
                io_u->error = EIO;
+       } else if (td->io_ops->commit != NULL) { /* accounting is done here only when the engine has a commit hook */
+               if (do_sync && !ddir_sync(io_u->ddir)) {
+                       io_u_mark_submit(td, 1);
+                       io_u_mark_complete(td, 1);
+               } else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+                       io_u_mark_submit(td, 1);
+                       io_u_queued(td, io_u);
+               }
+       }
+
+       if (io_u->error) {
+               td_verror(td, io_u->error, "xfer");
+               return FIO_Q_COMPLETED; /* a failed io_u is reported as completed, with error set */
        }
 
-       return io_u->error;
+       return ret;
+}
+
+/*
+ * Submit the currently open trim queue, if any, as one UNMAP command.
+ *
+ * The header of the first io_u in the queue carries the command. On a
+ * failed submission or a nonzero header status, every io_u in the queue
+ * is given the error; otherwise the io_us are stamped with a common issue
+ * time (when issue times are being recorded) and counted as submitted.
+ *
+ * Returns 0 when there was nothing to do, when the first io_u carries an
+ * error, or when the command was queued; otherwise the value returned by
+ * fio_sgio_rw_doio().
+ */
+static int fio_sgio_commit(struct thread_data *td)
+{
+       struct sgio_data *sd = td->io_ops_data;
+       struct sgio_trim *queue;
+       struct io_u *head;
+       struct sg_io_hdr *hdr;
+       struct timespec now;
+       unsigned int n;
+       int rc;
+
+       if (sd->current_queue == -1)
+               return 0;
+
+       queue = sd->trim_queues[sd->current_queue];
+       head = queue->trim_io_us[0];
+       hdr = &head->hdr;
+
+       fio_sgio_unmap_setup(hdr, queue);
+
+       /* the queue is closed before submission; later trims open a new one */
+       sd->current_queue = -1;
+
+       rc = fio_sgio_rw_doio(head->file, head, 0);
+
+       if (rc < 0) {
+               for (n = 0; n < queue->unmap_range_count; n++)
+                       queue->trim_io_us[n]->error = errno;
+       } else if (hdr->status) {
+               for (n = 0; n < queue->unmap_range_count; n++) {
+                       struct io_u *member = queue->trim_io_us[n];
+
+                       member->resid = hdr->resid;
+                       member->error = EIO;
+               }
+       } else {
+               if (fio_fill_issue_time(td)) {
+                       fio_gettime(&now, NULL);
+                       for (n = 0; n < queue->unmap_range_count; n++) {
+                               struct io_u *member = queue->trim_io_us[n];
+
+                               memcpy(&member->issue_time, &now, sizeof(now));
+                               io_u_queued(td, member);
+                       }
+               }
+               io_u_mark_submit(td, queue->unmap_range_count);
+       }
+
+       if (head->error) {
+               td_verror(td, head->error, "xfer");
+               return 0;
+       }
+
+       return (rc == FIO_Q_QUEUED) ? 0 : rc;
 }
 
 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
 {
-       struct sgio_data *sd = td->io_ops->data;
+       struct sgio_data *sd = td->io_ops_data; /* engine state that holds the events[] array */
 
        return sd->events[event];
 }
 
-static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
+static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
+                                 unsigned long long *max_lba)
 {
-       struct sgio_data *sd = td->io_ops->data;
-       struct io_u *io_u;
-       struct sg_io_hdr *hdr;
-       unsigned char buf[8];
+       /*
+        * Query the device behind td->files[0] for its block size (*bs)
+        * and highest LBA (*max_lba) using READ CAPACITY(10), falling back
+        * to READ CAPACITY(16) when the 10-byte answer is saturated.
+        *
+        * Returns 0 on success, -errno when the file cannot be opened, or
+        * the negative ioctl() result when SG_IO fails.
+        */
+       /*
+        * need to do read capacity operation w/o benefit of sd or
+        * io_u structures, which are not initialized until later.
+        */
+       struct sg_io_hdr hdr;
+       unsigned char cmd[16];
+       unsigned char sb[64];
+       unsigned char buf[32];  // read capacity return
        int ret;
+       int fd = -1;
 
-       io_u = __get_io_u(td);
-       assert(io_u);
+       struct fio_file *f = td->files[0];
 
-       hdr = &io_u->hdr;
-       sgio_hdr_init(sd, hdr, io_u, 0);
-       memset(buf, 0, sizeof(buf));
+       /* open file independent of rest of application */
+       fd = open(f->file_name, O_RDONLY);
+       if (fd < 0)
+               return -errno;
 
-       hdr->cmdp[0] = 0x25;
-       hdr->dxfer_direction = SG_DXFER_FROM_DEV;
-       hdr->dxferp = buf;
-       hdr->dxfer_len = sizeof(buf);
+       memset(&hdr, 0, sizeof(hdr));
+       memset(cmd, 0, sizeof(cmd));
+       memset(sb, 0, sizeof(sb));
+       memset(buf, 0, sizeof(buf));
 
-       ret = fio_sgio_doio(td, io_u, 1);
-       if (ret) {
-               put_io_u(td, io_u);
+       /* First let's try a 10 byte read capacity. */
+       hdr.interface_id = 'S';
+       hdr.cmdp = cmd;
+       hdr.cmd_len = 10;
+       hdr.sbp = sb;
+       hdr.mx_sb_len = sizeof(sb);
+       hdr.timeout = SCSI_TIMEOUT_MS;
+       hdr.cmdp[0] = 0x25;  // Read Capacity(10)
+       hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+       hdr.dxferp = buf;
+       hdr.dxfer_len = sizeof(buf);
+
+       ret = ioctl(fd, SG_IO, &hdr);
+       if (ret < 0) {
+               close(fd);
                return ret;
        }
 
-       *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
-       put_io_u(td, io_u);
+       /* 10-byte answer: bytes 0-3 hold the last LBA, bytes 4-7 the block size */
+       *bs      = ((unsigned long) buf[4] << 24) | ((unsigned long) buf[5] << 16) |
+                  ((unsigned long) buf[6] << 8) | (unsigned long) buf[7];
+       *max_lba = ((unsigned long) buf[0] << 24) | ((unsigned long) buf[1] << 16) |
+                  ((unsigned long) buf[2] << 8) | (unsigned long) buf[3];
+
+       /*
+        * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA,
+        * then need to retry with 16 byte Read Capacity command.
+        */
+       if (*max_lba == MAX_10B_LBA) {
+               hdr.cmd_len = 16;
+               hdr.cmdp[0] = 0x9e; // service action
+               hdr.cmdp[1] = 0x10; // Read Capacity(16)
+               /* bytes 10-13: size of the reply buffer */
+               hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
+               hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
+               hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
+               hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
+
+               hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+               hdr.dxferp = buf;
+               hdr.dxfer_len = sizeof(buf);
+
+               ret = ioctl(fd, SG_IO, &hdr);
+               if (ret < 0) {
+                       close(fd);
+                       return ret;
+               }
+
+               /* record if an io error occurred */
+               if (hdr.info & SG_INFO_CHECK)
+                       td_verror(td, EIO, "fio_sgio_read_capacity");
+
+               /*
+                * 16-byte answer: bytes 0-7 hold the last LBA, bytes 8-11
+                * the block size. Shift as unsigned: buf[8] promotes to a
+                * signed int, and moving a set top bit into the sign bit
+                * is undefined.
+                */
+               *bs = ((unsigned int) buf[8] << 24) | ((unsigned int) buf[9] << 16) |
+                     ((unsigned int) buf[10] << 8) | (unsigned int) buf[11];
+               *max_lba = ((unsigned long long)buf[0] << 56) |
+                               ((unsigned long long)buf[1] << 48) |
+                               ((unsigned long long)buf[2] << 40) |
+                               ((unsigned long long)buf[3] << 32) |
+                               ((unsigned long long)buf[4] << 24) |
+                               ((unsigned long long)buf[5] << 16) |
+                               ((unsigned long long)buf[6] << 8) |
+                               (unsigned long long)buf[7];
+       }
+
+       close(fd);
        return 0;
 }
 
+/*
+ * Release everything fio_sgio_init() allocated for this thread.
+ *
+ * Tolerates a NULL or partially populated sgio_data: free(NULL) is a
+ * no-op and a missing trim queue array or entry is skipped instead of
+ * being dereferenced. td->io_ops_data is cleared afterwards so the freed
+ * pointer cannot be picked up again.
+ */
 static void fio_sgio_cleanup(struct thread_data *td)
 {
-       if (td->io_ops->data) {
-               free(td->io_ops->data);
-               td->io_ops->data = NULL;
+       struct sgio_data *sd = td->io_ops_data;
+       unsigned int i;
+
+       if (sd) {
+               free(sd->events);
+               free(sd->cmds);
+               free(sd->fd_flags);
+               free(sd->pfds);
+               free(sd->sgbuf);
+               free(sd->trim_queue_map);
+
+               /* init allocates one trim queue per iodepth slot */
+               for (i = 0; sd->trim_queues && i < td->o.iodepth; i++) {
+                       struct sgio_trim *st = sd->trim_queues[i];
+
+                       if (!st)
+                               continue;
+                       free(st->unmap_param);
+                       free(st->trim_io_us);
+                       free(st);
+               }
+
+               free(sd->trim_queues);
+               free(sd);
+               td->io_ops_data = NULL;
        }
 }
 
+/*
+ * Allocate the per-thread engine state: command, sg header and event
+ * arrays sized by iodepth, poll/flag arrays sized by nr_files, and one
+ * trim queue per iodepth slot.
+ *
+ * Returns 0 on success. On allocation failure everything obtained so far
+ * is freed, ENOMEM is recorded on td, td->io_ops_data is left untouched
+ * and 1 is returned.
+ */
 static int fio_sgio_init(struct thread_data *td)
 {
-       struct fio_file *f = &td->files[0];
        struct sgio_data *sd;
-       unsigned int bs;
-       int ret;
+       struct sgio_trim *st;
+       unsigned int i;
+
+       sd = calloc(1, sizeof(*sd));
+       if (!sd)
+               goto err;
+
+       sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd));
+       sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr));
+       sd->events = calloc(td->o.iodepth, sizeof(struct io_u *));
+       sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
+       sd->fd_flags = calloc(td->o.nr_files, sizeof(int));
+       sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *));
+       sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int));
+       if (!sd->cmds || !sd->sgbuf || !sd->events || !sd->pfds ||
+           !sd->fd_flags || !sd->trim_queues || !sd->trim_queue_map)
+               goto err_free;
+
+       sd->type_checked = 0;
+       sd->current_queue = -1;
+
+       for (i = 0; i < td->o.iodepth; i++) {
+               st = calloc(1, sizeof(*st));
+               if (!st)
+                       goto err_free;
+               /* publish the slot first so the unwind path can see it */
+               sd->trim_queues[i] = st;
+
+               st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16]));
+               st->unmap_range_count = 0;
+               st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
+               if (!st->unmap_param || !st->trim_io_us)
+                       goto err_free;
+       }
+
+       td->io_ops_data = sd;
+
+       /*
+        * we want to do it, regardless of whether odirect is set or not
+        */
+       td->o.override_sync = 1;
+       return 0;
+
+err_free:
+       /* slots are filled in order, so the first empty one ends the walk */
+       for (i = 0; sd->trim_queues && i < td->o.iodepth; i++) {
+               st = sd->trim_queues[i];
+               if (!st)
+                       break;
+               free(st->unmap_param);
+               free(st->trim_io_us);
+               free(st);
+       }
+       free(sd->trim_queues);
+       free(sd->trim_queue_map);
+       free(sd->fd_flags);
+       free(sd->pfds);
+       free(sd->events);
+       free(sd->sgbuf);
+       free(sd->cmds);
+       free(sd);
+err:
+       td_verror(td, ENOMEM, "calloc");
+       return 1;
+}
 
-       sd = malloc(sizeof(*sd));
-       memset(sd, 0, sizeof(*sd));
-       sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
-       memset(sd->cmds, 0, td->iodepth * sizeof(struct sgio_cmd));
-       sd->events = malloc(td->iodepth * sizeof(struct io_u *));
-       memset(sd->events, 0, td->iodepth * sizeof(struct io_u *));
-       td->io_ops->data = sd;
+static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
+{      /*
+        * One-time probe of f. For a block device the block size comes from
+        * BLKSSZGET; for a character device the SG version ioctl must
+        * succeed and block size plus max LBA come from
+        * fio_sgio_read_capacity(). The block size is stored in sd->bs and
+        * sd->type_checked is set. Returns 0 on success, 1 after recording
+        * an error on td.
+        */
+       struct sgio_data *sd = td->io_ops_data;
+       unsigned int bs = 0;
+       unsigned long long max_lba = 0; /* only filled in on the character device path */
 
-       if (td->filetype == FIO_TYPE_BD) {
+       if (f->filetype == FIO_TYPE_BLOCK) {
                if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
-                       td_verror(td, errno);
-                       goto err;
+                       td_verror(td, errno, "ioctl");
+                       return 1;
                }
-       } else if (td->filetype == FIO_TYPE_CHAR) {
-               int version;
+       } else if (f->filetype == FIO_TYPE_CHAR) {
+               int version, ret; /* version is fetched only to prove the ioctl works; it is not read afterwards */
 
                if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
-                       td_verror(td, errno);
-                       goto err;
+                       td_verror(td, errno, "ioctl");
+                       return 1;
                }
 
-               ret = fio_sgio_get_bs(td, &bs);
-               if (ret)
-                       goto err;
+               ret = fio_sgio_read_capacity(td, &bs, &max_lba);
+               if (ret) { /* NOTE(review): td->error is passed straight back in below; confirm every read_capacity failure path sets it */
+                       td_verror(td, td->error, "fio_sgio_read_capacity");
+                       log_err("ioengine sg unable to read capacity successfully\n");
+                       return 1;
+               }
        } else {
-               log_err("ioengine sgio only works on block devices\n");
-               goto err;
+               td_verror(td, EINVAL, "wrong file type");
+               log_err("ioengine sg only works on block or character devices\n");
+               return 1;
        }
 
        sd->bs = bs;
+       // Debug log only: note when max_lba calls for 16 byte read/write commands
+       if (max_lba >= MAX_10B_LBA) {
+               dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
+                       "commands for lba above 0x%016llx/0x%016llx\n",
+                       MAX_10B_LBA, max_lba);
+       }
 
-       if (td->filetype == FIO_TYPE_BD)
-               td->io_ops->getevents = fio_sgio_ioctl_getevents;
-       else
-               td->io_ops->getevents = fio_sgio_getevents;
+       if (f->filetype == FIO_TYPE_BLOCK) {
+               td->io_ops->getevents = NULL;
+               td->io_ops->event = NULL;
+               td->io_ops->commit = NULL;
+               /*
+               ** The file header lists block devices as sync mode with
+               ** commit==NULL, so the event and commit hooks are dropped.
+               **
+               ** Setting these functions to null may cause problems
+               ** with filename=/dev/sda:/dev/sg0 since we are only
+               ** considering a single file
+               */
+       }
+       sd->type_checked = 1; /* fio_sgio_open() skips this probe once the flag is set */
+
+       return 0;
+}
+
+/*
+ * Open f through the generic helper and, the first time engine data is
+ * available, run the device type check on it. A failed check closes the
+ * file again and reports 1; an open failure is returned unchanged.
+ */
+static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
+{
+       struct sgio_data *sd = td->io_ops_data;
+       int err = generic_open_file(td, f);
+
+       if (err)
+               return err;
+
+       /* no engine data yet, or the device was already probed */
+       if (!sd || sd->type_checked)
+               return 0;
+
+       if (!fio_sgio_type_check(td, f))
+               return 0;
+
+       /* the close result is deliberately ignored: the open has failed */
+       (void) generic_close_file(td, f);
+       return 1;
+}
+
+/*
+ * (code, symbolic name) pair used to decode one sg status field.
+ */
+struct sgio_status_name {
+       unsigned int code;
+       const char *name;
+};
+
+#define SGIO_NR_NAMES(tbl)     (sizeof(tbl) / sizeof((tbl)[0]))
+
+/* sg_io_hdr host_status values */
+static const struct sgio_status_name sgio_host_status[] = {
+       { 0x01, "SG_ERR_DID_NO_CONNECT" },
+       { 0x02, "SG_ERR_DID_BUS_BUSY" },
+       { 0x03, "SG_ERR_DID_TIME_OUT" },
+       { 0x04, "SG_ERR_DID_BAD_TARGET" },
+       { 0x05, "SG_ERR_DID_ABORT" },
+       { 0x06, "SG_ERR_DID_PARITY" },
+       { 0x07, "SG_ERR_DID_ERROR (internal error)" },
+       { 0x08, "SG_ERR_DID_RESET" },
+       { 0x09, "SG_ERR_DID_BAD_INTR (unexpected)" },
+       { 0x0a, "SG_ERR_DID_PASSTHROUGH" },
+       { 0x0b, "SG_ERR_DID_SOFT_ERROR (driver retry?)" },
+       { 0x0c, "SG_ERR_DID_IMM_RETRY" },
+       { 0x0d, "SG_ERR_DID_REQUEUE" },
+       { 0x0e, "SG_ERR_DID_TRANSPORT_DISRUPTED" },
+       { 0x0f, "SG_ERR_DID_TRANSPORT_FAILFAST" },
+       { 0x10, "SG_ERR_DID_TARGET_FAILURE" },
+       { 0x11, "SG_ERR_DID_NEXUS_FAILURE" },
+       { 0x12, "SG_ERR_DID_ALLOC_FAILURE" },
+       { 0x13, "SG_ERR_DID_MEDIUM_ERROR" },
+};
+
+/* low nibble of sg_io_hdr driver_status */
+static const struct sgio_status_name sgio_driver_status[] = {
+       { 0x01, "SG_ERR_DRIVER_BUSY" },
+       { 0x02, "SG_ERR_DRIVER_SOFT" },
+       { 0x03, "SG_ERR_DRIVER_MEDIA" },
+       { 0x04, "SG_ERR_DRIVER_ERROR" },
+       { 0x05, "SG_ERR_DRIVER_INVALID" },
+       { 0x06, "SG_ERR_DRIVER_TIMEOUT" },
+       { 0x07, "SG_ERR_DRIVER_HARD" },
+       { 0x08, "SG_ERR_DRIVER_SENSE" },
+};
+
+/* high nibble of sg_io_hdr driver_status */
+static const struct sgio_status_name sgio_driver_suggest[] = {
+       { 0x10, "SG_ERR_SUGGEST_RETRY" },
+       { 0x20, "SG_ERR_SUGGEST_ABORT" },
+       { 0x30, "SG_ERR_SUGGEST_REMAP" },
+       { 0x40, "SG_ERR_SUGGEST_DIE" },
+       { 0x80, "SG_ERR_SUGGEST_SENSE" },
+};
+
+/* SCSI 3 status codes (sg_io_hdr status) */
+static const struct sgio_status_name sgio_scsi_status[] = {
+       { 0x02, "CHECK_CONDITION" },
+       { 0x04, "CONDITION_MET" },
+       { 0x08, "BUSY" },
+       { 0x10, "INTERMEDIATE" },
+       { 0x14, "INTERMEDIATE_CONDITION_MET" },
+       { 0x18, "RESERVATION_CONFLICT" },
+       { 0x22, "COMMAND_TERMINATED" },
+       { 0x28, "TASK_SET_FULL" },
+       { 0x30, "ACA_ACTIVE" },
+       { 0x40, "TASK_ABORTED" },
+};
+
+/*
+ * Return the symbolic name for code in tbl, or NULL if it is not listed.
+ */
+static const char *sgio_lookup_status(const struct sgio_status_name *tbl,
+                                     size_t nr, unsigned int code)
+{
+       size_t i;
+
+       for (i = 0; i < nr; i++) {
+               if (tbl[i].code == code)
+                       return tbl[i].name;
+       }
+
+       return NULL;
+}
+
+/*
+ * Append " xx" for each of the len bytes, then ". ", to the string in msg
+ * (capacity msgsz). A NULL byte pointer is treated as an empty dump.
+ */
+static void sgio_append_hex(char *msg, size_t msgsz,
+                           const unsigned char *bytes, unsigned int len)
+{
+       char hex[4];
+       unsigned int i;
+
+       if (!bytes)
+               len = 0;
+
+       /*
+        * Each byte costs three characters, so bytes past msgsz / 3 can
+        * never land in the buffer; skip formatting them.
+        */
+       if (len > msgsz / 3)
+               len = msgsz / 3;
+
+       for (i = 0; i < len; i++) {
+               snprintf(hex, sizeof(hex), " %02x", bytes[i]);
+               strlcat(msg, hex, msgsz);
+       }
+
+       strlcat(msg, ". ", msgsz);
+}
+
+/*
+ * Build an error string with details about the driver, host or scsi
+ * error contained in the sg header of io_u. The caller will use it as
+ * necessary.
+ *
+ * Returns a calloc()ed, NUL-terminated string of at most MAXERRDETAIL
+ * bytes (presumably owned and freed by the caller - confirm at the call
+ * site), or NULL if the buffer cannot be allocated.
+ */
+static char *fio_sgio_errdetails(struct io_u *io_u)
+{
+       struct sg_io_hdr *hdr = &io_u->hdr;
+#define MAXERRDETAIL 1024
+#define MAXMSGCHUNK  128
+       char *msg, msgchunk[MAXMSGCHUNK];
+       const char *name;
+
+       msg = calloc(1, MAXERRDETAIL);
+       if (!msg)
+               return NULL;
 
        /*
-        * we want to do it, regardless of whether odirect is set or not
+        * can't seem to find sg_err.h, so the lookup tables just echo the
+        * define names so others can search on internet to find clearer
+        * clues of meaning.
         */
-       td->override_sync = 1;
+       if (!(hdr->info & SG_INFO_CHECK)) {
+               strlcat(msg, "SG Driver did not report a Host, Driver or Device check",
+                       MAXERRDETAIL);
+               return msg;
+       }
+
+       if (hdr->host_status) {
+               snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+               name = sgio_lookup_status(sgio_host_status,
+                                         SGIO_NR_NAMES(sgio_host_status),
+                                         hdr->host_status);
+               strlcat(msg, name ? name : "Unknown", MAXERRDETAIL);
+               strlcat(msg, ". ", MAXERRDETAIL);
+       }
+
+       if (hdr->driver_status) {
+               snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+               name = sgio_lookup_status(sgio_driver_status,
+                                         SGIO_NR_NAMES(sgio_driver_status),
+                                         hdr->driver_status & 0x0F);
+               strlcat(msg, name ? name : "Unknown", MAXERRDETAIL);
+               strlcat(msg, "; ", MAXERRDETAIL);
+
+               /* an unlisted suggestion nibble adds no text at all */
+               name = sgio_lookup_status(sgio_driver_suggest,
+                                         SGIO_NR_NAMES(sgio_driver_suggest),
+                                         hdr->driver_status & 0xF0);
+               if (name)
+                       strlcat(msg, name, MAXERRDETAIL);
+               strlcat(msg, ". ", MAXERRDETAIL);
+       }
+
+       if (hdr->status) {
+               snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+               name = sgio_lookup_status(sgio_scsi_status,
+                                         SGIO_NR_NAMES(sgio_scsi_status),
+                                         hdr->status);
+               strlcat(msg, name ? name : "Unknown", MAXERRDETAIL);
+               strlcat(msg, ". ", MAXERRDETAIL);
+       }
+
+       if (hdr->sb_len_wr) {
+               snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+               sgio_append_hex(msg, MAXERRDETAIL, hdr->sbp, hdr->sb_len_wr);
+       }
+
+       if (hdr->resid != 0) {
+               /* dxfer_len is unsigned, so it is printed with %u */
+               snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %u not transferred. ", hdr->resid, hdr->dxfer_len);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+       }
+
+       if (hdr->cmdp) {
+               strlcat(msg, "cdb:", MAXERRDETAIL);
+               sgio_append_hex(msg, MAXERRDETAIL, hdr->cmdp, hdr->cmd_len);
+
+               /* for trims, also dump the data buffer sent with the command */
+               if (io_u->ddir == DDIR_TRIM) {
+                       strlcat(msg, "dxferp:", MAXERRDETAIL);
+                       sgio_append_hex(msg, MAXERRDETAIL, hdr->dxferp, hdr->dxfer_len);
+               }
+       }
+
+       return msg;
+}
+
+/*
+ * Work out the maximum file size of f from a read capacity query.
+ */
+static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+       /*
+        * get_file_size is called even before sgio_init, so none of the
+        * sg_io structures in the thread_data are initialized yet and the
+        * ReadCapacity has to be done without any of those helpers.
+        *
+        * One effect is that ReadCapacity may be issued 4 times on each
+        * open: readcap(10), followed by readcap(16) if needed, just to
+        * get the file size here, and then the same again when
+        * "type_check" runs during structure initialization. It is not
+        * clear how to prevent this little inefficiency.
+        */
+       unsigned long long max_lba = 0;
+       unsigned int bs = 0;
+
+       if (fio_file_size_known(f))
+               return 0;
+
+       if (!(f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_CHAR)) {
+               td_verror(td, EINVAL, "wrong file type");
+               log_err("ioengine sg only works on block or character devices\n");
+               return 1;
+       }
+
+       if (fio_sgio_read_capacity(td, &bs, &max_lba) != 0) {
+               td_verror(td, td->error, "fio_sgio_read_capacity");
+               log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
+               return 1;
+       }
+
+       /* the capacity reply holds the last LBA, hence the + 1 */
+       f->real_file_size = (max_lba + 1) * bs;
+       fio_file_set_size_known(f);
        return 0;
-err:
-       free(sd->events);
-       free(sd->cmds);
-       free(sd);
-       td->io_ops->data = NULL;
-       return 1;
 }
 
+
 static struct ioengine_ops ioengine = {
        .name           = "sg",
        .version        = FIO_IOOPS_VERSION,
        .init           = fio_sgio_init,
        .prep           = fio_sgio_prep,
        .queue          = fio_sgio_queue,
+       .commit         = fio_sgio_commit,      /* submits trims queued by queue(); cleared for block devices by the type check */
        .getevents      = fio_sgio_getevents,
+       .errdetails     = fio_sgio_errdetails,  /* builds a heap string describing the sg header status */
        .event          = fio_sgio_event,
        .cleanup        = fio_sgio_cleanup,
+       .open_file      = fio_sgio_open,        /* generic open plus the one-time device type check */
+       .close_file     = generic_close_file,
+       .get_file_size  = fio_sgio_get_file_size,       /* size derived from read capacity */
        .flags          = FIO_SYNCIO | FIO_RAWIO,
+       .options        = options,
+       .option_struct_size     = sizeof(struct sg_options)
 };
 
 #else /* FIO_HAVE_SGIO */
@@ -336,12 +1207,12 @@ static struct ioengine_ops ioengine = {
  */
 static int fio_sgio_init(struct thread_data fio_unused *td)
 {
-       fprintf(stderr, "fio: sgio not available\n");
+       log_err("fio: ioengine sg not available\n");    /* stub used in the #else branch of FIO_HAVE_SGIO */
        return 1;
 }
 
 static struct ioengine_ops ioengine = {
-       .name           = "sgio",
+       .name           = "sg", /* same name as the real engine table in the FIO_HAVE_SGIO branch */
        .version        = FIO_IOOPS_VERSION,
        .init           = fio_sgio_init,
 };