X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=engines%2Fsg.c;h=33e38c2bd2592d616f0559c991ef36ae5c851594;hp=84c2afe6c354158f6edbbf98fb12f0e0de09d55c;hb=b4b9665e89e2f6a25ca8e2b99207d95d4d7b5154;hpb=da751ca9665bcdeca56d2eec5b629a0953c07662 diff --git a/engines/sg.c b/engines/sg.c index 84c2afe6..33e38c2b 100644 --- a/engines/sg.c +++ b/engines/sg.c @@ -3,24 +3,142 @@ * * IO engine that uses the Linux SG v3 interface to talk to SCSI devices * + * This ioengine can operate in two modes: + * sync with block devices (/dev/sdX) or + * with character devices (/dev/sgY) with direct=1 or sync=1 + * async with character devices with direct=0 and sync=0 + * + * What value does queue() return for the different cases? + * queue() return value + * In sync mode: + * /dev/sdX RWT FIO_Q_COMPLETED + * /dev/sgY RWT FIO_Q_COMPLETED + * with direct=1 or sync=1 + * + * In async mode: + * /dev/sgY RWT FIO_Q_QUEUED + * direct=0 and sync=0 + * + * Because FIO_SYNCIO is set for this ioengine td_io_queue() will fill in + * issue_time *before* each IO is sent to queue() + * + * Where are the IO counting functions called for the different cases? + * + * In sync mode: + * /dev/sdX (commit==NULL) + * RWT + * io_u_mark_depth() called in td_io_queue() + * io_u_mark_submit/complete() called in td_io_queue() + * issue_time set in td_io_queue() + * + * /dev/sgY with direct=1 or sync=1 (commit does nothing) + * RWT + * io_u_mark_depth() called in td_io_queue() + * io_u_mark_submit/complete() called in queue() + * issue_time set in td_io_queue() + * + * In async mode: + * /dev/sgY with direct=0 and sync=0 + * RW: read and write operations are submitted in queue() + * io_u_mark_depth() called in td_io_commit() + * io_u_mark_submit() called in queue() + * issue_time set in td_io_queue() + * T: trim operations are queued in queue() and submitted in commit() + * io_u_mark_depth() called in td_io_commit() + * io_u_mark_submit() called in commit() + * issue_time set in commit() + * */ #include #include #include #include -#include -#include +#include #include "../fio.h" -#include "../os.h" +#include "../optgroup.h" #ifdef FIO_HAVE_SGIO +enum { + FIO_SG_WRITE = 1, + FIO_SG_WRITE_VERIFY = 2, + FIO_SG_WRITE_SAME = 3 +}; + +struct sg_options { + void *pad; + unsigned int readfua; + unsigned int writefua; + unsigned int write_mode; +}; + +static struct fio_option options[] = { + { + .name = "readfua", + .lname = "sg engine read fua flag support", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct sg_options, readfua), + .help = "Set FUA flag (force unit access) for all Read operations", + .def = "0", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_SG, + }, + { + .name = "writefua", + .lname = "sg engine write fua flag support", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct sg_options, writefua), + .help = "Set FUA flag (force unit access) for all Write operations", + .def = "0", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_SG, + }, + { + .name = "sg_write_mode", + .lname = "specify sg write mode", + .type = FIO_OPT_STR, + .off1 = offsetof(struct sg_options, write_mode), + .help = "Specify SCSI WRITE mode", + .def = "write", + .posval = { + { .ival = "write", + .oval = FIO_SG_WRITE, + .help = "Issue standard SCSI WRITE commands", + }, + { .ival = "verify", + .oval = FIO_SG_WRITE_VERIFY, + .help = "Issue SCSI WRITE AND VERIFY commands", + }, + { .ival = "same", + .oval = FIO_SG_WRITE_SAME, + .help = "Issue SCSI WRITE SAME commands", + }, + }, + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_SG, + }, + { + .name = NULL, + }, +}; + +#define MAX_10B_LBA 0xFFFFFFFFULL +#define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override +#define MAX_SB 64 // sense block maximum return size + struct sgio_cmd { - unsigned char cdb[10]; + unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands + unsigned char sb[MAX_SB]; // add sense block to commands int nr; }; +struct sgio_trim { + char *unmap_param; + unsigned int unmap_range_count; + struct io_u **trim_io_us; +}; + struct sgio_data { struct sgio_cmd *cmds; struct io_u **events; @@ -29,8 +147,16 @@ struct sgio_data { void *sgbuf; unsigned int bs; int type_checked; + struct sgio_trim **trim_queues; + int current_queue; + unsigned int *trim_queue_map; }; +static inline bool sgio_unbuffered(struct thread_data *td) +{ + return (td->o.odirect || td->o.sync_io); +} + static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, struct io_u *io_u, int fs) { @@ -42,8 +168,11 @@ static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, hdr->interface_id = 'S'; hdr->cmdp = sc->cdb; hdr->cmd_len = sizeof(sc->cdb); + hdr->sbp = sc->sb; + hdr->mx_sb_len = sizeof(sc->sb); hdr->pack_id = io_u->index; hdr->usr_ptr = io_u; + hdr->timeout = SCSI_TIMEOUT_MS; if (fs) { hdr->dxferp = io_u->xfer_buf; @@ -62,18 +191,45 @@ static int pollin_events(struct pollfd *pfds, int fds) return 0; } -static int fio_sgio_getevents(struct thread_data *td, int min, int max, - struct timespec fio_unused *t) +static int sg_fd_read(int fd, void *data, size_t size) { - /* - * normally hard coding &td->files[0] is a bug that needs to be fixed, - * but it's ok here as all files should point to the same device. - */ - struct fio_file *f = &td->files[0]; - struct sgio_data *sd = td->io_ops->data; - int left = max, ret, r = 0; + int err = 0; + + while (size) { + ssize_t ret; + + ret = read(fd, data, size); + if (ret < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; + err = errno; + break; + } else if (!ret) + break; + else { + data += ret; + size -= ret; + } + } + + if (err) + return err; + if (size) + return EAGAIN; + + return 0; +} + +static int fio_sgio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, + const struct timespec fio_unused *t) +{ + struct sgio_data *sd = td->io_ops_data; + int left = max, eventNum, ret, r = 0, trims = 0; void *buf = sd->sgbuf; - unsigned int i, events; + unsigned int i, j, events; + struct fio_file *f; + struct io_u *io_u; /* * Fill in the file descriptors @@ -82,22 +238,35 @@ static int fio_sgio_getevents(struct thread_data *td, int min, int max, /* * don't block for min events == 0 */ - if (!min) { - sd->fd_flags[i] = fcntl(f->fd, F_GETFL); - fcntl(f->fd, F_SETFL, sd->fd_flags[i] | O_NONBLOCK); - } + if (!min) + sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg"); + else + sd->fd_flags[i] = -1; + sd->pfds[i].fd = f->fd; sd->pfds[i].events = POLLIN; } - while (left) { - void *p; + /* + ** There are two counters here: + ** - number of SCSI commands completed + ** - number of io_us completed + ** + ** These are the same with reads and writes, but + ** could differ with trim/unmap commands because + ** a single unmap can include multiple io_us + */ + + while (left > 0) { + char *p; + + dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left); do { if (!min) break; - ret = poll(sd->pfds, td->nr_files, -1); + ret = poll(sd->pfds, td->o.nr_files, -1); if (ret < 0) { if (!r) r = -errno; @@ -106,7 +275,7 @@ static int fio_sgio_getevents(struct thread_data *td, int min, int max, } else if (!ret) continue; - if (pollin_events(sd->pfds, td->nr_files)) + if (pollin_events(sd->pfds, td->o.nr_files)) break; } while (1); @@ -117,20 +286,27 @@ re_read: p = buf; events = 0; for_each_file(td, f, i) { - ret = read(f->fd, p, left * sizeof(struct sg_io_hdr)); - if (ret < 0) { - if (errno == EAGAIN) - continue; - r = -errno; - td_verror(td, errno, "read"); - break; - } else if (ret) { - p += ret; - events += ret / sizeof(struct sg_io_hdr); + for (eventNum = 0; eventNum < left; eventNum++) { + ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr)); + dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret); + if (ret) { + r = -ret; + td_verror(td, r, "sg_read"); + break; + } + io_u = ((struct sg_io_hdr *)p)->usr_ptr; + if (io_u->ddir == DDIR_TRIM) { + events += sd->trim_queues[io_u->index]->unmap_range_count; + eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1; + } else + events++; + + p += sizeof(struct sg_io_hdr); + dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left); } } - if (r < 0) + if (r < 0 && !events) break; if (!events) { usleep(1000); @@ -142,23 +318,54 @@ re_read: for (i = 0; i < events; i++) { struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; + sd->events[i + trims] = hdr->usr_ptr; + io_u = (struct io_u *)(hdr->usr_ptr); + + if (hdr->info & SG_INFO_CHECK) { + /* record if an io error occurred, ignore resid */ + memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr)); + sd->events[i + trims]->error = EIO; + } - sd->events[i] = hdr->usr_ptr; + if (io_u->ddir == DDIR_TRIM) { + struct sgio_trim *st = sd->trim_queues[io_u->index]; + assert(st->trim_io_us[0] == io_u); + dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index); + dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims); + for (j = 1; j < st->unmap_range_count; j++) { + ++trims; + sd->events[i + trims] = st->trim_io_us[j]; + dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims); + if (hdr->info & SG_INFO_CHECK) { + /* record if an io error occurred, ignore resid */ + memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr)); + sd->events[i + trims]->error = EIO; + } + } + events -= st->unmap_range_count - 1; + st->unmap_range_count = 0; + } } } if (!min) { - for_each_file(td, f, i) - fcntl(f->fd, F_SETFL, sd->fd_flags[i]); + for_each_file(td, f, i) { + if (sd->fd_flags[i] == -1) + continue; + + if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0) + log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno)); + } } return r; } -static int fio_sgio_ioctl_doio(struct thread_data *td, - struct fio_file *f, struct io_u *io_u) +static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td, + struct fio_file *f, + struct io_u *io_u) { - struct sgio_data *sd = td->io_ops->data; + struct sgio_data *sd = td->io_ops_data; struct sg_io_hdr *hdr = &io_u->hdr; int ret; @@ -166,94 +373,258 @@ static int fio_sgio_ioctl_doio(struct thread_data *td, ret = ioctl(f->fd, SG_IO, hdr); if (ret < 0) - return -errno; + return ret; + + /* record if an io error occurred */ + if (hdr->info & SG_INFO_CHECK) + io_u->error = EIO; return FIO_Q_COMPLETED; } -static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int sync) +static enum fio_q_status fio_sgio_rw_doio(struct fio_file *f, + struct io_u *io_u, int do_sync) { struct sg_io_hdr *hdr = &io_u->hdr; int ret; ret = write(f->fd, hdr, sizeof(*hdr)); if (ret < 0) - return errno; + return ret; - if (sync) { + if (do_sync) { ret = read(f->fd, hdr, sizeof(*hdr)); if (ret < 0) - return -errno; + return ret; + + /* record if an io error occurred */ + if (hdr->info & SG_INFO_CHECK) + io_u->error = EIO; + return FIO_Q_COMPLETED; } return FIO_Q_QUEUED; } -static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync) +static enum fio_q_status fio_sgio_doio(struct thread_data *td, + struct io_u *io_u, int do_sync) { struct fio_file *f = io_u->file; + enum fio_q_status ret; + + if (f->filetype == FIO_TYPE_BLOCK) { + ret = fio_sgio_ioctl_doio(td, f, io_u); + td_verror(td, io_u->error, __func__); + } else { + ret = fio_sgio_rw_doio(f, io_u, do_sync); + if (do_sync) + td_verror(td, io_u->error, __func__); + } + + return ret; +} - if (f->filetype == FIO_TYPE_BD) - return fio_sgio_ioctl_doio(td, f, io_u); +static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba, + unsigned long long nr_blocks) +{ + if (lba < MAX_10B_LBA) { + hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff); + hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff); + hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff); + hdr->cmdp[5] = (unsigned char) (lba & 0xff); + hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff); + hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff); + } else { + hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff); + hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff); + hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff); + hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff); + hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff); + hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff); + hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff); + hdr->cmdp[9] = (unsigned char) (lba & 0xff); + hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff); + hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff); + hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff); + hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff); + } - return fio_sgio_rw_doio(f, io_u, sync); + return; } static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) { struct sg_io_hdr *hdr = &io_u->hdr; - struct sgio_data *sd = td->io_ops->data; - int nr_blocks, lba; + struct sg_options *o = td->eo; + struct sgio_data *sd = td->io_ops_data; + unsigned long long nr_blocks, lba; + int offset; if (io_u->xfer_buflen & (sd->bs - 1)) { log_err("read/write not sector aligned\n"); return EINVAL; } + nr_blocks = io_u->xfer_buflen / sd->bs; + lba = io_u->offset / sd->bs; + if (io_u->ddir == DDIR_READ) { sgio_hdr_init(sd, hdr, io_u, 1); hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->cmdp[0] = 0x28; + if (lba < MAX_10B_LBA) + hdr->cmdp[0] = 0x28; // read(10) + else + hdr->cmdp[0] = 0x88; // read(16) + + if (o->readfua) + hdr->cmdp[1] |= 0x08; + + fio_sgio_rw_lba(hdr, lba, nr_blocks); + } else if (io_u->ddir == DDIR_WRITE) { sgio_hdr_init(sd, hdr, io_u, 1); hdr->dxfer_direction = SG_DXFER_TO_DEV; - hdr->cmdp[0] = 0x2a; - } else { - sgio_hdr_init(sd, hdr, io_u, 0); + switch(o->write_mode) { + case FIO_SG_WRITE: + if (lba < MAX_10B_LBA) + hdr->cmdp[0] = 0x2a; // write(10) + else + hdr->cmdp[0] = 0x8a; // write(16) + if (o->writefua) + hdr->cmdp[1] |= 0x08; + break; + case FIO_SG_WRITE_VERIFY: + if (lba < MAX_10B_LBA) + hdr->cmdp[0] = 0x2e; // write and verify(10) + else + hdr->cmdp[0] = 0x8e; // write and verify(16) + break; + // BYTCHK is disabled by virtue of the memset in sgio_hdr_init + case FIO_SG_WRITE_SAME: + hdr->dxfer_len = sd->bs; + if (lba < MAX_10B_LBA) + hdr->cmdp[0] = 0x41; // write same(10) + else + hdr->cmdp[0] = 0x93; // write same(16) + break; + }; - hdr->dxfer_direction = SG_DXFER_NONE; - hdr->cmdp[0] = 0x35; - } + fio_sgio_rw_lba(hdr, lba, nr_blocks); - if (hdr->dxfer_direction != SG_DXFER_NONE) { - nr_blocks = io_u->xfer_buflen / sd->bs; - lba = io_u->offset / sd->bs; - hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff); - hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff); - hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff); - hdr->cmdp[5] = (unsigned char) (lba & 0xff); - hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff); - hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff); - } + } else if (io_u->ddir == DDIR_TRIM) { + struct sgio_trim *st; + + if (sd->current_queue == -1) { + sgio_hdr_init(sd, hdr, io_u, 0); + + hdr->cmd_len = 10; + hdr->dxfer_direction = SG_DXFER_TO_DEV; + hdr->cmdp[0] = 0x42; // unmap + sd->current_queue = io_u->index; + st = sd->trim_queues[sd->current_queue]; + hdr->dxferp = st->unmap_param; + assert(sd->trim_queues[io_u->index]->unmap_range_count == 0); + dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index); + } + else + st = sd->trim_queues[sd->current_queue]; + + dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue); + st->trim_io_us[st->unmap_range_count] = io_u; + sd->trim_queue_map[io_u->index] = sd->current_queue; + + offset = 8 + 16 * st->unmap_range_count; + st->unmap_param[offset] = (unsigned char) ((lba >> 56) & 0xff); + st->unmap_param[offset+1] = (unsigned char) ((lba >> 48) & 0xff); + st->unmap_param[offset+2] = (unsigned char) ((lba >> 40) & 0xff); + st->unmap_param[offset+3] = (unsigned char) ((lba >> 32) & 0xff); + st->unmap_param[offset+4] = (unsigned char) ((lba >> 24) & 0xff); + st->unmap_param[offset+5] = (unsigned char) ((lba >> 16) & 0xff); + st->unmap_param[offset+6] = (unsigned char) ((lba >> 8) & 0xff); + st->unmap_param[offset+7] = (unsigned char) (lba & 0xff); + st->unmap_param[offset+8] = (unsigned char) ((nr_blocks >> 32) & 0xff); + st->unmap_param[offset+9] = (unsigned char) ((nr_blocks >> 16) & 0xff); + st->unmap_param[offset+10] = (unsigned char) ((nr_blocks >> 8) & 0xff); + st->unmap_param[offset+11] = (unsigned char) (nr_blocks & 0xff); + + st->unmap_range_count++; + + } else if (ddir_sync(io_u->ddir)) { + sgio_hdr_init(sd, hdr, io_u, 0); + hdr->dxfer_direction = SG_DXFER_NONE; + if (lba < MAX_10B_LBA) + hdr->cmdp[0] = 0x35; // synccache(10) + else + hdr->cmdp[0] = 0x91; // synccache(16) + } else + assert(0); return 0; } -static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) +static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st) +{ + hdr->dxfer_len = st->unmap_range_count * 16 + 8; + hdr->cmdp[7] = (unsigned char) (((st->unmap_range_count * 16 + 8) >> 8) & 0xff); + hdr->cmdp[8] = (unsigned char) ((st->unmap_range_count * 16 + 8) & 0xff); + + st->unmap_param[0] = (unsigned char) (((16 * st->unmap_range_count + 6) >> 8) & 0xff); + st->unmap_param[1] = (unsigned char) ((16 * st->unmap_range_count + 6) & 0xff); + st->unmap_param[2] = (unsigned char) (((16 * st->unmap_range_count) >> 8) & 0xff); + st->unmap_param[3] = (unsigned char) ((16 * st->unmap_range_count) & 0xff); + + return; +} + +static enum fio_q_status fio_sgio_queue(struct thread_data *td, + struct io_u *io_u) { struct sg_io_hdr *hdr = &io_u->hdr; - int ret; + struct sgio_data *sd = td->io_ops_data; + int ret, do_sync = 0; + + fio_ro_check(td, io_u); - ret = fio_sgio_doio(td, io_u, io_u->ddir == DDIR_SYNC); + if (sgio_unbuffered(td) || ddir_sync(io_u->ddir)) + do_sync = 1; + + if (io_u->ddir == DDIR_TRIM) { + if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) { + struct sgio_trim *st = sd->trim_queues[sd->current_queue]; + + /* finish cdb setup for unmap because we are + ** doing unmap commands synchronously */ + assert(st->unmap_range_count == 1); + assert(io_u == st->trim_io_us[0]); + hdr = &io_u->hdr; + + fio_sgio_unmap_setup(hdr, st); + + st->unmap_range_count = 0; + sd->current_queue = -1; + } else + /* queue up trim ranges and submit in commit() */ + return FIO_Q_QUEUED; + } + + ret = fio_sgio_doio(td, io_u, do_sync); if (ret < 0) io_u->error = errno; else if (hdr->status) { io_u->resid = hdr->resid; io_u->error = EIO; + } else if (td->io_ops->commit != NULL) { + if (do_sync && !ddir_sync(io_u->ddir)) { + io_u_mark_submit(td, 1); + io_u_mark_complete(td, 1); + } else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) { + io_u_mark_submit(td, 1); + io_u_queued(td, io_u); + } } if (io_u->error) { @@ -264,48 +635,163 @@ static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) return ret; } +static int fio_sgio_commit(struct thread_data *td) +{ + struct sgio_data *sd = td->io_ops_data; + struct sgio_trim *st; + struct io_u *io_u; + struct sg_io_hdr *hdr; + struct timespec now; + unsigned int i; + int ret; + + if (sd->current_queue == -1) + return 0; + + st = sd->trim_queues[sd->current_queue]; + io_u = st->trim_io_us[0]; + hdr = &io_u->hdr; + + fio_sgio_unmap_setup(hdr, st); + + sd->current_queue = -1; + + ret = fio_sgio_rw_doio(io_u->file, io_u, 0); + + if (ret < 0) + for (i = 0; i < st->unmap_range_count; i++) + st->trim_io_us[i]->error = errno; + else if (hdr->status) + for (i = 0; i < st->unmap_range_count; i++) { + st->trim_io_us[i]->resid = hdr->resid; + st->trim_io_us[i]->error = EIO; + } + else { + if (fio_fill_issue_time(td)) { + fio_gettime(&now, NULL); + for (i = 0; i < st->unmap_range_count; i++) { + struct io_u *io_u = st->trim_io_us[i]; + + memcpy(&io_u->issue_time, &now, sizeof(now)); + io_u_queued(td, io_u); + } + } + io_u_mark_submit(td, st->unmap_range_count); + } + + if (io_u->error) { + td_verror(td, io_u->error, "xfer"); + return 0; + } + + if (ret == FIO_Q_QUEUED) + return 0; + else + return ret; +} + static struct io_u *fio_sgio_event(struct thread_data *td, int event) { - struct sgio_data *sd = td->io_ops->data; + struct sgio_data *sd = td->io_ops_data; return sd->events[event]; } -static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs) +static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs, + unsigned long long *max_lba) { - struct sgio_data *sd = td->io_ops->data; - struct io_u *io_u; - struct sg_io_hdr *hdr; - unsigned char buf[8]; + /* + * need to do read capacity operation w/o benefit of sd or + * io_u structures, which are not initialized until later. + */ + struct sg_io_hdr hdr; + unsigned char cmd[16]; + unsigned char sb[64]; + unsigned char buf[32]; // read capacity return int ret; + int fd = -1; - io_u = __get_io_u(td); - io_u->file = &td->files[0]; - assert(io_u); + struct fio_file *f = td->files[0]; - hdr = &io_u->hdr; - sgio_hdr_init(sd, hdr, io_u, 0); - memset(buf, 0, sizeof(buf)); + /* open file independent of rest of application */ + fd = open(f->file_name, O_RDONLY); + if (fd < 0) + return -errno; - hdr->cmdp[0] = 0x25; - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->dxferp = buf; - hdr->dxfer_len = sizeof(buf); + memset(&hdr, 0, sizeof(hdr)); + memset(cmd, 0, sizeof(cmd)); + memset(sb, 0, sizeof(sb)); + memset(buf, 0, sizeof(buf)); - ret = fio_sgio_doio(td, io_u, 1); - if (ret) { - put_io_u(td, io_u); + /* First let's try a 10 byte read capacity. */ + hdr.interface_id = 'S'; + hdr.cmdp = cmd; + hdr.cmd_len = 10; + hdr.sbp = sb; + hdr.mx_sb_len = sizeof(sb); + hdr.timeout = SCSI_TIMEOUT_MS; + hdr.cmdp[0] = 0x25; // Read Capacity(10) + hdr.dxfer_direction = SG_DXFER_FROM_DEV; + hdr.dxferp = buf; + hdr.dxfer_len = sizeof(buf); + + ret = ioctl(fd, SG_IO, &hdr); + if (ret < 0) { + close(fd); return ret; } - *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; - put_io_u(td, io_u); + *bs = ((unsigned long) buf[4] << 24) | ((unsigned long) buf[5] << 16) | + ((unsigned long) buf[6] << 8) | (unsigned long) buf[7]; + *max_lba = ((unsigned long) buf[0] << 24) | ((unsigned long) buf[1] << 16) | + ((unsigned long) buf[2] << 8) | (unsigned long) buf[3]; + + /* + * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA, + * then need to retry with 16 byte Read Capacity command. + */ + if (*max_lba == MAX_10B_LBA) { + hdr.cmd_len = 16; + hdr.cmdp[0] = 0x9e; // service action + hdr.cmdp[1] = 0x10; // Read Capacity(16) + hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff); + hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff); + hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff); + hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff); + + hdr.dxfer_direction = SG_DXFER_FROM_DEV; + hdr.dxferp = buf; + hdr.dxfer_len = sizeof(buf); + + ret = ioctl(fd, SG_IO, &hdr); + if (ret < 0) { + close(fd); + return ret; + } + + /* record if an io error occurred */ + if (hdr.info & SG_INFO_CHECK) + td_verror(td, EIO, "fio_sgio_read_capacity"); + + *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11]; + *max_lba = ((unsigned long long)buf[0] << 56) | + ((unsigned long long)buf[1] << 48) | + ((unsigned long long)buf[2] << 40) | + ((unsigned long long)buf[3] << 32) | + ((unsigned long long)buf[4] << 24) | + ((unsigned long long)buf[5] << 16) | + ((unsigned long long)buf[6] << 8) | + (unsigned long long)buf[7]; + } + + close(fd); return 0; } static void fio_sgio_cleanup(struct thread_data *td) { - struct sgio_data *sd = td->io_ops->data; + struct sgio_data *sd = td->io_ops_data; + int i; if (sd) { free(sd->events); @@ -313,44 +799,60 @@ static void fio_sgio_cleanup(struct thread_data *td) free(sd->fd_flags); free(sd->pfds); free(sd->sgbuf); - free(sd); + free(sd->trim_queue_map); + + for (i = 0; i < td->o.iodepth; i++) { + free(sd->trim_queues[i]->unmap_param); + free(sd->trim_queues[i]->trim_io_us); + free(sd->trim_queues[i]); + } - td->io_ops->data = NULL; + free(sd->trim_queues); + free(sd); } } static int fio_sgio_init(struct thread_data *td) { struct sgio_data *sd; + struct sgio_trim *st; + int i; + + sd = calloc(1, sizeof(*sd)); + sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd)); + sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr)); + sd->events = calloc(td->o.iodepth, sizeof(struct io_u *)); + sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd)); + sd->fd_flags = calloc(td->o.nr_files, sizeof(int)); + sd->type_checked = 0; + + sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *)); + sd->current_queue = -1; + sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int)); + for (i = 0; i < td->o.iodepth; i++) { + sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim)); + st = sd->trim_queues[i]; + st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16])); + st->unmap_range_count = 0; + st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *)); + } - sd = malloc(sizeof(*sd)); - memset(sd, 0, sizeof(*sd)); - sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd)); - memset(sd->cmds, 0, td->iodepth * sizeof(struct sgio_cmd)); - sd->events = malloc(td->iodepth * sizeof(struct io_u *)); - memset(sd->events, 0, td->iodepth * sizeof(struct io_u *)); - sd->pfds = malloc(sizeof(struct pollfd) * td->nr_files); - memset(sd->pfds, 0, sizeof(struct pollfd) * td->nr_files); - sd->fd_flags = malloc(sizeof(int) * td->nr_files); - memset(sd->fd_flags, 0, sizeof(int) * td->nr_files); - sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->iodepth); - memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->iodepth); - - td->io_ops->data = sd; + td->io_ops_data = sd; /* * we want to do it, regardless of whether odirect is set or not */ - td->override_sync = 1; + td->o.override_sync = 1; return 0; } static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f) { - struct sgio_data *sd = td->io_ops->data; - unsigned int bs; + struct sgio_data *sd = td->io_ops_data; + unsigned int bs = 0; + unsigned long long max_lba = 0; - if (f->filetype == FIO_TYPE_BD) { + if (f->filetype == FIO_TYPE_BLOCK) { if (ioctl(f->fd, BLKSSZGET, &bs) < 0) { td_verror(td, errno, "ioctl"); return 1; @@ -363,53 +865,320 @@ static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f) return 1; } - ret = fio_sgio_get_bs(td, &bs); - if (ret) + ret = fio_sgio_read_capacity(td, &bs, &max_lba); + if (ret) { + td_verror(td, td->error, "fio_sgio_read_capacity"); + log_err("ioengine sg unable to read capacity successfully\n"); return 1; + } } else { - log_err("ioengine sgio only works on block devices\n"); + td_verror(td, EINVAL, "wrong file type"); + log_err("ioengine sg only works on block or character devices\n"); return 1; } sd->bs = bs; + // Determine size of commands needed based on max_lba + if (max_lba >= MAX_10B_LBA) { + dprint(FD_IO, "sgio_type_check: using 16 byte read/write " + "commands for lba above 0x%016llx/0x%016llx\n", + MAX_10B_LBA, max_lba); + } - if (f->filetype == FIO_TYPE_BD) { + if (f->filetype == FIO_TYPE_BLOCK) { td->io_ops->getevents = NULL; td->io_ops->event = NULL; + td->io_ops->commit = NULL; + /* + ** Setting these functions to null may cause problems + ** with filename=/dev/sda:/dev/sg0 since we are only + ** considering a single file + */ } + sd->type_checked = 1; return 0; } static int fio_sgio_open(struct thread_data *td, struct fio_file *f) { - struct sgio_data *sd = td->io_ops->data; + struct sgio_data *sd = td->io_ops_data; int ret; ret = generic_open_file(td, f); if (ret) return ret; - if (!sd->type_checked && fio_sgio_type_check(td, f)) { - generic_close_file(td, f); + if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) { + ret = generic_close_file(td, f); return 1; } return 0; } +/* + * Build an error string with details about the driver, host or scsi + * error contained in the sg header Caller will use as necessary. + */ +static char *fio_sgio_errdetails(struct io_u *io_u) +{ + struct sg_io_hdr *hdr = &io_u->hdr; +#define MAXERRDETAIL 1024 +#define MAXMSGCHUNK 128 + char *msg, msgchunk[MAXMSGCHUNK]; + int i; + + msg = calloc(1, MAXERRDETAIL); + strcpy(msg, ""); + + /* + * can't seem to find sg_err.h, so I'll just echo the define values + * so others can search on internet to find clearer clues of meaning. + */ + if (hdr->info & SG_INFO_CHECK) { + if (hdr->host_status) { + snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status); + strlcat(msg, msgchunk, MAXERRDETAIL); + switch (hdr->host_status) { + case 0x01: + strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL); + break; + case 0x02: + strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL); + break; + case 0x03: + strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL); + break; + case 0x04: + strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL); + break; + case 0x05: + strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL); + break; + case 0x06: + strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL); + break; + case 0x07: + strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL); + break; + case 0x08: + strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL); + break; + case 0x09: + strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL); + break; + case 0x0a: + strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL); + break; + case 0x0b: + strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL); + break; + case 0x0c: + strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL); + break; + case 0x0d: + strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL); + break; + case 0x0e: + strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL); + break; + case 0x0f: + strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL); + break; + case 0x10: + strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL); + break; + case 0x11: + strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL); + break; + case 0x12: + strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL); + break; + case 0x13: + strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL); + break; + default: + strlcat(msg, "Unknown", MAXERRDETAIL); + break; + } + strlcat(msg, ". ", MAXERRDETAIL); + } + if (hdr->driver_status) { + snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status); + strlcat(msg, msgchunk, MAXERRDETAIL); + switch (hdr->driver_status & 0x0F) { + case 0x01: + strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL); + break; + case 0x02: + strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL); + break; + case 0x03: + strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL); + break; + case 0x04: + strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL); + break; + case 0x05: + strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL); + break; + case 0x06: + strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL); + break; + case 0x07: + strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL); + break; + case 0x08: + strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL); + break; + default: + strlcat(msg, "Unknown", MAXERRDETAIL); + break; + } + strlcat(msg, "; ", MAXERRDETAIL); + switch (hdr->driver_status & 0xF0) { + case 0x10: + strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL); + break; + case 0x20: + strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL); + break; + case 0x30: + strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL); + break; + case 0x40: + strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL); + break; + case 0x80: + strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL); + break; + } + strlcat(msg, ". ", MAXERRDETAIL); + } + if (hdr->status) { + snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status); + strlcat(msg, msgchunk, MAXERRDETAIL); + // SCSI 3 status codes + switch (hdr->status) { + case 0x02: + strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL); + break; + case 0x04: + strlcat(msg, "CONDITION_MET", MAXERRDETAIL); + break; + case 0x08: + strlcat(msg, "BUSY", MAXERRDETAIL); + break; + case 0x10: + strlcat(msg, "INTERMEDIATE", MAXERRDETAIL); + break; + case 0x14: + strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL); + break; + case 0x18: + strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL); + break; + case 0x22: + strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL); + break; + case 0x28: + strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL); + break; + case 0x30: + strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL); + break; + case 0x40: + strlcat(msg, "TASK_ABORTED", MAXERRDETAIL); + break; + default: + strlcat(msg, "Unknown", MAXERRDETAIL); + break; + } + strlcat(msg, ". ", MAXERRDETAIL); + } + if (hdr->sb_len_wr) { + snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr); + strlcat(msg, msgchunk, MAXERRDETAIL); + for (i = 0; i < hdr->sb_len_wr; i++) { + snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]); + strlcat(msg, msgchunk, MAXERRDETAIL); + } + strlcat(msg, ". ", MAXERRDETAIL); + } + if (hdr->resid != 0) { + snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len); + strlcat(msg, msgchunk, MAXERRDETAIL); + } + } + + if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg)) + strncpy(msg, "SG Driver did not report a Host, Driver or Device check", + MAXERRDETAIL - 1); + + return msg; +} + +/* + * get max file size from read capacity. + */ +static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f) +{ + /* + * get_file_size is being called even before sgio_init is + * called, so none of the sg_io structures are + * initialized in the thread_data yet. So we need to do the + * ReadCapacity without any of those helpers. One of the effects + * is that ReadCapacity may get called 4 times on each open: + * readcap(10) followed by readcap(16) if needed - just to get + * the file size after the init occurs - it will be called + * again when "type_check" is called during structure + * initialization I'm not sure how to prevent this little + * inefficiency. + */ + unsigned int bs = 0; + unsigned long long max_lba = 0; + int ret; + + if (fio_file_size_known(f)) + return 0; + + if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) { + td_verror(td, EINVAL, "wrong file type"); + log_err("ioengine sg only works on block or character devices\n"); + return 1; + } + + ret = fio_sgio_read_capacity(td, &bs, &max_lba); + if (ret ) { + td_verror(td, td->error, "fio_sgio_read_capacity"); + log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n"); + return 1; + } + + f->real_file_size = (max_lba + 1) * bs; + fio_file_set_size_known(f); + return 0; +} + + static struct ioengine_ops ioengine = { .name = "sg", .version = FIO_IOOPS_VERSION, .init = fio_sgio_init, .prep = fio_sgio_prep, .queue = fio_sgio_queue, + .commit = fio_sgio_commit, .getevents = fio_sgio_getevents, + .errdetails = fio_sgio_errdetails, .event = fio_sgio_event, .cleanup = fio_sgio_cleanup, .open_file = fio_sgio_open, .close_file = generic_close_file, + .get_file_size = fio_sgio_get_file_size, .flags = FIO_SYNCIO | FIO_RAWIO, + .options = options, + .option_struct_size = sizeof(struct sg_options) }; #else /* FIO_HAVE_SGIO */ @@ -421,12 +1190,12 @@ static struct ioengine_ops ioengine = { */ static int fio_sgio_init(struct thread_data fio_unused *td) { - fprintf(stderr, "fio: sgio not available\n"); + log_err("fio: ioengine sg not available\n"); return 1; } static struct ioengine_ops ioengine = { - .name = "sgio", + .name = "sg", .version = FIO_IOOPS_VERSION, .init = fio_sgio_init, };