4 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
6 * This ioengine can operate in two modes:
7 * sync with block devices (/dev/sdX) or
8 * with character devices (/dev/sgY) with direct=1 or sync=1
9 * async with character devices with direct=0 and sync=0
11 * What value does queue() return for the different cases?
12 * queue() return value
14 * /dev/sdX RWT FIO_Q_COMPLETED
15 * /dev/sgY RWT FIO_Q_COMPLETED
16 * with direct=1 or sync=1
19 * /dev/sgY RWT FIO_Q_QUEUED
22 * Because FIO_SYNCIO is set for this ioengine td_io_queue() will fill in
23 * issue_time *before* each IO is sent to queue()
25 * Where are the IO counting functions called for the different cases?
28 * /dev/sdX (commit==NULL)
30 * io_u_mark_depth() called in td_io_queue()
31 * io_u_mark_submit/complete() called in td_io_queue()
32 * issue_time set in td_io_queue()
34 * /dev/sgY with direct=1 or sync=1 (commit does nothing)
36 * io_u_mark_depth() called in td_io_queue()
37 * io_u_mark_submit/complete() called in queue()
38 * issue_time set in td_io_queue()
41 * /dev/sgY with direct=0 and sync=0
42 * RW: read and write operations are submitted in queue()
43 * io_u_mark_depth() called in td_io_commit()
44 * io_u_mark_submit() called in queue()
45 * issue_time set in td_io_queue()
46 * T: trim operations are queued in queue() and submitted in commit()
47 * io_u_mark_depth() called in td_io_commit()
48 * io_u_mark_submit() called in commit()
49 * issue_time set in commit()
59 #include "../optgroup.h"
63 #ifndef SGV4_FLAG_HIPRI
64 #define SGV4_FLAG_HIPRI 0x800
/*
 * Write-mode selectors. These values are what the sg_write_mode option
 * stores in sg_options.write_mode and what fio_sgio_prep switches on.
 * NOTE(review): the enum header and its first members are not visible in
 * this excerpt.
 */
/* WRITE SAME(16) with the NDOB flag set. */
71 FIO_SG_WRITE_SAME_NDOB,
/* VERIFY with BYTCHK set to 00, 01 and 11 respectively. */
73 FIO_SG_VERIFY_BYTCHK_00,
74 FIO_SG_VERIFY_BYTCHK_01,
75 FIO_SG_VERIFY_BYTCHK_11,
/*
 * Per-job engine options, filled in by the option table through the
 * offsets it records with offsetof(struct sg_options, ...).
 * NOTE(review): only two members are visible in this excerpt.
 */
/* Boolean: request the FUA flag on write operations. */
82 unsigned int writefua;
/* One of the FIO_SG_* write-mode selectors. */
83 unsigned int write_mode;
/*
 * Engine-specific option table. Each entry parses into the sg_options
 * member named by its .off1 offset; every entry is filed under the
 * FIO_OPT_C_ENGINE category and the FIO_OPT_G_SG group.
 */
87 static struct fio_option options[] = {
/* Polled IO completions; value lands in sg_options.hipri. */
90 .lname = "High Priority",
91 .type = FIO_OPT_STR_SET,
92 .off1 = offsetof(struct sg_options, hipri),
93 .help = "Use polled IO completions",
94 .category = FIO_OPT_C_ENGINE,
95 .group = FIO_OPT_G_SG,
/* Boolean: FUA on reads; value lands in sg_options.readfua. */
99 .lname = "sg engine read fua flag support",
100 .type = FIO_OPT_BOOL,
101 .off1 = offsetof(struct sg_options, readfua),
102 .help = "Set FUA flag (force unit access) for all Read operations",
104 .category = FIO_OPT_C_ENGINE,
105 .group = FIO_OPT_G_SG,
/* Boolean: FUA on writes; value lands in sg_options.writefua. */
109 .lname = "sg engine write fua flag support",
110 .type = FIO_OPT_BOOL,
111 .off1 = offsetof(struct sg_options, writefua),
112 .help = "Set FUA flag (force unit access) for all Write operations",
114 .category = FIO_OPT_C_ENGINE,
115 .group = FIO_OPT_G_SG,
/*
 * Selects which SCSI command a write io_u is turned into. Each posval
 * maps a keyword (.ival) to a FIO_SG_* selector (.oval).
 */
118 .name = "sg_write_mode",
119 .lname = "specify sg write mode",
121 .off1 = offsetof(struct sg_options, write_mode),
122 .help = "Specify SCSI WRITE mode",
126 .oval = FIO_SG_WRITE,
127 .help = "Issue standard SCSI WRITE commands",
129 { .ival = "write_and_verify",
130 .oval = FIO_SG_WRITE_VERIFY,
131 .help = "Issue SCSI WRITE AND VERIFY commands",
/* Deprecated keyword that maps to the same selector as write_and_verify. */
134 .oval = FIO_SG_WRITE_VERIFY,
135 .help = "Issue SCSI WRITE AND VERIFY commands. This "
136 "option is deprecated. Use write_and_verify instead.",
138 { .ival = "write_same",
139 .oval = FIO_SG_WRITE_SAME,
140 .help = "Issue SCSI WRITE SAME commands",
/* Deprecated keyword that maps to the same selector as write_same. */
143 .oval = FIO_SG_WRITE_SAME,
144 .help = "Issue SCSI WRITE SAME commands. This "
145 "option is deprecated. Use write_same instead.",
147 { .ival = "write_same_ndob",
148 .oval = FIO_SG_WRITE_SAME_NDOB,
149 .help = "Issue SCSI WRITE SAME(16) commands with NDOB flag set",
151 { .ival = "verify_bytchk_00",
152 .oval = FIO_SG_VERIFY_BYTCHK_00,
153 .help = "Issue SCSI VERIFY commands with BYTCHK set to 00",
155 { .ival = "verify_bytchk_01",
156 .oval = FIO_SG_VERIFY_BYTCHK_01,
157 .help = "Issue SCSI VERIFY commands with BYTCHK set to 01",
159 { .ival = "verify_bytchk_11",
160 .oval = FIO_SG_VERIFY_BYTCHK_11,
161 .help = "Issue SCSI VERIFY commands with BYTCHK set to 11",
163 { .ival = "write_stream",
164 .oval = FIO_SG_WRITE_STREAM,
165 .help = "Issue SCSI WRITE STREAM(16) commands",
168 .category = FIO_OPT_C_ENGINE,
169 .group = FIO_OPT_G_SG,
/* Stream identifier placed into WRITE STREAM(16) commands by fio_sgio_prep. */
173 .lname = "stream id for WRITE STREAM(16) commands",
175 .off1 = offsetof(struct sg_options, stream_id),
176 .help = "Stream ID for WRITE STREAM(16) commands",
178 .category = FIO_OPT_C_ENGINE,
179 .group = FIO_OPT_G_SG,
/*
 * LBAs below this value are encoded with 10-byte commands; anything at or
 * above it is encoded with the 16-byte form (see fio_sgio_rw_lba and the
 * opcode selection in fio_sgio_prep).
 */
186 #define MAX_10B_LBA 0xFFFFFFFFULL
187 #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
188 #define MAX_SB 64 // sense block maximum return size
/*
 * Gates the trim_queue_map bookkeeping and the assert() checks in the trim
 * paths. NOTE(review): the lines surrounding this define are not visible
 * in this excerpt; confirm whether it is active or commented out.
 */
190 #define FIO_SGIO_DEBUG
/*
 * Per-io_u command storage. sgio_hdr_init picks the entry at
 * sd->cmds[io_u->index], clears cdb and uses the sizes of both arrays for
 * hdr->cmd_len and hdr->mx_sb_len.
 */
194 unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands
195 unsigned char sb[MAX_SB]; // add sense block to commands
/*
 * One trim queue: the ranges of several trim io_us gathered for a single
 * unmap command.
 */
/* Parameter buffer; range n is written at byte offset 8 + 16 * n. */
200 uint8_t *unmap_param;
/* Number of ranges queued so far; reset to 0 once the queue is reaped. */
201 unsigned int unmap_range_count;
/* io_us queued here; entry 0 is the io_u whose hdr carries the command. */
202 struct io_u **trim_io_us;
/*
 * Per-thread engine state, allocated in fio_sgio_init and reached through
 * td->io_ops_data. NOTE(review): several members are not visible in this
 * excerpt.
 */
/* iodepth command slots, indexed by io_u->index. */
206 struct sgio_cmd *cmds;
/* Completed io_us handed back through fio_sgio_event. */
207 struct io_u **events;
/* iodepth trim queues, indexed by the index of the io_u that opened them. */
213 struct sgio_trim **trim_queues;
215 #ifdef FIO_SGIO_DEBUG
/* Debug only: for each io_u index, the trim queue it was added to. */
216 unsigned int *trim_queue_map;
/*
 * Decode a 32-bit big-endian value from the four bytes starting at buf.
 *
 * The bytes are combined one at a time instead of dereferencing buf as a
 * uint32_t pointer: callers pass addresses inside plain byte arrays (for
 * example &buf[4] of a READ CAPACITY response), which are not guaranteed
 * to be suitably aligned and may not be accessed through a uint32_t lvalue
 * without breaking the aliasing rules. Shifting also makes the result
 * independent of host byte order.
 *
 * buf must point to at least four readable bytes.
 */
static inline uint32_t sgio_get_be32(uint8_t *buf)
{
	return ((uint32_t) buf[0] << 24) |
	       ((uint32_t) buf[1] << 16) |
	       ((uint32_t) buf[2] << 8) |
	       (uint32_t) buf[3];
}
/*
 * Decode a 64-bit big-endian value from the eight bytes starting at buf.
 *
 * The value is assembled byte by byte rather than by casting buf to a
 * uint64_t pointer, so the read is valid for any alignment of buf, does
 * not violate the aliasing rules when buf points into a byte array, and
 * does not depend on host byte order.
 *
 * buf must point to at least eight readable bytes.
 */
static inline uint64_t sgio_get_be64(uint8_t *buf)
{
	uint64_t val = 0;
	unsigned int i;

	/* Most significant byte comes first in the buffer. */
	for (i = 0; i < sizeof(val); i++)
		val = (val << 8) | buf[i];

	return val;
}
/*
 * Store val at buf as a 16-bit big-endian quantity. The converted value is
 * copied with memcpy, so buf may have any alignment.
 */
static inline void sgio_set_be16(uint16_t val, uint8_t *buf)
{
	const uint16_t be = cpu_to_be16(val);

	memcpy(buf, &be, sizeof(be));
}
/*
 * Store val at buf as a 32-bit big-endian quantity. The converted value is
 * copied with memcpy, so buf may have any alignment.
 */
static inline void sgio_set_be32(uint32_t val, uint8_t *buf)
{
	const uint32_t be = cpu_to_be32(val);

	memcpy(buf, &be, sizeof(be));
}
/*
 * Store val at buf as a 64-bit big-endian quantity. The converted value is
 * copied with memcpy, so buf may have any alignment.
 */
static inline void sgio_set_be64(uint64_t val, uint8_t *buf)
{
	const uint64_t be = cpu_to_be64(val);

	memcpy(buf, &be, sizeof(be));
}
251 static inline bool sgio_unbuffered(struct thread_data *td)
253 return (td->o.odirect || td->o.sync_io);
/*
 * Reset hdr and the command slot belonging to io_u, then fill in the
 * fields that are the same for every command.
 *
 * sd:   engine state holding the per-io_u command slots
 * hdr:  sg header to initialise; it is zeroed first
 * io_u: io unit the header belongs to; its index selects the slot and is
 *       also stored as the pack_id
 * fs:   callers in fio_sgio_prep pass 1 for reads and writes and 0 for
 *       trim and sync. NOTE(review): the statement that tests fs is not
 *       visible in this excerpt; presumably it gates the dxferp/dxfer_len
 *       assignments at the end.
 */
256 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
257 struct io_u *io_u, int fs)
259 struct sgio_cmd *sc = &sd->cmds[io_u->index];
/* Start from a clean header and a clean command descriptor block. */
261 memset(hdr, 0, sizeof(*hdr));
262 memset(sc->cdb, 0, sizeof(sc->cdb));
264 hdr->interface_id = 'S';
/* Always advertise the full 16-byte cdb and the full sense buffer. */
266 hdr->cmd_len = sizeof(sc->cdb);
268 hdr->mx_sb_len = sizeof(sc->sb);
269 hdr->pack_id = io_u->index;
271 hdr->timeout = SCSI_TIMEOUT_MS;
/* Data phase uses the transfer buffer of the io_u directly. */
274 hdr->dxferp = io_u->xfer_buf;
275 hdr->dxfer_len = io_u->xfer_buflen;
/*
 * Scan the first fds entries of pfds for a descriptor whose revents has
 * POLLIN set. NOTE(review): the return statements are not visible in this
 * excerpt; the caller uses the result as a truth value.
 */
279 static int pollin_events(struct pollfd *pfds, int fds)
283 for (i = 0; i < fds; i++)
284 if (pfds[i].revents & POLLIN)
/*
 * Read size bytes from fd into data. EAGAIN and EINTR from read() are
 * singled out for special treatment. NOTE(review): the loop structure and
 * the return values are not visible in this excerpt; presumably the read
 * is retried on those two errors.
 */
290 static int sg_fd_read(int fd, void *data, size_t size)
297 ret = read(fd, data, size);
299 if (errno == EAGAIN || errno == EINTR)
/*
 * Reap completed sg commands from the files of this job and publish the
 * finished io_us in sd->events[] for fio_sgio_event to hand out.
 *
 * Completed headers are first read into sd->sgbuf, then walked a second
 * time to fill sd->events[]. A single completed unmap header stands for
 * every io_u that was queued on its trim queue.
 *
 * NOTE(review): parts of this function (the max parameter, the poll loop
 * framing and the return statements) are not visible in this excerpt.
 */
319 static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
321 const struct timespec fio_unused *t)
323 struct sgio_data *sd = td->io_ops_data;
324 int left = max, eventNum, ret, r = 0, trims = 0;
325 void *buf = sd->sgbuf;
326 unsigned int i, j, events;
331 * Fill in the file descriptors
333 for_each_file(td, f, i) {
335 * don't block for min events == 0
/* fd_flags[i] keeps the flags to restore later; -1 marks an untouched fd. */
338 sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
340 sd->fd_flags[i] = -1;
342 sd->pfds[i].fd = f->fd;
343 sd->pfds[i].events = POLLIN;
347 ** There are two counters here:
348 ** - number of SCSI commands completed
349 ** - number of io_us completed
351 ** These are the same with reads and writes, but
352 ** could differ with trim/unmap commands because
353 ** a single unmap can include multiple io_us
359 dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left);
/* Wait without timeout until one of the sg descriptors is readable. */
365 ret = poll(sd->pfds, td->o.nr_files, -1);
369 td_verror(td, errno, "poll");
374 if (pollin_events(sd->pfds, td->o.nr_files))
/* First pass: pull completed headers, one sg_io_hdr per read, into buf. */
384 for_each_file(td, f, i) {
385 for (eventNum = 0; eventNum < left; eventNum++) {
386 ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
387 dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret);
390 td_verror(td, r, "sg_read");
393 io_u = ((struct sg_io_hdr *)p)->usr_ptr;
/*
 * An unmap header accounts for all io_us of its trim queue, so both the
 * io_u counter and the loop counter advance by the queue length.
 */
394 if (io_u->ddir == DDIR_TRIM) {
395 events += sd->trim_queues[io_u->index]->unmap_range_count;
396 eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1;
400 p += sizeof(struct sg_io_hdr);
401 dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left);
405 if (r < 0 && !events)
/*
 * Second pass: walk the headers collected in buf and record the io_u of
 * each one in sd->events[]. The trims offset makes room for the extra
 * io_us contributed by unmap headers.
 */
415 for (i = 0; i < events; i++) {
416 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
417 sd->events[i + trims] = hdr->usr_ptr;
418 io_u = (struct io_u *)(hdr->usr_ptr);
420 if (hdr->info & SG_INFO_CHECK) {
421 /* record if an io error occurred, ignore resid */
422 memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
423 sd->events[i + trims]->error = EIO;
426 if (io_u->ddir == DDIR_TRIM) {
427 struct sgio_trim *st = sd->trim_queues[io_u->index];
428 #ifdef FIO_SGIO_DEBUG
429 assert(st->trim_io_us[0] == io_u);
430 assert(sd->trim_queue_map[io_u->index] == io_u->index);
431 dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index);
432 dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims);
/*
 * Entry 0 of the queue is the io_u already stored above; append the
 * remaining queued io_us. NOTE(review): the statement that advances trims
 * is not visible in this excerpt.
 */
434 for (j = 1; j < st->unmap_range_count; j++) {
436 sd->events[i + trims] = st->trim_io_us[j];
437 #ifdef FIO_SGIO_DEBUG
438 dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims);
439 assert(sd->trim_queue_map[st->trim_io_us[j]->index] == io_u->index);
441 if (hdr->info & SG_INFO_CHECK) {
442 /* record if an io error occurred, ignore resid */
443 memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr));
444 sd->events[i + trims]->error = EIO;
/*
 * The loop index walks headers while events counted io_us, so the extra
 * io_us of this queue are taken off the loop bound; the queue is then
 * marked empty for reuse.
 */
447 events -= st->unmap_range_count - 1;
448 st->unmap_range_count = 0;
/* Put back the descriptor flags saved at the top; -1 means nothing to undo. */
454 for_each_file(td, f, i) {
455 if (sd->fd_flags[i] == -1)
458 if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
459 log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
/*
 * Execute the command of io_u synchronously through the SG_IO ioctl.
 * fio_sgio_doio takes this path for block devices. The io_u is stored in
 * sd->events[0] and FIO_Q_COMPLETED is returned on the visible path.
 * NOTE(review): the handling of a failing ioctl and the statement executed
 * when SG_INFO_CHECK is set are not visible in this excerpt.
 */
466 static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td,
470 struct sgio_data *sd = td->io_ops_data;
471 struct sg_io_hdr *hdr = &io_u->hdr;
474 sd->events[0] = io_u;
476 ret = ioctl(f->fd, SG_IO, hdr);
480 /* record if an io error occurred */
481 if (hdr->info & SG_INFO_CHECK)
484 return FIO_Q_COMPLETED;
/*
 * Submit the command of io_u by writing its sg header to the sg character
 * device, and read completions back from the same descriptor.
 *
 * NOTE(review): the branch on do_sync and the loop framing are not visible
 * in this excerpt. The visible read() stores the completed header in the
 * hdr of the submitting io_u, and the completed io_u is then taken from
 * hdr->usr_ptr, so the header read back may belong to a different command
 * than the one just submitted.
 */
487 static enum fio_q_status fio_sgio_rw_doio(struct thread_data *td,
489 struct io_u *io_u, int do_sync)
491 struct sg_io_hdr *hdr = &io_u->hdr;
/* Submission: hand the whole header to the sg driver. */
494 ret = write(f->fd, hdr, sizeof(*hdr));
500 * We can't just read back the first command that completes
501 * and assume it's the one we need, it could be any command
507 ret = read(f->fd, hdr, sizeof(*hdr));
511 __io_u = hdr->usr_ptr;
513 /* record if an io error occurred */
514 if (hdr->info & SG_INFO_CHECK)
520 if (io_u_sync_complete(td, __io_u))
525 return FIO_Q_COMPLETED;
/*
 * Dispatch io_u to the right submission path: the SG_IO ioctl for block
 * devices, write()/read() on the sg descriptor otherwise. Errors left in
 * io_u->error are reported through td_verror; on the read/write path this
 * is done only for synchronous requests.
 * NOTE(review): the condition guarding the first td_verror call and the
 * return statement are not visible in this excerpt.
 */
531 static enum fio_q_status fio_sgio_doio(struct thread_data *td,
532 struct io_u *io_u, int do_sync)
534 struct fio_file *f = io_u->file;
535 enum fio_q_status ret;
537 if (f->filetype == FIO_TYPE_BLOCK) {
538 ret = fio_sgio_ioctl_doio(td, f, io_u);
540 td_verror(td, io_u->error, __func__);
542 ret = fio_sgio_rw_doio(td, f, io_u, do_sync);
543 if (io_u->error && do_sync)
544 td_verror(td, io_u->error, __func__);
550 static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
551 unsigned long long nr_blocks, bool override16)
553 if (lba < MAX_10B_LBA && !override16) {
554 sgio_set_be32((uint32_t) lba, &hdr->cmdp[2]);
555 sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[7]);
557 sgio_set_be64(lba, &hdr->cmdp[2]);
558 sgio_set_be32((uint32_t) nr_blocks, &hdr->cmdp[10]);
/*
 * Build the sg header and cdb for io_u according to its data direction:
 * reads, writes (in the flavour selected by sg_write_mode), trims (which
 * are gathered into an unmap queue) and syncs.
 *
 * NOTE(review): case labels, break statements and several conditions of
 * this function are not visible in this excerpt.
 */
564 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
566 struct sg_io_hdr *hdr = &io_u->hdr;
567 struct sg_options *o = td->eo;
568 struct sgio_data *sd = td->io_ops_data;
569 unsigned long long nr_blocks, lba;
/*
 * Reject transfers that are not a whole number of blocks. The mask form
 * of the test presumes sd->bs is a power of two - TODO confirm.
 */
572 if (io_u->xfer_buflen & (sd->bs - 1)) {
573 log_err("read/write not sector aligned\n");
/* Convert byte length and byte offset to block units. */
577 nr_blocks = io_u->xfer_buflen / sd->bs;
578 lba = io_u->offset / sd->bs;
580 if (io_u->ddir == DDIR_READ) {
581 sgio_hdr_init(sd, hdr, io_u, 1);
583 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
584 if (lba < MAX_10B_LBA)
585 hdr->cmdp[0] = 0x28; // read(10)
587 hdr->cmdp[0] = 0x88; // read(16)
/*
 * NOTE(review): the conditions guarding the next two statements are not
 * visible here; presumably they test the hipri and readfua options.
 */
590 hdr->flags |= SGV4_FLAG_HIPRI;
592 hdr->cmdp[1] |= 0x08;
594 fio_sgio_rw_lba(hdr, lba, nr_blocks, false);
596 } else if (io_u->ddir == DDIR_WRITE) {
597 sgio_hdr_init(sd, hdr, io_u, 1);
599 hdr->dxfer_direction = SG_DXFER_TO_DEV;
/* Pick the opcode (and extra cdb bits) for the configured write mode. */
600 switch(o->write_mode) {
602 if (lba < MAX_10B_LBA)
603 hdr->cmdp[0] = 0x2a; // write(10)
605 hdr->cmdp[0] = 0x8a; // write(16)
607 hdr->flags |= SGV4_FLAG_HIPRI;
609 hdr->cmdp[1] |= 0x08;
611 case FIO_SG_WRITE_VERIFY:
612 if (lba < MAX_10B_LBA)
613 hdr->cmdp[0] = 0x2e; // write and verify(10)
615 hdr->cmdp[0] = 0x8e; // write and verify(16)
617 // BYTCHK is disabled by virtue of the memset in sgio_hdr_init
618 case FIO_SG_WRITE_SAME:
/* Only one block of data is sent; the command covers nr_blocks blocks. */
619 hdr->dxfer_len = sd->bs;
620 if (lba < MAX_10B_LBA)
621 hdr->cmdp[0] = 0x41; // write same(10)
623 hdr->cmdp[0] = 0x93; // write same(16)
625 case FIO_SG_WRITE_SAME_NDOB:
626 hdr->cmdp[0] = 0x93; // write same(16)
627 hdr->cmdp[1] |= 0x1; // no data output buffer
630 case FIO_SG_WRITE_STREAM:
/* WRITE STREAM has its own cdb layout, so LBA and count are set here. */
631 hdr->cmdp[0] = 0x9a; // write stream (16)
633 hdr->cmdp[1] |= 0x08;
634 sgio_set_be64(lba, &hdr->cmdp[2]);
635 sgio_set_be16(o->stream_id, &hdr->cmdp[10]);
636 sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[12]);
638 case FIO_SG_VERIFY_BYTCHK_00:
639 if (lba < MAX_10B_LBA)
640 hdr->cmdp[0] = 0x2f; // VERIFY(10)
642 hdr->cmdp[0] = 0x8f; // VERIFY(16)
645 case FIO_SG_VERIFY_BYTCHK_01:
646 if (lba < MAX_10B_LBA)
647 hdr->cmdp[0] = 0x2f; // VERIFY(10)
649 hdr->cmdp[0] = 0x8f; // VERIFY(16)
650 hdr->cmdp[1] |= 0x02; // BYTCHK = 01b
652 case FIO_SG_VERIFY_BYTCHK_11:
653 if (lba < MAX_10B_LBA)
654 hdr->cmdp[0] = 0x2f; // VERIFY(10)
656 hdr->cmdp[0] = 0x8f; // VERIFY(16)
657 hdr->cmdp[1] |= 0x06; // BYTCHK = 11b
658 hdr->dxfer_len = sd->bs;
/*
 * Every mode except WRITE STREAM uses the common LBA/count encoding; the
 * NDOB variant always uses the 16-byte layout.
 */
662 if (o->write_mode != FIO_SG_WRITE_STREAM)
663 fio_sgio_rw_lba(hdr, lba, nr_blocks,
664 o->write_mode == FIO_SG_WRITE_SAME_NDOB);
666 } else if (io_u->ddir == DDIR_TRIM) {
667 struct sgio_trim *st;
/*
 * No queue open: this io_u opens one. Its header carries the unmap
 * command and its index names the queue.
 */
669 if (sd->current_queue == -1) {
670 sgio_hdr_init(sd, hdr, io_u, 0);
673 hdr->dxfer_direction = SG_DXFER_TO_DEV;
674 hdr->cmdp[0] = 0x42; // unmap
675 sd->current_queue = io_u->index;
676 st = sd->trim_queues[sd->current_queue];
677 hdr->dxferp = st->unmap_param;
678 #ifdef FIO_SGIO_DEBUG
679 assert(sd->trim_queues[io_u->index]->unmap_range_count == 0);
680 dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index);
/* A queue is already open: add this io_u to it. */
684 st = sd->trim_queues[sd->current_queue];
686 dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue);
687 st->trim_io_us[st->unmap_range_count] = io_u;
688 #ifdef FIO_SGIO_DEBUG
689 sd->trim_queue_map[io_u->index] = sd->current_queue;
/*
 * Ranges follow an 8-byte header, 16 bytes each: a 64-bit LBA followed
 * by a 32-bit block count.
 */
692 offset = 8 + 16 * st->unmap_range_count;
693 sgio_set_be64(lba, &st->unmap_param[offset]);
694 sgio_set_be32((uint32_t) nr_blocks, &st->unmap_param[offset + 8]);
696 st->unmap_range_count++;
698 } else if (ddir_sync(io_u->ddir)) {
/* Sync requests carry no data phase. */
699 sgio_hdr_init(sd, hdr, io_u, 0);
700 hdr->dxfer_direction = SG_DXFER_NONE;
701 if (lba < MAX_10B_LBA)
702 hdr->cmdp[0] = 0x35; // synccache(10)
704 hdr->cmdp[0] = 0x91; // synccache(16)
711 static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st)
713 uint16_t cnt = st->unmap_range_count * 16;
715 hdr->dxfer_len = cnt + 8;
716 sgio_set_be16(cnt + 8, &hdr->cmdp[7]);
717 sgio_set_be16(cnt + 6, st->unmap_param);
718 sgio_set_be16(cnt, &st->unmap_param[2]);
/*
 * Queue hook of the engine: submit io_u, or for asynchronous trims leave
 * it on the current trim queue for fio_sgio_commit to send.
 *
 * A request is treated as synchronous when the job is unbuffered or the
 * io_u is a sync operation.
 *
 * NOTE(review): the parameter list tail, several branch bodies and the
 * other return statements are not visible in this excerpt.
 */
723 static enum fio_q_status fio_sgio_queue(struct thread_data *td,
726 struct sg_io_hdr *hdr = &io_u->hdr;
727 struct sgio_data *sd = td->io_ops_data;
728 int ret, do_sync = 0;
730 fio_ro_check(td, io_u);
732 if (sgio_unbuffered(td) || ddir_sync(io_u->ddir))
735 if (io_u->ddir == DDIR_TRIM) {
/*
 * Synchronous trims and trims on block devices are sent one range per
 * command, so the queue opened in prep is finalised and closed here.
 */
736 if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) {
737 struct sgio_trim *st = sd->trim_queues[sd->current_queue];
739 /* finish cdb setup for unmap because we are
740 ** doing unmap commands synchronously */
741 #ifdef FIO_SGIO_DEBUG
742 assert(st->unmap_range_count == 1);
743 assert(io_u == st->trim_io_us[0]);
747 fio_sgio_unmap_setup(hdr, st);
749 st->unmap_range_count = 0;
750 sd->current_queue = -1;
752 /* queue up trim ranges and submit in commit() */
756 ret = fio_sgio_doio(td, io_u, do_sync);
/* A non-zero SCSI status carries the residual count over to the io_u. */
760 else if (hdr->status) {
761 io_u->resid = hdr->resid;
/*
 * Accounting for the paths that have a commit hook (it is cleared for
 * block devices in fio_sgio_type_check): synchronous data commands are
 * marked submitted and completed at once, asynchronous reads and writes
 * are marked submitted and queued.
 */
763 } else if (td->io_ops->commit != NULL) {
764 if (do_sync && !ddir_sync(io_u->ddir)) {
765 io_u_mark_submit(td, 1);
766 io_u_mark_complete(td, 1);
767 } else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
768 io_u_mark_submit(td, 1);
769 io_u_queued(td, io_u);
774 td_verror(td, io_u->error, "xfer");
775 return FIO_Q_COMPLETED;
/*
 * Commit hook of the engine: send the unmap command for the currently
 * open trim queue, if any, and account for the io_us it carries.
 *
 * NOTE(review): local declarations, the assignment of hdr, the early
 * return for an empty queue and the final return are not visible in this
 * excerpt.
 */
781 static int fio_sgio_commit(struct thread_data *td)
783 struct sgio_data *sd = td->io_ops_data;
784 struct sgio_trim *st;
786 struct sg_io_hdr *hdr;
/* -1 means no trim queue is open. */
791 if (sd->current_queue == -1)
/* The first io_u of the queue owns the header that carries the command. */
794 st = sd->trim_queues[sd->current_queue];
795 io_u = st->trim_io_us[0];
798 fio_sgio_unmap_setup(hdr, st);
/* Close the queue before submitting so the next trim opens a new one. */
800 sd->current_queue = -1;
802 ret = fio_sgio_rw_doio(td, io_u->file, io_u, 0);
/* On failure every io_u of the queue gets the same error. */
804 if (ret < 0 || hdr->status) {
814 for (i = 0; i < st->unmap_range_count; i++) {
815 st->trim_io_us[i]->error = error;
816 clear_io_u(td, st->trim_io_us[i]);
818 st->trim_io_us[i]->resid = hdr->resid;
821 td_verror(td, error, "xfer");
/* All io_us of the queue share one issue timestamp. */
825 if (fio_fill_issue_time(td)) {
826 fio_gettime(&now, NULL);
827 for (i = 0; i < st->unmap_range_count; i++) {
828 memcpy(&st->trim_io_us[i]->issue_time, &now, sizeof(now));
/*
 * NOTE(review): this passes the head io_u on every iteration rather than
 * st->trim_io_us[i] - confirm that this is intended.
 */
829 io_u_queued(td, io_u);
832 io_u_mark_submit(td, st->unmap_range_count);
837 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
839 struct sgio_data *sd = td->io_ops_data;
841 return sd->events[event];
/*
 * Query block size and highest LBA of the first file of the job with
 * READ CAPACITY, using a private descriptor and a stack-local sg header.
 *
 * READ CAPACITY(10) is tried first; the 16-byte variant is issued when
 * the 10-byte answer reports an LBA equal to MAX_10B_LBA.
 *
 * td:      thread whose first file is queried
 * bs:      presumably receives the block size - the store is not visible
 *          in this excerpt
 * max_lba: presumably receives the highest LBA - likewise not visible
 *
 * NOTE(review): error returns, the close of fd and the final stores are
 * not visible in this excerpt.
 */
844 static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
845 unsigned long long *max_lba)
848 * need to do read capacity operation w/o benefit of sd or
849 * io_u structures, which are not initialized until later.
851 struct sg_io_hdr hdr;
852 unsigned long long hlba;
853 unsigned int blksz = 0;
854 unsigned char cmd[16];
855 unsigned char sb[64];
856 unsigned char buf[32]; // read capacity return
860 struct fio_file *f = td->files[0];
862 /* open file independent of rest of application */
863 fd = open(f->file_name, O_RDONLY);
867 memset(&hdr, 0, sizeof(hdr));
868 memset(cmd, 0, sizeof(cmd));
869 memset(sb, 0, sizeof(sb));
870 memset(buf, 0, sizeof(buf));
872 /* First let's try a 10 byte read capacity. */
873 hdr.interface_id = 'S';
877 hdr.mx_sb_len = sizeof(sb);
878 hdr.timeout = SCSI_TIMEOUT_MS;
879 hdr.cmdp[0] = 0x25; // Read Capacity(10)
880 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
882 hdr.dxfer_len = sizeof(buf);
884 ret = ioctl(fd, SG_IO, &hdr);
890 if (hdr.info & SG_INFO_CHECK) {
891 /* RCAP(10) might be unsupported by device. Force RCAP(16) */
/* 10-byte answer: 32-bit LBA at offset 0, 32-bit block size at offset 4. */
894 blksz = sgio_get_be32(&buf[4]);
895 hlba = sgio_get_be32(buf);
899 * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA,
900 * then need to retry with 16 byte Read Capacity command.
902 if (hlba == MAX_10B_LBA) {
904 hdr.cmdp[0] = 0x9e; // service action
905 hdr.cmdp[1] = 0x10; // Read Capacity(16)
/* Allocation length of the 16-byte command is the size of buf. */
906 sgio_set_be32(sizeof(buf), &hdr.cmdp[10]);
908 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
910 hdr.dxfer_len = sizeof(buf);
912 ret = ioctl(fd, SG_IO, &hdr);
918 /* record if an io error occurred */
919 if (hdr.info & SG_INFO_CHECK)
920 td_verror(td, EIO, "fio_sgio_read_capacity");
/* 16-byte answer: 64-bit LBA at offset 0, 32-bit block size at offset 8. */
922 blksz = sgio_get_be32(&buf[8]);
923 hlba = sgio_get_be64(buf);
/*
 * Cleanup hook of the engine: release the trim queues (one per iodepth
 * slot, each with its parameter buffer and io_u array) and the debug map.
 * NOTE(review): the frees of the remaining sgio_data members and any NULL
 * guard on sd are not visible in this excerpt.
 */
938 static void fio_sgio_cleanup(struct thread_data *td)
940 struct sgio_data *sd = td->io_ops_data;
949 #ifdef FIO_SGIO_DEBUG
950 free(sd->trim_queue_map);
/* Free the members of each queue before the queue itself. */
953 for (i = 0; i < td->o.iodepth; i++) {
954 free(sd->trim_queues[i]->unmap_param);
955 free(sd->trim_queues[i]->trim_io_us);
956 free(sd->trim_queues[i]);
959 free(sd->trim_queues);
/*
 * Init hook of the engine: allocate the per-thread sgio_data with all of
 * its iodepth-sized and nr_files-sized arrays, and attach it to
 * td->io_ops_data.
 *
 * NOTE(review): none of the visible calloc results is checked for NULL
 * before it is dereferenced. The return statement is not visible in this
 * excerpt.
 */
964 static int fio_sgio_init(struct thread_data *td)
966 struct sgio_data *sd;
967 struct sgio_trim *st;
968 struct sg_io_hdr *h3p;
971 sd = calloc(1, sizeof(*sd));
/* One command slot, one header and one event slot per queue depth entry. */
972 sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd));
973 sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr));
974 sd->events = calloc(td->o.iodepth, sizeof(struct io_u *));
/* One poll entry and one saved-flags entry per file. */
975 sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
976 sd->fd_flags = calloc(td->o.nr_files, sizeof(int));
977 sd->type_checked = 0;
979 sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *));
/* -1 means no trim queue is open. */
980 sd->current_queue = -1;
981 #ifdef FIO_SGIO_DEBUG
982 sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int));
984 for (i = 0, h3p = sd->sgbuf; i < td->o.iodepth; i++, ++h3p) {
985 sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim));
986 st = sd->trim_queues[i];
/*
 * (iodepth + 1) * 16 bytes covers the 8-byte header plus one 16-byte
 * range for each of up to iodepth io_us.
 */
987 st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16]));
988 st->unmap_range_count = 0;
989 st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
990 h3p->interface_id = 'S';
993 td->io_ops_data = sd;
996 * we want to do it, regardless of whether odirect is set or not
998 td->o.override_sync = 1;
/*
 * One-time check that f is a device this engine can drive, and setup that
 * depends on the device type.
 *
 * Block devices are asked for their sector size with BLKSSZGET; character
 * devices must answer SG_GET_VERSION_NUM and READ CAPACITY. Any other
 * file type is rejected with EINVAL. For block devices the getevents,
 * event and commit hooks are cleared.
 *
 * NOTE(review): return statements and the stores of the block size are
 * not visible in this excerpt.
 */
1002 static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
1004 struct sgio_data *sd = td->io_ops_data;
1005 unsigned int bs = 0;
1006 unsigned long long max_lba = 0;
1008 if (f->filetype == FIO_TYPE_BLOCK) {
1009 if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
1010 td_verror(td, errno, "ioctl");
1013 } else if (f->filetype == FIO_TYPE_CHAR) {
1016 if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
1017 td_verror(td, errno, "ioctl");
1021 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
1023 td_verror(td, td->error, "fio_sgio_read_capacity");
1024 log_err("ioengine sg unable to read capacity successfully\n");
1028 td_verror(td, EINVAL, "wrong file type");
1029 log_err("ioengine sg only works on block or character devices\n");
1034 // Determine size of commands needed based on max_lba
1035 if (max_lba >= MAX_10B_LBA) {
1036 dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
1037 "commands for lba above 0x%016llx/0x%016llx\n",
1038 MAX_10B_LBA, max_lba);
/*
 * Block devices go through the synchronous ioctl path, so the hooks for
 * asynchronous completion are removed from the ops table.
 */
1041 if (f->filetype == FIO_TYPE_BLOCK) {
1042 td->io_ops->getevents = NULL;
1043 td->io_ops->event = NULL;
1044 td->io_ops->commit = NULL;
1046 ** Setting these functions to null may cause problems
1047 ** with filename=/dev/sda:/dev/sg0 since we are only
1048 ** considering a single file
/* Remember that the check ran so fio_sgio_open does not repeat it. */
1051 sd->type_checked = 1;
/*
 * Open hook of the engine: open f with the generic helper, then run the
 * device type check once per thread. If the check fails the file is
 * closed again.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1056 static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
1058 struct sgio_data *sd = td->io_ops_data;
1061 ret = generic_open_file(td, f);
/* The check is skipped when there is no engine data or it already ran. */
1065 if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
1066 ret = generic_close_file(td, f);
1074 * Build an error string with details about the driver, host or SCSI
1075 * error contained in the sg header. The caller will use it as necessary.
/*
 * Errdetails hook of the engine: format the status fields of the sg
 * header of io_u into a freshly allocated string of at most MAXERRDETAIL
 * bytes.
 *
 * The text is assembled piece by piece with strlcat, which bounds every
 * append by MAXERRDETAIL. When SG_INFO_CHECK is set the host status,
 * driver status, SCSI status, sense data, residual count and cdb are
 * reported; for trims the parameter buffer is dumped as well.
 *
 * NOTE(review): the case labels of the switches, the check of the calloc
 * result and the return statement are not visible in this excerpt.
 */
1077 static char *fio_sgio_errdetails(struct io_u *io_u)
1079 struct sg_io_hdr *hdr = &io_u->hdr;
1080 #define MAXERRDETAIL 1024
1081 #define MAXMSGCHUNK 128
1082 char *msg, msgchunk[MAXMSGCHUNK];
1085 msg = calloc(1, MAXERRDETAIL);
1089 * can't seem to find sg_err.h, so I'll just echo the define values
1090 * so others can search on internet to find clearer clues of meaning.
1092 if (hdr->info & SG_INFO_CHECK) {
/* Host adapter status: raw value followed by a symbolic name. */
1093 if (hdr->host_status) {
1094 snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
1095 strlcat(msg, msgchunk, MAXERRDETAIL);
1096 switch (hdr->host_status) {
1098 strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
1101 strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
1104 strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
1107 strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
1110 strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
1113 strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
1116 strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
1119 strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
1122 strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
1125 strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
1128 strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
1131 strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
1134 strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
1137 strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
1140 strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
1143 strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
1146 strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
1149 strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
1152 strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
1155 strlcat(msg, "Unknown", MAXERRDETAIL);
1158 strlcat(msg, ". ", MAXERRDETAIL);
/*
 * Driver status: the low nibble names the driver condition, the high
 * nibble the suggested action.
 */
1160 if (hdr->driver_status) {
1161 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
1162 strlcat(msg, msgchunk, MAXERRDETAIL);
1163 switch (hdr->driver_status & 0x0F) {
1165 strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
1168 strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
1171 strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
1174 strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
1177 strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
1180 strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
1183 strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
1186 strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
1189 strlcat(msg, "Unknown", MAXERRDETAIL);
1192 strlcat(msg, "; ", MAXERRDETAIL);
1193 switch (hdr->driver_status & 0xF0) {
1195 strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
1198 strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
1201 strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
1204 strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
1207 strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
1210 strlcat(msg, ". ", MAXERRDETAIL);
/* SCSI status byte returned by the device. */
1213 snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
1214 strlcat(msg, msgchunk, MAXERRDETAIL);
1215 // SCSI 3 status codes
1216 switch (hdr->status) {
1218 strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
1221 strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
1224 strlcat(msg, "BUSY", MAXERRDETAIL);
1227 strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
1230 strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
1233 strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
1236 strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
1239 strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
1242 strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
1245 strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
1248 strlcat(msg, "Unknown", MAXERRDETAIL);
1251 strlcat(msg, ". ", MAXERRDETAIL);
/* Hex dump of the sense bytes the driver wrote back. */
1253 if (hdr->sb_len_wr) {
1254 snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
1255 strlcat(msg, msgchunk, MAXERRDETAIL);
1256 for (i = 0; i < hdr->sb_len_wr; i++) {
1257 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
1258 strlcat(msg, msgchunk, MAXERRDETAIL);
1260 strlcat(msg, ". ", MAXERRDETAIL);
1262 if (hdr->resid != 0) {
1263 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
1264 strlcat(msg, msgchunk, MAXERRDETAIL);
/* Hex dump of the command descriptor block that was sent. */
1267 strlcat(msg, "cdb:", MAXERRDETAIL);
1268 for (i = 0; i < hdr->cmd_len; i++) {
1269 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->cmdp[i]);
1270 strlcat(msg, msgchunk, MAXERRDETAIL);
1272 strlcat(msg, ". ", MAXERRDETAIL);
/* For trims, also dump the parameter buffer that went with the command. */
1273 if (io_u->ddir == DDIR_TRIM) {
1274 unsigned char *param_list = hdr->dxferp;
1275 strlcat(msg, "dxferp:", MAXERRDETAIL);
1276 for (i = 0; i < hdr->dxfer_len; i++) {
1277 snprintf(msgchunk, MAXMSGCHUNK, " %02x", param_list[i]);
1278 strlcat(msg, msgchunk, MAXERRDETAIL);
1280 strlcat(msg, ". ", MAXERRDETAIL);
/* Nothing was flagged and nothing was written: say so explicitly. */
1285 if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
1286 snprintf(msg, MAXERRDETAIL, "%s",
1287 "SG Driver did not report a Host, Driver or Device check");
1293 * get max file size from read capacity.
/*
 * Get_file_size hook of the engine: derive the size of f from the device
 * capacity, as (highest LBA + 1) * block size, and mark the size as
 * known. Does nothing further when the size is already known, and
 * rejects anything that is neither a block nor a character device.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1295 static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
1298 * get_file_size is being called even before sgio_init is
1299 * called, so none of the sg_io structures are
1300 * initialized in the thread_data yet. So we need to do the
1301 * ReadCapacity without any of those helpers. One of the effects
1302 * is that ReadCapacity may get called 4 times on each open:
1303 * readcap(10) followed by readcap(16) if needed - just to get
1304 * the file size after the init occurs - it will be called
1305 * again when "type_check" is called during structure
1306 * initialization I'm not sure how to prevent this little
1309 unsigned int bs = 0;
1310 unsigned long long max_lba = 0;
1313 if (fio_file_size_known(f))
1316 if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) {
1317 td_verror(td, EINVAL, "wrong file type");
1318 log_err("ioengine sg only works on block or character devices\n");
1322 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
1324 td_verror(td, td->error, "fio_sgio_read_capacity");
1325 log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
/* max_lba is the last valid block, so the block count is max_lba + 1. */
1329 f->real_file_size = (max_lba + 1) * bs;
1330 fio_file_set_size_known(f);
/*
 * Ops table of the sg engine. fio_sgio_type_check clears the getevents,
 * event and commit entries at run time when the file is a block device.
 * Opening goes through fio_sgio_open; closing uses the generic helper.
 */
1335 static struct ioengine_ops ioengine = {
1337 .version = FIO_IOOPS_VERSION,
1338 .init = fio_sgio_init,
1339 .prep = fio_sgio_prep,
1340 .queue = fio_sgio_queue,
1341 .commit = fio_sgio_commit,
1342 .getevents = fio_sgio_getevents,
1343 .errdetails = fio_sgio_errdetails,
1344 .event = fio_sgio_event,
1345 .cleanup = fio_sgio_cleanup,
1346 .open_file = fio_sgio_open,
1347 .close_file = generic_close_file,
1348 .get_file_size = fio_sgio_get_file_size,
1349 .flags = FIO_SYNCIO | FIO_RAWIO,
/* Size of the per-job option block that the option table writes into. */
1351 .option_struct_size = sizeof(struct sg_options)
1354 #else /* FIO_HAVE_SGIO */
1357 * When we have a proper configure system in place, we simply wont build
1358 * and install this io engine. For now install a crippled version that
1359 * just complains and fails to load.
/*
 * Fallback init used when sg support is not compiled in: it only logs
 * that the engine is unavailable. NOTE(review): the return statement is
 * not visible in this excerpt.
 */
1361 static int fio_sgio_init(struct thread_data fio_unused *td)
1363 log_err("fio: ioengine sg not available\n");
/*
 * Ops table of the fallback build: only the version and the complaining
 * init hook are visible in this excerpt.
 */
1367 static struct ioengine_ops ioengine = {
1369 .version = FIO_IOOPS_VERSION,
1370 .init = fio_sgio_init,
1375 static void fio_init fio_sgio_register(void)
1377 register_ioengine(&ioengine);
1380 static void fio_exit fio_sgio_unregister(void)
1382 unregister_ioengine(&ioengine);