4 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
// MAX_10B_LBA is the all-ones 32-bit LBA. The LBA is compared against it to pick
// between 10-byte and 16-byte opcodes, and the READ CAPACITY(10) result is compared
// against it to decide whether READ CAPACITY(16) has to be issued.
18 #define MAX_10B_LBA 0xFFFFFFFFULL
19 #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
20 #define MAX_SB 64 // sense block maximum return size
23 unsigned char cdb[16]; // increase to support 16 byte commands
24 unsigned char sb[MAX_SB]; // add sense block to commands
29 struct sgio_cmd *cmds;
// Reset an SG v3 header and the command buffer that belongs to io_u.
// sd:   engine data; the command slot used is sd->cmds[io_u->index]
// hdr:  header to initialise; it is zeroed first
// io_u: supplies the slot index, the pack_id and the transfer buffer/length
// fs:   callers in this file pass 1 for reads/writes and 0 for the sync case.
//       NOTE(review): the line that tests 'fs' is not visible in this excerpt;
//       presumably it gates the dxferp/dxfer_len assignments below - confirm.
39 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
40 struct io_u *io_u, int fs)
42 struct sgio_cmd *sc = &sd->cmds[io_u->index];
// Wipe both the header and the CDB so nothing from an earlier command survives.
44 memset(hdr, 0, sizeof(*hdr));
45 memset(sc->cdb, 0, sizeof(sc->cdb));
47 hdr->interface_id = 'S';
// cmd_len and mx_sb_len are taken from the array sizes (16-byte CDB, MAX_SB sense bytes).
49 hdr->cmd_len = sizeof(sc->cdb);
51 hdr->mx_sb_len = sizeof(sc->sb);
52 hdr->pack_id = io_u->index;
56 hdr->dxferp = io_u->xfer_buf;
57 hdr->dxfer_len = io_u->xfer_buflen;
// Walk the first 'fds' entries of pfds looking for one whose revents has POLLIN set.
// NOTE(review): the return statements are not visible in this excerpt.
61 static int pollin_events(struct pollfd *pfds, int fds)
65 for (i = 0; i < fds; i++)
66 if (pfds[i].revents & POLLIN)
// read() wrapper for an sg descriptor: requests 'size' bytes from fd into data.
// EAGAIN and EINTR are singled out after the read; what is done for them and the
// value returned are not visible in this excerpt.
72 static int sg_fd_read(int fd, void *data, size_t size)
79 ret = read(fd, data, size);
81 if (errno == EAGAIN || errno == EINTR)
// Reap completed sg commands for this thread.
// Polls the descriptors of the thread's files, reads completed sg_io_hdr records
// from them and stores the io_u found in each header's usr_ptr into sd->events[].
// min: per the comment below, the descriptors are not allowed to block when it is 0
// t:   marked fio_unused; poll() below is always called with a timeout of -1
// NOTE(review): the declarations of 'max', 'f', 'p' and 'io_u' as well as the
// return statements are not visible in this excerpt.
101 static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
103 const struct timespec fio_unused *t)
105 struct sgio_data *sd = td->io_ops->data;
106 int left = max, eventNum, ret, r = 0;
// The collected headers are walked starting at sd->sgbuf in the loop near the end.
107 void *buf = sd->sgbuf;
108 unsigned int i, events;
112 * Fill in the file descriptors
114 for_each_file(td, f, i) {
116 * don't block for min events == 0
// fd_flags[i] receives whatever fio_set_fd_nonblocking() returns, or -1;
// the restore loop at the end checks for that -1.
119 sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
121 sd->fd_flags[i] = -1;
123 sd->pfds[i].fd = f->fd;
124 sd->pfds[i].events = POLLIN;
130 dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
// A timeout of -1 makes poll() wait without a time limit.
136 ret = poll(sd->pfds, td->o.nr_files, -1);
140 td_verror(td, errno, "poll");
145 if (pollin_events(sd->pfds, td->o.nr_files))
// For every file, attempt up to 'left' header reads; 'p' is advanced by one
// sg_io_hdr after a read.
155 for_each_file(td, f, i) {
156 for (eventNum = 0; eventNum < left; eventNum++) {
157 ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
158 dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
161 td_verror(td, r, "sg_read");
164 p += sizeof(struct sg_io_hdr);
166 dprint(FD_IO, "sgio_getevents: events: %d\n", events);
170 if (r < 0 && !events)
// Map each collected header back to the io_u stored in its usr_ptr.
180 for (i = 0; i < events; i++) {
181 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
182 sd->events[i] = hdr->usr_ptr;
184 /* record if an io error occurred, ignore resid */
185 if (hdr->info & SG_INFO_CHECK) {
// Copy the whole header onto the io_u; fio_sgio_errdetails() reads io_u->hdr.
187 io_u = (struct io_u *)(hdr->usr_ptr);
188 memcpy((void*)&(io_u->hdr), (void*)hdr, sizeof(struct sg_io_hdr));
189 sd->events[i]->error = EIO;
// Write the saved flag values back with F_SETFL; a failure is only logged.
195 for_each_file(td, f, i) {
196 if (sd->fd_flags[i] == -1)
199 if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
200 log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
// Execute the header prepared on io_u through the SG_IO ioctl on f->fd.
// The io_u is placed in sd->events[0] before the ioctl is issued.
// Returns FIO_Q_COMPLETED on the visible path; the handling of a failing ioctl and
// of SG_INFO_CHECK is not visible in this excerpt.
207 static int fio_sgio_ioctl_doio(struct thread_data *td,
208 struct fio_file *f, struct io_u *io_u)
210 struct sgio_data *sd = td->io_ops->data;
211 struct sg_io_hdr *hdr = &io_u->hdr;
214 sd->events[0] = io_u;
216 ret = ioctl(f->fd, SG_IO, hdr);
220 /* record if an io error occurred */
221 if (hdr->info & SG_INFO_CHECK)
224 return FIO_Q_COMPLETED;
// Submit the header prepared on io_u by write()-ing it to f->fd; the read() below
// fetches a header back into the same structure.
// do_sync: NOTE(review): the line that tests do_sync is not visible in this excerpt;
//          presumably it decides whether the read-back happens here - confirm.
// Returns FIO_Q_COMPLETED on the visible path.
227 static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
229 struct sg_io_hdr *hdr = &io_u->hdr;
232 ret = write(f->fd, hdr, sizeof(*hdr));
237 ret = read(f->fd, hdr, sizeof(*hdr));
241 /* record if an io error occurred */
242 if (hdr->info & SG_INFO_CHECK)
245 return FIO_Q_COMPLETED;
// Pick the submission path for io_u: block devices go through fio_sgio_ioctl_doio(),
// the other visible path calls fio_sgio_rw_doio() (the else line is not visible here).
// On both paths td->error is set from io_u->error afterwards.
251 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
253 struct fio_file *f = io_u->file;
256 if (f->filetype == FIO_TYPE_BD) {
257 ret = fio_sgio_ioctl_doio(td, f, io_u);
258 td->error = io_u->error;
260 ret = fio_sgio_rw_doio(f, io_u, do_sync);
262 td->error = io_u->error;
// Build the SCSI command for io_u before it is queued.
// Reads become READ(10)/READ(16), writes WRITE(10)/WRITE(16), and the remaining
// branch builds SYNCHRONIZE CACHE(10)/(16); the 10-byte opcode is chosen while
// lba < MAX_10B_LBA. LBA and block count are stored most-significant byte first
// and the header timeout is set to SCSI_TIMEOUT_MS.
// NOTE(review): the return statements are not visible in this excerpt.
268 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
270 struct sg_io_hdr *hdr = &io_u->hdr;
271 struct sgio_data *sd = td->io_ops->data;
272 long long nr_blocks, lba;
// Mask test: equivalent to "not a multiple of bs" only when sd->bs is a power of two.
274 if (io_u->xfer_buflen & (sd->bs - 1)) {
275 log_err("read/write not sector aligned\n");
// Convert the byte length and byte offset into block units.
279 nr_blocks = io_u->xfer_buflen / sd->bs;
280 lba = io_u->offset / sd->bs;
282 if (io_u->ddir == DDIR_READ) {
283 sgio_hdr_init(sd, hdr, io_u, 1);
285 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
286 if (lba < MAX_10B_LBA)
287 hdr->cmdp[0] = 0x28; // read(10)
289 hdr->cmdp[0] = 0x88; // read(16)
290 } else if (io_u->ddir == DDIR_WRITE) {
291 sgio_hdr_init(sd, hdr, io_u, 1);
293 hdr->dxfer_direction = SG_DXFER_TO_DEV;
294 if (lba < MAX_10B_LBA)
295 hdr->cmdp[0] = 0x2a; // write(10)
297 hdr->cmdp[0] = 0x8a; // write(16)
// Sync case: header initialised with fs == 0 and no data transfer direction.
299 sgio_hdr_init(sd, hdr, io_u, 0);
300 hdr->dxfer_direction = SG_DXFER_NONE;
301 if (lba < MAX_10B_LBA)
302 hdr->cmdp[0] = 0x35; // synccache(10)
304 hdr->cmdp[0] = 0x91; // synccache(16)
308 * for synccache, we leave lba and length to 0 to sync all
311 if (hdr->dxfer_direction != SG_DXFER_NONE) {
313 if (lba < MAX_10B_LBA) {
// 10-byte CDB: 32-bit LBA in bytes 2-5, 16-bit block count in bytes 7-8.
// NOTE(review): an nr_blocks above 0xFFFF is silently truncated by these two
// bytes - confirm callers cannot exceed it.
314 hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
315 hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
316 hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff);
317 hdr->cmdp[5] = (unsigned char) (lba & 0xff);
318 hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
319 hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
// 16-byte CDB: 64-bit LBA in bytes 2-9, block count in bytes 10-13.
321 hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
322 hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
323 hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
324 hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
325 hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
326 hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
327 hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff);
328 hdr->cmdp[9] = (unsigned char) (lba & 0xff);
// NOTE(review): byte 10 shifts nr_blocks by 32 while bytes 11-13 shift by 16, 8
// and 0, so bits 24-31 of the count are never written; a shift of 24 looks
// intended - confirm against the READ(16)/WRITE(16) CDB layout.
329 hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff);
330 hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
331 hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
332 hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
336 hdr->timeout = SCSI_TIMEOUT_MS;
// Queue hook: runs the read-only check and then executes io_u via fio_sgio_doio().
// do_sync starts at 0; the condition below (sync_io, odirect or a sync-type ddir)
// guards a statement that is not visible in this excerpt - presumably do_sync = 1.
// When the header reports a non-zero SCSI status, hdr->resid is copied to io_u->resid.
// The visible return path reports the error with td_verror() and returns FIO_Q_COMPLETED.
340 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
342 struct sg_io_hdr *hdr = &io_u->hdr;
343 int ret, do_sync = 0;
345 fio_ro_check(td, io_u);
347 if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
350 ret = fio_sgio_doio(td, io_u, do_sync);
354 else if (hdr->status) {
355 io_u->resid = hdr->resid;
360 td_verror(td, io_u->error, "xfer");
361 return FIO_Q_COMPLETED;
// Event hook: hand back the io_u stored at index 'event' of sd->events[].
// No bounds check is performed on 'event' in the visible lines.
367 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
369 struct sgio_data *sd = td->io_ops->data;
371 return sd->events[event];
// Query the device directly for its block length and highest LBA.
// Issues READ CAPACITY(10) through the SG_IO ioctl and, when the reported LBA is
// MAX_10B_LBA, repeats the query with READ CAPACITY(16).
// td:      only td->files[0] is used; failures are reported through td_verror()
// bs:      out - block length taken from the reply
// max_lba: out - LBA taken from the reply
// NOTE(review): the declarations of 'fd'/'ret', the error checks after open() and
// ioctl(), and the return statements are not visible in this excerpt.
374 static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
375 unsigned long long *max_lba)
378 * need to do read capacity operation w/o benefit of sd or
379 * io_u structures, which are not initialized until later.
381 struct sg_io_hdr hdr;
382 unsigned char cmd[16];
383 unsigned char sb[64];
384 unsigned char buf[32]; // read capacity return
388 struct fio_file *f = td->files[0];
390 /* open file independent of rest of application */
// NOTE(review): the matching close() is not visible in this excerpt - confirm
// that every exit path releases fd.
391 fd = open(f->file_name, O_RDONLY);
395 memset(&hdr, 0, sizeof(hdr));
396 memset(cmd, 0, sizeof(cmd));
397 memset(sb, 0, sizeof(sb));
398 memset(buf, 0, sizeof(buf));
400 /* First let's try a 10 byte read capacity. */
401 hdr.interface_id = 'S';
405 hdr.mx_sb_len = sizeof(sb);
406 hdr.timeout = SCSI_TIMEOUT_MS;
407 hdr.cmdp[0] = 0x25; // Read Capacity(10)
408 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
410 hdr.dxfer_len = sizeof(buf);
412 ret = ioctl(fd, SG_IO, &hdr);
// 10-byte reply as decoded here: bytes 0-3 = LBA, bytes 4-7 = block length,
// most-significant byte first.
// NOTE(review): each buf[n] is promoted to int before the shift, so a top byte
// >= 0x80 shifts into the sign bit; that is the source of the sign extension
// mentioned on the next line. Casting the bytes to an unsigned type before
// shifting would make the mask unnecessary.
418 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
419 *max_lba = ((buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & 0x00000000FFFFFFFFULL; // for some reason max_lba is being sign extended even though unsigned.
423 * If max lba is 0xFFFFFFFF, then need to retry with
424 * 16 byteread capacity
426 if (*max_lba == MAX_10B_LBA) {
428 hdr.cmdp[0] = 0x9e; // Read Capacity(16)
429 hdr.cmdp[1] = 0x10; // service action
// CDB bytes 10-13 are filled with sizeof(buf), most-significant byte first.
430 hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
431 hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
432 hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
433 hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
435 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
437 hdr.dxfer_len = sizeof(buf);
439 ret = ioctl(fd, SG_IO, &hdr);
445 /* record if an io error occurred */
446 if (hdr.info & SG_INFO_CHECK)
447 td_verror(td, EIO, "fio_sgio_read_capacity");
// 16-byte reply as decoded here: bytes 0-7 = LBA, bytes 8-11 = block length.
// The LBA bytes are widened to unsigned long long before shifting; the block
// length bytes are not (same int-promotion caveat as in the 10-byte case).
449 *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11];
450 *max_lba = ((unsigned long long)buf[0] << 56) |
451 ((unsigned long long)buf[1] << 48) |
452 ((unsigned long long)buf[2] << 40) |
453 ((unsigned long long)buf[3] << 32) |
454 ((unsigned long long)buf[4] << 24) |
455 ((unsigned long long)buf[5] << 16) |
456 ((unsigned long long)buf[6] << 8) |
457 (unsigned long long)buf[7];
// Cleanup hook: fetches the engine data from td->io_ops->data.
// NOTE(review): the lines that release it are not visible in this excerpt.
464 static void fio_sgio_cleanup(struct thread_data *td)
466 struct sgio_data *sd = td->io_ops->data;
// Init hook: allocate and zero the per-thread engine state and attach it to
// td->io_ops->data.
// cmds, events and sgbuf are sized by td->o.iodepth; pfds and fd_flags by
// td->o.nr_files.
// NOTE(review): every malloc() result is passed to memset() on the very next line
// without a NULL check; calloc() plus a check would cover both the zeroing and the
// failure case - confirm the intended out-of-memory policy.
// NOTE(review): the return statement is not visible in this excerpt.
478 static int fio_sgio_init(struct thread_data *td)
480 struct sgio_data *sd;
482 sd = malloc(sizeof(*sd));
483 memset(sd, 0, sizeof(*sd));
484 sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
485 memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
486 sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
487 memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
488 sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
489 memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
490 sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
491 memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
492 sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
493 memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
// Redundant with the memset of *sd above, but makes the initial state explicit.
494 sd->type_checked = 0;
495 td->io_ops->data = sd;
498 * we want to do it, regardless of whether odirect is set or not
500 td->o.override_sync = 1;
// Probe an opened file once and record what the engine needs to know about it.
// Block devices are asked for their sector size with BLKSSZGET; character devices
// must answer SG_GET_VERSION_NUM and are then queried with fio_sgio_read_capacity();
// any other file type is rejected with EINVAL.
// On the visible success path sd->max_lba is stored and sd->type_checked is set.
// NOTE(review): the declarations of 'bs', 'version' and 'ret' and the return
// statements are not visible in this excerpt.
504 static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
506 struct sgio_data *sd = td->io_ops->data;
508 unsigned long long max_lba = 0;
511 if (f->filetype == FIO_TYPE_BD) {
512 if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
513 td_verror(td, errno, "ioctl");
516 } else if (f->filetype == FIO_TYPE_CHAR) {
519 if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
520 td_verror(td, errno, "ioctl");
524 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
526 td_verror(td, td->error, "fio_sgio_read_capacity");
527 log_err("ioengine sg unable to read capacity successfully\n");
// NOTE(review): the message below mentions only block devices although the
// FIO_TYPE_CHAR branch above is accepted as well.
531 td_verror(td, EINVAL, "wrong file type");
532 log_err("ioengine sg only works on block devices\n");
537 // Determine size of commands needed based on max_lba
538 sd->max_lba = max_lba;
539 if (max_lba > MAX_10B_LBA) {
540 dprint(FD_IO, "sgio_type_check: using 16 byte operations: max_lba = 0x%016llx\n", max_lba);
// Block devices get the getevents/event hooks cleared on td->io_ops.
544 if (f->filetype == FIO_TYPE_BD) {
545 td->io_ops->getevents = NULL;
546 td->io_ops->event = NULL;
548 sd->type_checked = 1;
// Open hook: opens the file with generic_open_file() and, while engine data exists
// and sd->type_checked is still 0, runs fio_sgio_type_check() on it.
// When the type check reports failure the file is closed again with
// generic_close_file().
// NOTE(review): the return statements are not visible in this excerpt.
553 static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
555 struct sgio_data *sd = td->io_ops->data;
558 ret = generic_open_file(td, f);
562 if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
563 ret = generic_close_file(td, f);
571 * Build an error string with details about the driver, host or SCSI
572 * error contained in the sg header. Caller will use as necessary.
// Render the sg header stored on io_u into a human-readable diagnostic string.
// Numeric status values are printed next to a symbolic name so they can be
// searched for. The text is accumulated in a zeroed MAXERRDETAIL-byte heap buffer
// through strlcat() calls bounded by MAXERRDETAIL; each formatted piece is first
// built in the MAXMSGCHUNK-byte stack buffer msgchunk.
// NOTE(review): the case labels, the declaration of 'i' and the return statement
// are not visible in this excerpt, so the value-to-name mapping cannot be checked
// from here.
574 static char *fio_sgio_errdetails(struct io_u *io_u)
576 struct sg_io_hdr *hdr = &io_u->hdr;
577 #define MAXERRDETAIL 1024
578 #define MAXMSGCHUNK 128
579 char *msg, msgchunk[MAXMSGCHUNK], *ret = NULL;
582 msg = calloc(MAXERRDETAIL, 1);
585 * can't seem to find sg_err.h, so I'll just echo the define values
586 * so others can search on internet to find clearer clues of meaning.
588 if (hdr->info & SG_INFO_CHECK) {
// Host status: raw value followed by a name.
590 if (hdr->host_status) {
591 snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
592 strlcat(msg, msgchunk, MAXERRDETAIL);
593 switch (hdr->host_status) {
595 strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
598 strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
601 strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
604 strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
607 strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
610 strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
613 strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
616 strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
619 strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
622 strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
625 strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
628 strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
631 strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
634 strlcat(msg, "Unknown", MAXERRDETAIL);
637 strlcat(msg, ". ", MAXERRDETAIL);
// Driver status: the low nibble selects the SG_ERR_DRIVER_* name, the high
// nibble the SG_ERR_SUGGEST_* name.
639 if (hdr->driver_status) {
640 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
641 strlcat(msg, msgchunk, MAXERRDETAIL);
642 switch (hdr->driver_status & 0x0F) {
644 strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
647 strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
650 strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
653 strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
656 strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
659 strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
662 strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
665 strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
668 strlcat(msg, "Unknown", MAXERRDETAIL);
671 strlcat(msg, "; ", MAXERRDETAIL);
672 switch (hdr->driver_status & 0xF0) {
674 strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
677 strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
680 strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
683 strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
686 strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
689 strlcat(msg, ". ", MAXERRDETAIL);
// SCSI status byte: raw value followed by a name.
692 snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
693 strlcat(msg, msgchunk, MAXERRDETAIL);
694 // SCSI 3 status codes
695 switch (hdr->status) {
697 strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
700 strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
703 strlcat(msg, "BUSY", MAXERRDETAIL);
706 strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
709 strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
712 strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
715 strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
718 strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
721 strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
724 strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
727 strlcat(msg, "Unknown", MAXERRDETAIL);
730 strlcat(msg, ". ", MAXERRDETAIL);
// Sense data: the sb_len_wr bytes at hdr->sbp, two hex digits each.
732 if (hdr->sb_len_wr) {
733 snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
734 strlcat(msg, msgchunk, MAXERRDETAIL);
735 for (i = 0; i < hdr->sb_len_wr; i++) {
736 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
737 strlcat(msg, msgchunk, MAXERRDETAIL);
739 strlcat(msg, ". ", MAXERRDETAIL);
// Residual count: reported only when non-zero, together with the requested length.
741 if (hdr->resid != 0) {
742 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
743 strlcat(msg, msgchunk, MAXERRDETAIL);
// Fixed fallback text, duplicated on the heap.
// NOTE(review): presumably the branch taken when SG_INFO_CHECK is clear - the
// else line is not visible in this excerpt.
749 ret = strdup("SG Driver did not report a Host, Driver or Device check");
755 * get max file size from read capacity.
// get_file_size hook: derive the file size from READ CAPACITY.
// Does nothing further when the size is already known; otherwise calls
// fio_sgio_read_capacity() and sets f->real_file_size to (max_lba + 1) * bs before
// marking the size as known.
// NOTE(review): the declarations of 'bs' and 'ret' and the return statements are
// not visible in this excerpt.
757 static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
760 * get_file_size is being called even before sgio_init is
761 * called, so none of the sg_io structures are
762 * initialized in the thread_data yet. So we need to do the
763 * ReadCapacity without any of those helpers. One of the effects
764 * is that ReadCapacity may get called 4 times on each open:
765 * readcap(10) followed by readcap(16) if needed - just to get
766 * the file size after the init occurs - it will be called
767 * again when "type_check" is called during structure
768 * initialization I'm not sure how to prevent this little
772 unsigned long long max_lba = 0;
775 if (fio_file_size_known(f))
778 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
780 td_verror(td, td->error, "fio_sgio_read_capacity");
781 log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
// max_lba is the last LBA, so the block count is max_lba + 1.
785 f->real_file_size = (max_lba + 1) * bs;
786 fio_file_set_size_known(f);
// Hook table handed to register_ioengine()/unregister_ioengine() when SG support is built in.
// fio_sgio_type_check() may later clear .getevents and .event through td->io_ops
// for block devices.
791 static struct ioengine_ops ioengine = {
793 .version = FIO_IOOPS_VERSION,
794 .init = fio_sgio_init,
795 .prep = fio_sgio_prep,
796 .queue = fio_sgio_queue,
797 .getevents = fio_sgio_getevents,
798 .errdetails = fio_sgio_errdetails,
799 .event = fio_sgio_event,
800 .cleanup = fio_sgio_cleanup,
801 .open_file = fio_sgio_open,
802 .close_file = generic_close_file,
803 .get_file_size = fio_sgio_get_file_size, // generic_get_file_size
804 .flags = FIO_SYNCIO | FIO_RAWIO,
807 #else /* FIO_HAVE_SGIO */
810 * When we have a proper configure system in place, we simply won't build
811 * and install this io engine. For now install a crippled version that
812 * just complains and fails to load.
// Fallback init hook for builds without FIO_HAVE_SGIO: only logs that the engine
// is unavailable. The return statement is not visible in this excerpt.
814 static int fio_sgio_init(struct thread_data fio_unused *td)
816 log_err("fio: ioengine sg not available\n");
// Hook table for the fallback build; of the visible fields only the version and
// the init hook are set.
820 static struct ioengine_ops ioengine = {
822 .version = FIO_IOOPS_VERSION,
823 .init = fio_sgio_init,
// Registers the hook table with fio; carries the fio_init attribute.
828 static void fio_init fio_sgio_register(void)
830 register_ioengine(&ioengine);
// Removes the hook table from fio again; carries the fio_exit attribute.
833 static void fio_exit fio_sgio_unregister(void)
835 unregister_ioengine(&ioengine);