4 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
/*
 * Engine-wide constants.
 * MAX_10B_LBA: fio_sgio_prep uses the 10-byte opcodes for LBAs strictly
 * below this value and the 16-byte opcodes otherwise.
 * SCSI_TIMEOUT_MS: value stored in the timeout field of each sg_io_hdr.
 * MAX_SB: size of the per-command sense buffer.
 */
18 #define MAX_10B_LBA 0xFFFFFFFFULL
19 #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
20 #define MAX_SB 64 // sense block maximum return size
/*
 * Per-command buffers. sgio_hdr_init reaches them through a
 * struct sgio_cmd pointer and takes cmd_len / mx_sb_len from their sizes.
 * NOTE(review): the enclosing struct declaration line is not visible in
 * this listing.
 */
23 unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands
24 unsigned char sb[MAX_SB]; // add sense block to commands
/*
 * Command slots: fio_sgio_init allocates one entry per iodepth and
 * sgio_hdr_init indexes the array with io_u->index.
 * NOTE(review): the enclosing struct declaration line is not visible in
 * this listing.
 */
29 struct sgio_cmd *cmds;
/*
 * Reset an sg_io_hdr for the given io_u and fill in the fields that are
 * common to every command.
 *
 * sd:   engine data; supplies the command slot at index io_u->index
 * hdr:  header to initialise; it is zeroed first
 * io_u: io unit whose index, transfer buffer and length are copied in
 * fs:   NOTE(review): the statement that tests fs is not visible in this
 *       listing. fio_sgio_prep passes 1 for reads and writes and 0 for the
 *       remaining case, so it presumably gates the dxferp / dxfer_len
 *       assignments -- confirm against the full file.
 *
 * NOTE(review): the embedded line numbers skip (47, 49, 52-54 among
 * others), so some statements of this function are not shown here.
 */
38 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
39 struct io_u *io_u, int fs)
/* each io_u uses the command slot at its own index */
41 struct sgio_cmd *sc = &sd->cmds[io_u->index];
/* start from a clean header and a clean CDB */
43 memset(hdr, 0, sizeof(*hdr));
44 memset(sc->cdb, 0, sizeof(sc->cdb));
46 hdr->interface_id = 'S';
/* lengths are the full sizes of the slot buffers (cdb and sb) */
48 hdr->cmd_len = sizeof(sc->cdb);
50 hdr->mx_sb_len = sizeof(sc->sb);
51 hdr->pack_id = io_u->index;
/* data phase: point the header at the buffer carried by the io_u */
55 hdr->dxferp = io_u->xfer_buf;
56 hdr->dxfer_len = io_u->xfer_buflen;
/*
 * Look through the first fds entries of pfds for one whose revents has
 * POLLIN set.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * fio_sgio_getevents uses the result as a truth value after poll(), so a
 * non-zero result presumably means at least one descriptor is readable --
 * confirm against the full file.
 */
60 static int pollin_events(struct pollfd *pfds, int fds)
64 for (i = 0; i < fds; i++)
65 if (pfds[i].revents & POLLIN)
/*
 * Read size bytes from fd into data with read(2).
 *
 * The visible code singles out the errno values EAGAIN and EINTR after
 * the read.
 * NOTE(review): the surrounding loop and the return statements are not
 * visible in this listing; presumably those two errno values lead to a
 * retry instead of a failure -- confirm against the full file.
 */
71 static int sg_fd_read(int fd, void *data, size_t size)
78 ret = read(fd, data, size);
80 if (errno == EAGAIN || errno == EINTR)
/*
 * getevents hook: collect completed sg commands for every file of the job.
 *
 * td:  thread data; td->io_ops_data holds the struct sgio_data
 * min: minimum number of events requested (the inline remark below refers
 *      to the min events == 0 case)
 * t:   unused (marked fio_unused)
 *
 * Phases that are visible in this listing:
 *  1. For each file, store in sd->fd_flags[i] either the value returned by
 *     fio_set_fd_nonblocking() or -1, and set up sd->pfds[i] to wait for
 *     POLLIN on the file descriptor.
 *  2. poll() all td->o.nr_files descriptors with a timeout of -1, which
 *     blocks until an event arrives; a poll failure is reported with
 *     td_verror.
 *  3. For each file, read up to left sg_io_hdr records with sg_fd_read(),
 *     advancing p by one header after a read.
 *  4. For each of the events headers in buf (sd->sgbuf), publish
 *     hdr->usr_ptr in sd->events[i]. When SG_INFO_CHECK is set in
 *     hdr->info the header is copied into the io_u and the error of that
 *     event is set to EIO.
 *  5. For each file, sd->fd_flags[i] is compared with -1 and the saved
 *     flags are written back with fcntl(F_SETFL); a failure there is only
 *     logged.
 *
 * NOTE(review): the embedded line numbers skip, so several statements are
 * missing from this listing: the declarations of max, f, p and io_u, the
 * loop around poll(), the updates of events and left, and the return
 * statements. Presumably p starts at sd->sgbuf and a saved flag of -1
 * means the descriptor flags were never changed -- confirm against the
 * full file.
 */
100 static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
102 const struct timespec fio_unused *t)
104 struct sgio_data *sd = td->io_ops_data;
105 int left = max, eventNum, ret, r = 0;
106 void *buf = sd->sgbuf;
107 unsigned int i, events;
111 * Fill in the file descriptors
113 for_each_file(td, f, i) {
115 * don't block for min events == 0
118 sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
120 sd->fd_flags[i] = -1;
122 sd->pfds[i].fd = f->fd;
123 sd->pfds[i].events = POLLIN;
129 dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
135 ret = poll(sd->pfds, td->o.nr_files, -1);
139 td_verror(td, errno, "poll");
144 if (pollin_events(sd->pfds, td->o.nr_files))
154 for_each_file(td, f, i) {
155 for (eventNum = 0; eventNum < left; eventNum++) {
156 ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
157 dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
160 td_verror(td, r, "sg_read");
163 p += sizeof(struct sg_io_hdr);
165 dprint(FD_IO, "sgio_getevents: events: %d\n", events);
169 if (r < 0 && !events)
179 for (i = 0; i < events; i++) {
180 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
181 sd->events[i] = hdr->usr_ptr;
183 /* record if an io error occurred, ignore resid */
184 if (hdr->info & SG_INFO_CHECK) {
186 io_u = (struct io_u *)(hdr->usr_ptr);
187 memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
188 sd->events[i]->error = EIO;
194 for_each_file(td, f, i) {
195 if (sd->fd_flags[i] == -1)
198 if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
199 log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
/*
 * Issue one command with the SG_IO ioctl on f->fd.
 *
 * The io_u is stored in sd->events[0] before the ioctl, the sg_io_hdr
 * embedded in the io_u is handed to the kernel, and SG_INFO_CHECK in
 * hdr->info is tested afterwards. The visible return value is
 * FIO_Q_COMPLETED. fio_sgio_doio takes this path for FIO_TYPE_BLOCK files.
 *
 * NOTE(review): the handling of a failed ioctl and the statement executed
 * when SG_INFO_CHECK is set are not visible in this listing.
 */
206 static int fio_sgio_ioctl_doio(struct thread_data *td,
207 struct fio_file *f, struct io_u *io_u)
209 struct sgio_data *sd = td->io_ops_data;
210 struct sg_io_hdr *hdr = &io_u->hdr;
213 sd->events[0] = io_u;
215 ret = ioctl(f->fd, SG_IO, hdr);
219 /* record if an io error occurred */
220 if (hdr->info & SG_INFO_CHECK)
223 return FIO_Q_COMPLETED;
/*
 * Issue one command through the sg write()/read() interface: the
 * sg_io_hdr embedded in the io_u is written to f->fd and, on the visible
 * path, read back into the same header. SG_INFO_CHECK in hdr->info is then
 * tested and FIO_Q_COMPLETED is returned.
 *
 * NOTE(review): the checks between write() and read() are not visible in
 * this listing. Presumably do_sync decides whether the reply is read back
 * here or left for fio_sgio_getevents -- confirm against the full file.
 */
226 static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
228 struct sg_io_hdr *hdr = &io_u->hdr;
231 ret = write(f->fd, hdr, sizeof(*hdr));
236 ret = read(f->fd, hdr, sizeof(*hdr));
240 /* record if an io error occurred */
241 if (hdr->info & SG_INFO_CHECK)
244 return FIO_Q_COMPLETED;
/*
 * Dispatch one io_u to the matching submission path.
 *
 * FIO_TYPE_BLOCK files go through fio_sgio_ioctl_doio; the other visible
 * path calls fio_sgio_rw_doio with the caller-supplied do_sync. After
 * either call io_u->error is copied into td->error.
 *
 * NOTE(review): the line separating the two branches and the return
 * statement are not visible in this listing.
 */
250 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
252 struct fio_file *f = io_u->file;
255 if (f->filetype == FIO_TYPE_BLOCK) {
256 ret = fio_sgio_ioctl_doio(td, f, io_u);
257 td->error = io_u->error;
259 ret = fio_sgio_rw_doio(f, io_u, do_sync);
261 td->error = io_u->error;
/*
 * prep hook: build the SCSI command for io_u in its embedded sg_io_hdr.
 *
 * The transfer length is tested against sd->bs with a mask, which assumes
 * sd->bs is a power of two; a misaligned length is logged. nr_blocks and
 * lba are the transfer length and the file offset expressed in units of
 * sd->bs.
 *
 * Opcode selection by data direction:
 *   DDIR_READ   read(10)  0x28 / read(16)  0x88, data from the device
 *   DDIR_WRITE  write(10) 0x2a / write(16) 0x8a, data to the device
 *   otherwise   synccache(10) 0x35 / synccache(16) 0x91, no data phase
 * The 10-byte form is used when lba < MAX_10B_LBA.
 *
 * For commands with a data phase the LBA and block count are stored
 * big-endian in the CDB: bytes 2-5 and 7-8 for the 10-byte form, bytes 2-9
 * and 10-13 for the 16-byte form.
 *
 * Fix: byte 10 of the 16-byte CDB is the most significant byte of the
 * 32-bit transfer length, so it must hold bits 31..24 of nr_blocks. The
 * previous shift by 32 stored bits 39..32 there and dropped bits 31..24.
 *
 * NOTE(review): in the 10-byte form only the low 16 bits of nr_blocks are
 * stored (bytes 7 and 8); nothing visible here rejects a larger count.
 * NOTE(review): the embedded line numbers skip, so the braces, the else
 * lines and the return statements are not shown in this listing.
 */
267 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
269 struct sg_io_hdr *hdr = &io_u->hdr;
270 struct sgio_data *sd = td->io_ops_data;
271 long long nr_blocks, lba;
273 if (io_u->xfer_buflen & (sd->bs - 1)) {
274 log_err("read/write not sector aligned\n");
278 nr_blocks = io_u->xfer_buflen / sd->bs;
279 lba = io_u->offset / sd->bs;
281 if (io_u->ddir == DDIR_READ) {
282 sgio_hdr_init(sd, hdr, io_u, 1);
284 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
285 if (lba < MAX_10B_LBA)
286 hdr->cmdp[0] = 0x28; // read(10)
288 hdr->cmdp[0] = 0x88; // read(16)
289 } else if (io_u->ddir == DDIR_WRITE) {
290 sgio_hdr_init(sd, hdr, io_u, 1);
292 hdr->dxfer_direction = SG_DXFER_TO_DEV;
293 if (lba < MAX_10B_LBA)
294 hdr->cmdp[0] = 0x2a; // write(10)
296 hdr->cmdp[0] = 0x8a; // write(16)
298 sgio_hdr_init(sd, hdr, io_u, 0);
299 hdr->dxfer_direction = SG_DXFER_NONE;
300 if (lba < MAX_10B_LBA)
301 hdr->cmdp[0] = 0x35; // synccache(10)
303 hdr->cmdp[0] = 0x91; // synccache(16)
307 * for synccache, we leave lba and length to 0 to sync all
310 if (hdr->dxfer_direction != SG_DXFER_NONE) {
311 if (lba < MAX_10B_LBA) {
/* 10-byte CDB: 32-bit LBA in bytes 2-5, 16-bit block count in bytes 7-8 */
312 hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
313 hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
314 hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff);
315 hdr->cmdp[5] = (unsigned char) (lba & 0xff);
316 hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
317 hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
/* 16-byte CDB: 64-bit LBA in bytes 2-9, 32-bit block count in bytes 10-13 */
319 hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
320 hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
321 hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
322 hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
323 hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
324 hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
325 hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff);
326 hdr->cmdp[9] = (unsigned char) (lba & 0xff);
/* most significant length byte is bits 31..24, hence the shift by 24 */
327 hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 24) & 0xff);
328 hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
329 hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
330 hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
334 hdr->timeout = SCSI_TIMEOUT_MS;
/*
 * queue hook: submit one prepared io_u through fio_sgio_doio.
 *
 * fio_ro_check() is invoked first. A condition on td->o.sync_io,
 * td->o.odirect and ddir_sync(io_u->ddir) precedes the submission; do_sync
 * starts at 0 and is the value passed to fio_sgio_doio. On the branch
 * taken when hdr->status is non-zero, hdr->resid is copied into
 * io_u->resid. An error is reported with td_verror, and that visible path
 * returns FIO_Q_COMPLETED.
 *
 * NOTE(review): the statement controlled by the sync condition (presumably
 * setting do_sync), the first arm of the if/else chain and the final
 * return are not visible in this listing.
 */
338 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
340 struct sg_io_hdr *hdr = &io_u->hdr;
341 int ret, do_sync = 0;
343 fio_ro_check(td, io_u);
345 if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
348 ret = fio_sgio_doio(td, io_u, do_sync);
352 else if (hdr->status) {
353 io_u->resid = hdr->resid;
358 td_verror(td, io_u->error, "xfer");
359 return FIO_Q_COMPLETED;
/*
 * event hook: return the io_u stored at index event in sd->events.
 * The array is filled by fio_sgio_getevents (and slot 0 by
 * fio_sgio_ioctl_doio).
 */
365 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
367 struct sgio_data *sd = td->io_ops_data;
369 return sd->events[event];
/*
 * Query block size and highest LBA of the first file of the job with a
 * SCSI Read Capacity command.
 *
 * td:      thread data; td->files[0] names the device to open
 * bs:      out, block length in bytes as reported by the device
 * max_lba: out, address of the last logical block
 *
 * The device is opened read-only on a private descriptor and a
 * Read Capacity(10) (opcode 0x25) is sent with the SG_IO ioctl. Its reply
 * holds the last LBA in bytes 0-3 and the block length in bytes 4-7, both
 * big-endian. If the 32-bit LBA equals MAX_10B_LBA the device is too large
 * for the 10-byte reply and Read Capacity(16) is issued instead; that
 * reply holds a 64-bit LBA in bytes 0-7 and the block length in bytes 8-11.
 *
 * Fix: buf[] is unsigned char, which is promoted to (signed) int before
 * shifting. Shifting a byte of 0x80 or more left by 24 overflows int and,
 * in practice, yields a negative value that was sign-extended on
 * conversion to unsigned long long. The top byte is now cast to an
 * unsigned type before the shift so the arithmetic is unsigned throughout.
 * The mask with MAX_10B_LBA is kept, so results are unchanged.
 *
 * NOTE(review): the embedded line numbers skip, so the declarations of fd
 * and ret, the cmdp/sbp/dxferp assignments, the error checks after open()
 * and ioctl(), the close of fd and the return statements are not shown in
 * this listing.
 */
372 static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
373 unsigned long long *max_lba)
376 * need to do read capacity operation w/o benefit of sd or
377 * io_u structures, which are not initialized until later.
379 struct sg_io_hdr hdr;
380 unsigned char cmd[16];
381 unsigned char sb[64];
382 unsigned char buf[32]; // read capacity return
386 struct fio_file *f = td->files[0];
388 /* open file independent of rest of application */
389 fd = open(f->file_name, O_RDONLY);
393 memset(&hdr, 0, sizeof(hdr));
394 memset(cmd, 0, sizeof(cmd));
395 memset(sb, 0, sizeof(sb));
396 memset(buf, 0, sizeof(buf));
398 /* First let's try a 10 byte read capacity. */
399 hdr.interface_id = 'S';
403 hdr.mx_sb_len = sizeof(sb);
404 hdr.timeout = SCSI_TIMEOUT_MS;
405 hdr.cmdp[0] = 0x25; // Read Capacity(10)
406 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
408 hdr.dxfer_len = sizeof(buf);
410 ret = ioctl(fd, SG_IO, &hdr);
416 *bs = ((unsigned int)buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
417 *max_lba = (((unsigned long long)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & MAX_10B_LBA; // top byte is widened before the shift so no sign extension can occur
420 * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA,
421 * then need to retry with 16 byte Read Capacity command.
423 if (*max_lba == MAX_10B_LBA) {
425 hdr.cmdp[0] = 0x9e; // service action
426 hdr.cmdp[1] = 0x10; // Read Capacity(16)
/* allocation length: size of buf as a 32-bit big-endian value */
427 hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
428 hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
429 hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
430 hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
432 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
434 hdr.dxfer_len = sizeof(buf);
436 ret = ioctl(fd, SG_IO, &hdr);
442 /* record if an io error occurred */
443 if (hdr.info & SG_INFO_CHECK)
444 td_verror(td, EIO, "fio_sgio_read_capacity");
446 *bs = ((unsigned int)buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11];
447 *max_lba = ((unsigned long long)buf[0] << 56) |
448 ((unsigned long long)buf[1] << 48) |
449 ((unsigned long long)buf[2] << 40) |
450 ((unsigned long long)buf[3] << 32) |
451 ((unsigned long long)buf[4] << 24) |
452 ((unsigned long long)buf[5] << 16) |
453 ((unsigned long long)buf[6] << 8) |
454 (unsigned long long)buf[7];
/*
 * cleanup hook: starts from the engine data stored in td->io_ops_data.
 * NOTE(review): the rest of the body is not visible in this listing;
 * presumably it releases the buffers allocated by fio_sgio_init -- confirm
 * against the full file.
 */
461 static void fio_sgio_cleanup(struct thread_data *td)
463 struct sgio_data *sd = td->io_ops_data;
/*
 * init hook: allocate and zero the engine data and attach it to the thread.
 *
 * Allocations, each followed by a memset to zero:
 *   sd            the struct sgio_data itself
 *   sd->cmds      one struct sgio_cmd per iodepth
 *   sd->events    one io_u pointer per iodepth
 *   sd->pfds      one struct pollfd per file
 *   sd->fd_flags  one int per file
 *   sd->sgbuf     one struct sg_io_hdr per iodepth
 * type_checked starts at 0, so fio_sgio_open runs the type check on the
 * first open. td->o.override_sync is set to 1.
 *
 * NOTE(review): none of the malloc() results is tested before the memset
 * that follows it (lines 479-490 are contiguous), so an allocation failure
 * would dereference NULL.
 * NOTE(review): the return statement is not visible in this listing.
 */
475 static int fio_sgio_init(struct thread_data *td)
477 struct sgio_data *sd;
479 sd = malloc(sizeof(*sd));
480 memset(sd, 0, sizeof(*sd));
481 sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
482 memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
483 sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
484 memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
485 sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
486 memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
487 sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
488 memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
489 sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
490 memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
491 sd->type_checked = 0;
492 td->io_ops_data = sd;
495 * we want to do it, regardless of whether odirect is set or not
497 td->o.override_sync = 1;
/*
 * Validate the device type of f and record that the check was done.
 *
 * FIO_TYPE_BLOCK: the sector size is queried with the BLKSSZGET ioctl.
 * FIO_TYPE_CHAR:  the sg driver version is queried with SG_GET_VERSION_NUM
 *                 and fio_sgio_read_capacity supplies bs and max_lba.
 * anything else:  EINVAL is reported, since the engine only works on block
 *                 or character devices.
 *
 * A debug message is printed when max_lba is at or above MAX_10B_LBA. For
 * block devices the getevents and event hooks of td->io_ops are cleared.
 * Finally sd->type_checked is set so fio_sgio_open does not repeat the
 * check.
 *
 * NOTE(review): the embedded line numbers skip, so the declarations of bs,
 * version and ret, the version test, the store of bs into the engine data
 * and the return statements are not visible in this listing.
 */
501 static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
503 struct sgio_data *sd = td->io_ops_data;
505 unsigned long long max_lba = 0;
507 if (f->filetype == FIO_TYPE_BLOCK) {
508 if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
509 td_verror(td, errno, "ioctl");
512 } else if (f->filetype == FIO_TYPE_CHAR) {
515 if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
516 td_verror(td, errno, "ioctl");
520 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
522 td_verror(td, td->error, "fio_sgio_read_capacity");
523 log_err("ioengine sg unable to read capacity successfully\n");
527 td_verror(td, EINVAL, "wrong file type");
528 log_err("ioengine sg only works on block or character devices\n");
533 // Determine size of commands needed based on max_lba
534 if (max_lba >= MAX_10B_LBA) {
535 dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
536 "commands for lba above 0x%016llx/0x%016llx\n",
537 MAX_10B_LBA, max_lba);
/* block devices complete inside the SG_IO ioctl, so the event hooks are dropped */
540 if (f->filetype == FIO_TYPE_BLOCK) {
541 td->io_ops->getevents = NULL;
542 td->io_ops->event = NULL;
544 sd->type_checked = 1;
/*
 * open_file hook: open f with generic_open_file and, the first time
 * through, run the device type check.
 *
 * The type check only runs when the engine data exists and
 * sd->type_checked is still 0; when fio_sgio_type_check reports failure
 * the file is closed again with generic_close_file.
 *
 * NOTE(review): the test of the generic_open_file result and the return
 * statements are not visible in this listing.
 */
549 static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
551 struct sgio_data *sd = td->io_ops_data;
554 ret = generic_open_file(td, f);
558 if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
559 ret = generic_close_file(td, f);
567 * Build an error string with details about the driver, host or SCSI
568 * error contained in the sg header. Caller will use as necessary.
/*
 * errdetails hook: describe the status fields of the sg_io_hdr embedded in
 * io_u as text.
 *
 * The message is assembled in a zeroed heap buffer of MAXERRDETAIL bytes;
 * msgchunk is a MAXMSGCHUNK byte scratch buffer for the formatted pieces,
 * which are appended with strlcat so the total never exceeds the buffer.
 *
 * When SG_INFO_CHECK is set in hdr->info the text contains, in order:
 *   - the host status, if non-zero, with its symbolic name
 *   - the driver status, if non-zero: the low nibble names the driver
 *     condition and the high nibble names the suggested action
 *   - the SCSI status with its symbolic name
 *   - a hex dump of the sb_len_wr sense bytes, if any were written
 *   - the residual byte count, if non-zero
 * When SG_INFO_CHECK is clear and nothing was written, a fixed notice is
 * copied into the buffer instead.
 *
 * NOTE(review): the embedded line numbers skip, so the case labels, the
 * break statements, the check of the calloc result, the declaration of i
 * and the return statement are not visible in this listing. Presumably
 * the buffer is returned and ownership passes to the caller -- confirm
 * against the full file. The last visible line is the first half of a
 * strncpy call whose remaining arguments are cut off.
 */
570 static char *fio_sgio_errdetails(struct io_u *io_u)
572 struct sg_io_hdr *hdr = &io_u->hdr;
573 #define MAXERRDETAIL 1024
574 #define MAXMSGCHUNK 128
575 char *msg, msgchunk[MAXMSGCHUNK];
578 msg = calloc(1, MAXERRDETAIL);
582 * can't seem to find sg_err.h, so I'll just echo the define values
583 * so others can search on internet to find clearer clues of meaning.
585 if (hdr->info & SG_INFO_CHECK) {
/* host adapter status */
586 if (hdr->host_status) {
587 snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
588 strlcat(msg, msgchunk, MAXERRDETAIL);
589 switch (hdr->host_status) {
591 strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
594 strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
597 strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
600 strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
603 strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
606 strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
609 strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
612 strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
615 strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
618 strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
621 strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
624 strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
627 strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
630 strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
633 strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
636 strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
639 strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
642 strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
645 strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
648 strlcat(msg, "Unknown", MAXERRDETAIL);
651 strlcat(msg, ". ", MAXERRDETAIL);
/* driver status: low nibble is the condition, high nibble the suggestion */
653 if (hdr->driver_status) {
654 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
655 strlcat(msg, msgchunk, MAXERRDETAIL);
656 switch (hdr->driver_status & 0x0F) {
658 strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
661 strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
664 strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
667 strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
670 strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
673 strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
676 strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
679 strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
682 strlcat(msg, "Unknown", MAXERRDETAIL);
685 strlcat(msg, "; ", MAXERRDETAIL);
686 switch (hdr->driver_status & 0xF0) {
688 strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
691 strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
694 strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
697 strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
700 strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
703 strlcat(msg, ". ", MAXERRDETAIL);
/* SCSI device status */
706 snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
707 strlcat(msg, msgchunk, MAXERRDETAIL);
708 // SCSI 3 status codes
709 switch (hdr->status) {
711 strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
714 strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
717 strlcat(msg, "BUSY", MAXERRDETAIL);
720 strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
723 strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
726 strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
729 strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
732 strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
735 strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
738 strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
741 strlcat(msg, "Unknown", MAXERRDETAIL);
744 strlcat(msg, ". ", MAXERRDETAIL);
/* raw sense bytes, one two-digit hex value per byte */
746 if (hdr->sb_len_wr) {
747 snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
748 strlcat(msg, msgchunk, MAXERRDETAIL);
749 for (i = 0; i < hdr->sb_len_wr; i++) {
750 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
751 strlcat(msg, msgchunk, MAXERRDETAIL);
753 strlcat(msg, ". ", MAXERRDETAIL);
/* bytes the driver reports as not transferred */
755 if (hdr->resid != 0) {
756 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
757 strlcat(msg, msgchunk, MAXERRDETAIL);
/* nothing flagged and nothing written: fall back to a fixed notice */
761 if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
762 strncpy(msg, "SG Driver did not report a Host, Driver or Device check",
769 * get max file size from read capacity.
/*
 * get_file_size hook: derive the size of f from a Read Capacity command.
 *
 * Nothing is done when the size of f is already known. Files that are
 * neither FIO_TYPE_BLOCK nor FIO_TYPE_CHAR are rejected with EINVAL.
 * Otherwise fio_sgio_read_capacity supplies the block size and the highest
 * LBA, and the size is stored as (max_lba + 1) * bs before the file is
 * marked as having a known size.
 *
 * NOTE(review): the declarations of bs and ret, the test of the
 * read-capacity result and the return statements are not visible in this
 * listing.
 */
771 static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
774 * get_file_size is being called even before sgio_init is
775 * called, so none of the sg_io structures are
776 * initialized in the thread_data yet. So we need to do the
777 * ReadCapacity without any of those helpers. One of the effects
778 * is that ReadCapacity may get called 4 times on each open:
779 * readcap(10) followed by readcap(16) if needed - just to get
780 * the file size after the init occurs - it will be called
781 * again when "type_check" is called during structure
782 * initialization I'm not sure how to prevent this little
786 unsigned long long max_lba = 0;
789 if (fio_file_size_known(f))
792 if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) {
793 td_verror(td, EINVAL, "wrong file type");
794 log_err("ioengine sg only works on block or character devices\n");
798 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
800 td_verror(td, td->error, "fio_sgio_read_capacity");
801 log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
805 f->real_file_size = (max_lba + 1) * bs;
806 fio_file_set_size_known(f);
/*
 * Operations table for the sg engine, handed to register_ioengine() by
 * fio_sgio_register. The getevents and event hooks may be cleared at run
 * time by fio_sgio_type_check when the file is a block device.
 * NOTE(review): the line numbered 812 (presumably the engine name) is not
 * visible in this listing.
 */
811 static struct ioengine_ops ioengine = {
813 .version = FIO_IOOPS_VERSION,
814 .init = fio_sgio_init,
815 .prep = fio_sgio_prep,
816 .queue = fio_sgio_queue,
817 .getevents = fio_sgio_getevents,
818 .errdetails = fio_sgio_errdetails,
819 .event = fio_sgio_event,
820 .cleanup = fio_sgio_cleanup,
821 .open_file = fio_sgio_open,
822 .close_file = generic_close_file,
823 .get_file_size = fio_sgio_get_file_size,
824 .flags = FIO_SYNCIO | FIO_RAWIO,
827 #else /* FIO_HAVE_SGIO */
830 * When we have a proper configure system in place, we simply won't build
831 * and install this io engine. For now install a crippled version that
832 * just complains and fails to load.
/*
 * Fallback init hook for builds without FIO_HAVE_SGIO (this is the #else
 * side of that conditional): it logs that the sg engine is unavailable.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably it reports failure so the engine cannot be used -- confirm.
 */
834 static int fio_sgio_init(struct thread_data fio_unused *td)
836 log_err("fio: ioengine sg not available\n");
/*
 * Fallback operations table for builds without FIO_HAVE_SGIO: of the hooks
 * visible here only init is provided.
 * NOTE(review): the line numbered 841 (presumably the engine name) is not
 * visible in this listing.
 */
840 static struct ioengine_ops ioengine = {
842 .version = FIO_IOOPS_VERSION,
843 .init = fio_sgio_init,
/*
 * Register the ioengine table with fio.
 * NOTE(review): fio_init is presumably a constructor attribute that runs
 * this at program start -- confirm against its definition.
 */
848 static void fio_init fio_sgio_register(void)
850 register_ioengine(&ioengine);
/*
 * Remove the ioengine table from fio again.
 * NOTE(review): fio_exit is presumably a destructor attribute that runs
 * this at program exit -- confirm against its definition.
 */
853 static void fio_exit fio_sgio_unregister(void)
855 unregister_ioengine(&ioengine);