4 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
/*
 * Threshold compared against LBAs in this file to choose between the
 * 10-byte and the 16-byte forms of the SCSI commands.
 */
18 #define MAX_10B_LBA 0xFFFFFFFFULL
19 #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
20 #define MAX_SB 64 // sense block maximum return size
/*
 * Per-command buffers: cdb holds the command bytes and sb is the sense
 * buffer whose size is reported through hdr->mx_sb_len.
 */
23 unsigned char cdb[16]; // increase to support 16 byte commands
24 unsigned char sb[MAX_SB]; // add sense block to commands
/* Array of per-command buffers; sgio_hdr_init() indexes it by io_u->index. */
29 struct sgio_cmd *cmds;
/*
 * Reset an sg_io_hdr for io_u and fill in the fields common to all commands.
 *
 * sd:   engine data; sd->cmds[io_u->index] supplies the command buffers.
 * hdr:  header to initialize; it is zeroed first.
 * io_u: I/O unit supplying the index (used as pack_id) and the transfer
 *       buffer pointer and length.
 * fs:   callers in this file pass 1 for reads/writes and 0 for sync.
 *       NOTE(review): the statement that tests 'fs' is not visible here;
 *       presumably it gates the dxferp/dxfer_len assignments - confirm.
 */
38 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
39 struct io_u *io_u, int fs)
41 struct sgio_cmd *sc = &sd->cmds[io_u->index];
/* start from a clean header and a clean command block */
43 memset(hdr, 0, sizeof(*hdr));
44 memset(sc->cdb, 0, sizeof(sc->cdb));
46 hdr->interface_id = 'S';
/* always the size of the whole cdb array, whichever opcode is written later */
48 hdr->cmd_len = sizeof(sc->cdb);
50 hdr->mx_sb_len = sizeof(sc->sb);
51 hdr->pack_id = io_u->index;
55 hdr->dxferp = io_u->xfer_buf;
56 hdr->dxfer_len = io_u->xfer_buflen;
/*
 * Scan the first 'fds' entries of pfds for one whose revents has POLLIN set.
 * NOTE(review): the return statements are not visible here; the caller uses
 * the result as a boolean, so presumably non-zero means "found" - confirm.
 */
60 static int pollin_events(struct pollfd *pfds, int fds)
64 for (i = 0; i < fds; i++)
65 if (pfds[i].revents & POLLIN)
/*
 * Read 'size' bytes from fd into 'data' using read().
 * EAGAIN and EINTR are singled out after the read.
 * NOTE(review): what happens for those errno values (presumably a retry)
 * and the return convention are not visible here - confirm.
 */
71 static int sg_fd_read(int fd, void *data, size_t size)
78 ret = read(fd, data, size);
80 if (errno == EAGAIN || errno == EINTR)
/*
 * Collect completed SG requests.
 *
 * Sets up one pollfd per file, polls them for POLLIN, reads completed
 * sg_io_hdr records from each file into sd->sgbuf and stores the usr_ptr of
 * each header in sd->events[]. A header with SG_INFO_CHECK set is copied
 * back into its io_u, which is marked with EIO. File status flags saved in
 * sd->fd_flags are restored at the end.
 *
 * The timespec argument is marked fio_unused.
 * NOTE(review): the 'max' parameter, the declarations of 'f', 'p' and
 * 'io_u', the loop around poll() and the return value are not visible here.
 */
100 static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
102 const struct timespec fio_unused *t)
104 struct sgio_data *sd = td->io_ops_data;
105 int left = max, eventNum, ret, r = 0;
106 void *buf = sd->sgbuf;
107 unsigned int i, events;
111 * Fill in the file descriptors
113 for_each_file(td, f, i) {
115 * don't block for min events == 0
/*
 * fd_flags[i] holds either the value returned by fio_set_fd_nonblocking()
 * or -1; -1 makes the restore loop at the end skip this file.
 */
118 sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
120 sd->fd_flags[i] = -1;
122 sd->pfds[i].fd = f->fd;
123 sd->pfds[i].events = POLLIN;
129 dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
/* a timeout of -1 makes poll() wait without a time limit */
135 ret = poll(sd->pfds, td->o.nr_files, -1);
139 td_verror(td, errno, "poll");
144 if (pollin_events(sd->pfds, td->o.nr_files))
/* read up to 'left' headers from each file, one sg_io_hdr at a time */
154 for_each_file(td, f, i) {
155 for (eventNum = 0; eventNum < left; eventNum++) {
156 ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
157 dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
160 td_verror(td, r, "sg_read");
/* advance the write position by one header */
163 p += sizeof(struct sg_io_hdr);
165 dprint(FD_IO, "sgio_getevents: events: %d\n", events);
169 if (r < 0 && !events)
/* translate the headers collected in buf into io_u pointers */
179 for (i = 0; i < events; i++) {
180 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
181 sd->events[i] = hdr->usr_ptr;
183 /* record if an io error occurred, ignore resid */
184 if (hdr->info & SG_INFO_CHECK) {
186 io_u = (struct io_u *)(hdr->usr_ptr);
/* keep the returned header in the io_u so its status fields stay available */
187 memcpy((void*)&(io_u->hdr), (void*)hdr, sizeof(struct sg_io_hdr));
188 sd->events[i]->error = EIO;
/* put back the file status flags saved before polling */
194 for_each_file(td, f, i) {
195 if (sd->fd_flags[i] == -1)
198 if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
199 log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
/*
 * Execute the io_u's prepared sg_io_hdr through the SG_IO ioctl on f->fd.
 * The io_u is stored in sd->events[0] before the ioctl is issued.
 * Returns FIO_Q_COMPLETED on the visible return path.
 * NOTE(review): the handling of a failing ioctl and the statement executed
 * when SG_INFO_CHECK is set are not visible here.
 */
206 static int fio_sgio_ioctl_doio(struct thread_data *td,
207 struct fio_file *f, struct io_u *io_u)
209 struct sgio_data *sd = td->io_ops_data;
210 struct sg_io_hdr *hdr = &io_u->hdr;
213 sd->events[0] = io_u;
215 ret = ioctl(f->fd, SG_IO, hdr);
219 /* record if an io error occurred */
220 if (hdr->info & SG_INFO_CHECK)
223 return FIO_Q_COMPLETED;
/*
 * Submit the io_u's sg_io_hdr by write()ing it to f->fd; a read() of the
 * header from the same descriptor follows.
 * Returns FIO_Q_COMPLETED on the visible return path.
 * NOTE(review): the conditions around the read() (presumably do_sync) and
 * the error/early-return paths are not visible here - confirm.
 */
226 static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
228 struct sg_io_hdr *hdr = &io_u->hdr;
231 ret = write(f->fd, hdr, sizeof(*hdr));
236 ret = read(f->fd, hdr, sizeof(*hdr));
240 /* record if an io error occurred */
241 if (hdr->info & SG_INFO_CHECK)
244 return FIO_Q_COMPLETED;
/*
 * Dispatch an io_u to the submission path that matches its file type:
 * FIO_TYPE_BD files go through fio_sgio_ioctl_doio(), the other branch
 * uses fio_sgio_rw_doio(). In both branches io_u->error is copied into
 * td->error afterwards.
 * NOTE(review): the return statement is not visible here.
 */
250 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
252 struct fio_file *f = io_u->file;
255 if (f->filetype == FIO_TYPE_BD) {
256 ret = fio_sgio_ioctl_doio(td, f, io_u);
257 td->error = io_u->error;
259 ret = fio_sgio_rw_doio(f, io_u, do_sync);
261 td->error = io_u->error;
/*
 * Build the SCSI command for an io_u in its sg_io_hdr.
 *
 * The transfer length and offset are converted to block units using sd->bs.
 * Reads and writes get a data direction and a READ/WRITE opcode; any other
 * direction is treated as a cache sync with no data transfer. The 10-byte
 * opcode is chosen while the LBA is below MAX_10B_LBA, the 16-byte opcode
 * otherwise. For data commands the LBA and block count are then stored in
 * the CDB, most significant byte first. The timeout is SCSI_TIMEOUT_MS.
 *
 * NOTE(review): the return statements are not visible here.
 */
267 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
269 struct sg_io_hdr *hdr = &io_u->hdr;
270 struct sgio_data *sd = td->io_ops_data;
271 long long nr_blocks, lba;
/* mask test: only a valid alignment check if sd->bs is a power of two */
273 if (io_u->xfer_buflen & (sd->bs - 1)) {
274 log_err("read/write not sector aligned\n");
278 nr_blocks = io_u->xfer_buflen / sd->bs;
279 lba = io_u->offset / sd->bs;
281 if (io_u->ddir == DDIR_READ) {
282 sgio_hdr_init(sd, hdr, io_u, 1);
284 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
285 if (lba < MAX_10B_LBA)
286 hdr->cmdp[0] = 0x28; // read(10)
288 hdr->cmdp[0] = 0x88; // read(16)
289 } else if (io_u->ddir == DDIR_WRITE) {
290 sgio_hdr_init(sd, hdr, io_u, 1);
292 hdr->dxfer_direction = SG_DXFER_TO_DEV;
293 if (lba < MAX_10B_LBA)
294 hdr->cmdp[0] = 0x2a; // write(10)
296 hdr->cmdp[0] = 0x8a; // write(16)
/* remaining directions: no data phase, issue a cache sync */
298 sgio_hdr_init(sd, hdr, io_u, 0);
299 hdr->dxfer_direction = SG_DXFER_NONE;
300 if (lba < MAX_10B_LBA)
301 hdr->cmdp[0] = 0x35; // synccache(10)
303 hdr->cmdp[0] = 0x91; // synccache(16)
307 * for synccache, we leave lba and length to 0 to sync all
310 if (hdr->dxfer_direction != SG_DXFER_NONE) {
/* 10-byte layout: 32-bit LBA in bytes 2-5, 16-bit block count in bytes 7-8 */
312 if (lba < MAX_10B_LBA) {
313 hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
314 hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
315 hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff);
316 hdr->cmdp[5] = (unsigned char) (lba & 0xff);
317 hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
318 hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
/* 16-byte layout: 64-bit LBA in bytes 2-9, block count in bytes 10-13 */
320 hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
321 hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
322 hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
323 hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
324 hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
325 hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
326 hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff);
327 hdr->cmdp[9] = (unsigned char) (lba & 0xff);
/*
 * NOTE(review): byte 10 shifts by 32 while bytes 11-13 shift by 16, 8 and 0;
 * for a 4-byte big-endian count the first shift would be expected to be 24.
 * Bits 24-31 of nr_blocks are never written as it stands - confirm and fix.
 */
328 hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff);
329 hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
330 hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
331 hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
335 hdr->timeout = SCSI_TIMEOUT_MS;
/*
 * Queue hook: run the read-only check, then submit the io_u through
 * fio_sgio_doio().
 *
 * A synchronous submission is selected when sync_io or odirect is set in
 * the thread options or the io_u is a sync operation. When the header
 * reports a non-zero status, hdr->resid is copied into io_u->resid. The
 * visible error path reports io_u->error with td_verror() and returns
 * FIO_Q_COMPLETED.
 * NOTE(review): the first branch of the status checks and the final return
 * are not visible here.
 */
339 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
341 struct sg_io_hdr *hdr = &io_u->hdr;
342 int ret, do_sync = 0;
344 fio_ro_check(td, io_u);
346 if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
349 ret = fio_sgio_doio(td, io_u, do_sync);
353 else if (hdr->status) {
354 io_u->resid = hdr->resid;
359 td_verror(td, io_u->error, "xfer");
360 return FIO_Q_COMPLETED;
/*
 * Event hook: return the io_u stored at index 'event' in sd->events[].
 * No bounds check is performed on 'event' in the visible code.
 */
366 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
368 struct sgio_data *sd = td->io_ops_data;
370 return sd->events[event];
/*
 * Query the device behind td->files[0] for its block size and last LBA.
 *
 * The file is opened read-only on a private descriptor and a Read
 * Capacity(10) is sent with the SG_IO ioctl. If the reported last LBA is
 * MAX_10B_LBA, a Read Capacity(16) is sent and its result is used instead.
 *
 * bs:      receives the block size decoded from the response buffer.
 * max_lba: receives the last LBA decoded from the response buffer.
 *
 * NOTE(review): the declarations of fd/ret, the assignments of hdr.cmdp,
 * hdr.cmd_len, hdr.sbp and hdr.dxferp, the error paths, the close() of fd
 * and the return value are not visible here.
 */
373 static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
374 unsigned long long *max_lba)
377 * need to do read capacity operation w/o benefit of sd or
378 * io_u structures, which are not initialized until later.
380 struct sg_io_hdr hdr;
381 unsigned char cmd[16];
382 unsigned char sb[64];
383 unsigned char buf[32]; // read capacity return
387 struct fio_file *f = td->files[0];
389 /* open file independent of rest of application */
390 fd = open(f->file_name, O_RDONLY);
394 memset(&hdr, 0, sizeof(hdr));
395 memset(cmd, 0, sizeof(cmd));
396 memset(sb, 0, sizeof(sb));
397 memset(buf, 0, sizeof(buf));
399 /* First let's try a 10 byte read capacity. */
400 hdr.interface_id = 'S';
404 hdr.mx_sb_len = sizeof(sb);
405 hdr.timeout = SCSI_TIMEOUT_MS;
406 hdr.cmdp[0] = 0x25; // Read Capacity(10)
407 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
409 hdr.dxfer_len = sizeof(buf);
411 ret = ioctl(fd, SG_IO, &hdr);
/*
 * Decode the response: bytes 0-3 are the last LBA, bytes 4-7 the block
 * size, both most significant byte first. The unsigned char operands are
 * promoted to int before shifting, so a top byte of 0x80 or above produces
 * a negative int; the mask on the max_lba line removes the resulting
 * sign-extension bits.
 */
417 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
418 *max_lba = ((buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & 0x00000000FFFFFFFFULL; // for some reason max_lba is being sign extended even though unsigned.
422 * If max lba is 0xFFFFFFFF, then need to retry with
423 * 16 byteread capacity
425 if (*max_lba == MAX_10B_LBA) {
427 hdr.cmdp[0] = 0x9e; // service action
428 hdr.cmdp[1] = 0x10; // Read Capacity(16)
/* bytes 10-13 carry the size of buf, most significant byte first */
429 hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
430 hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
431 hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
432 hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
434 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
436 hdr.dxfer_len = sizeof(buf);
438 ret = ioctl(fd, SG_IO, &hdr);
444 /* record if an io error occurred */
445 if (hdr.info & SG_INFO_CHECK)
446 td_verror(td, EIO, "fio_sgio_read_capacity");
/* 16-byte response: last LBA in bytes 0-7, block size in bytes 8-11 */
448 *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11];
449 *max_lba = ((unsigned long long)buf[0] << 56) |
450 ((unsigned long long)buf[1] << 48) |
451 ((unsigned long long)buf[2] << 40) |
452 ((unsigned long long)buf[3] << 32) |
453 ((unsigned long long)buf[4] << 24) |
454 ((unsigned long long)buf[5] << 16) |
455 ((unsigned long long)buf[6] << 8) |
456 (unsigned long long)buf[7];
/*
 * Cleanup hook for the engine data stored in td->io_ops_data.
 * NOTE(review): the body is not visible here; presumably it frees the
 * buffers allocated by fio_sgio_init() - confirm.
 */
463 static void fio_sgio_cleanup(struct thread_data *td)
465 struct sgio_data *sd = td->io_ops_data;
/*
 * Init hook: allocate and zero the engine data and attach it to the thread.
 *
 * cmds, events and sgbuf are sized by td->o.iodepth; pfds and fd_flags are
 * sized by td->o.nr_files. type_checked starts at 0 and override_sync is
 * set in the thread options.
 *
 * NOTE(review): none of the malloc() results is checked before the
 * following memset(); an allocation failure would dereference NULL.
 * The return statement is not visible here.
 */
477 static int fio_sgio_init(struct thread_data *td)
479 struct sgio_data *sd;
481 sd = malloc(sizeof(*sd));
482 memset(sd, 0, sizeof(*sd));
/* one entry per queued I/O */
483 sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
484 memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
485 sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
486 memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
/* one entry per file */
487 sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
488 memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
489 sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
490 memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
/* landing area for the headers read back in fio_sgio_getevents() */
491 sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
492 memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
493 sd->type_checked = 0;
494 td->io_ops_data = sd;
497 * we want to do it, regardless of whether odirect is set or not
499 td->o.override_sync = 1;
/*
 * Validate the file type and gather device geometry for the engine.
 *
 * Block devices are queried with the BLKSSZGET ioctl. Character devices
 * must answer SG_GET_VERSION_NUM and are then queried through
 * fio_sgio_read_capacity(). Any other file type is rejected with EINVAL.
 * For block devices the getevents and event hooks of td->io_ops are
 * cleared. On the visible success path sd->type_checked is set to 1.
 *
 * NOTE(review): the declarations of bs/version/ret, the use of bs after
 * the queries and the return statements are not visible here.
 */
503 static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
505 struct sgio_data *sd = td->io_ops_data;
507 unsigned long long max_lba = 0;
510 if (f->filetype == FIO_TYPE_BD) {
511 if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
512 td_verror(td, errno, "ioctl");
515 } else if (f->filetype == FIO_TYPE_CHAR) {
518 if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
519 td_verror(td, errno, "ioctl");
523 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
525 td_verror(td, td->error, "fio_sgio_read_capacity");
526 log_err("ioengine sg unable to read capacity successfully\n");
/*
 * NOTE(review): the message below mentions only block devices although
 * FIO_TYPE_CHAR is accepted above.
 */
530 td_verror(td, EINVAL, "wrong file type");
531 log_err("ioengine sg only works on block devices\n");
536 // Determine size of commands needed based on max_lba
537 if (max_lba >= MAX_10B_LBA) {
538 dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
539 "commands for lba above 0x%016llx/0x%016llx\n",
540 MAX_10B_LBA, max_lba);
/* block devices do not use the getevents/event hooks */
544 if (f->filetype == FIO_TYPE_BD) {
545 td->io_ops->getevents = NULL;
546 td->io_ops->event = NULL;
548 sd->type_checked = 1;
/*
 * Open hook: open the file with generic_open_file(), then run
 * fio_sgio_type_check() if engine data exists and the check has not been
 * done yet. When the check returns non-zero the file is closed again with
 * generic_close_file().
 * NOTE(review): the handling of a failed open and the return statements
 * are not visible here.
 */
553 static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
555 struct sgio_data *sd = td->io_ops_data;
558 ret = generic_open_file(td, f);
562 if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
563 ret = generic_close_file(td, f);
571 * Build an error string with details about the driver, host or scsi
572 * error contained in the sg header. Caller will use as necessary.
/*
 * Format the status fields of the io_u's sg_io_hdr as readable text.
 *
 * When SG_INFO_CHECK is set, the message is assembled in a zeroed
 * MAXERRDETAIL-byte buffer from up to five parts: host status, driver
 * status (low nibble and high nibble decoded separately), SCSI status,
 * the sense bytes as hex, and the residual byte count. Each part is
 * formatted into msgchunk and appended with strlcat(), which is bounded
 * by MAXERRDETAIL.
 * When the flag is not set, a fixed explanatory string is duplicated
 * with strdup().
 *
 * NOTE(review): the case labels, the declaration of 'i', the condition
 * guarding the SCSI status part, how msg becomes the return value or is
 * freed, and the return statement are not visible here. The calloc()
 * result is not checked in the visible code.
 */
574 static char *fio_sgio_errdetails(struct io_u *io_u)
576 struct sg_io_hdr *hdr = &io_u->hdr;
577 #define MAXERRDETAIL 1024
578 #define MAXMSGCHUNK 128
579 char *msg, msgchunk[MAXMSGCHUNK], *ret = NULL;
582 msg = calloc(1, MAXERRDETAIL);
585 * can't seem to find sg_err.h, so I'll just echo the define values
586 * so others can search on internet to find clearer clues of meaning.
588 if (hdr->info & SG_INFO_CHECK) {
/* part 1: host adapter status */
590 if (hdr->host_status) {
591 snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
592 strlcat(msg, msgchunk, MAXERRDETAIL);
593 switch (hdr->host_status) {
595 strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
598 strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
601 strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
604 strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
607 strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
610 strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
613 strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
616 strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
619 strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
622 strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
625 strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
628 strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
631 strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
634 strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
637 strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
640 strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
643 strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
646 strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
649 strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
652 strlcat(msg, "Unknown", MAXERRDETAIL);
655 strlcat(msg, ". ", MAXERRDETAIL);
/* part 2: driver status; the low nibble is decoded first, then the high nibble */
657 if (hdr->driver_status) {
658 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
659 strlcat(msg, msgchunk, MAXERRDETAIL);
660 switch (hdr->driver_status & 0x0F) {
662 strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
665 strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
668 strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
671 strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
674 strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
677 strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
680 strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
683 strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
686 strlcat(msg, "Unknown", MAXERRDETAIL);
689 strlcat(msg, "; ", MAXERRDETAIL);
690 switch (hdr->driver_status & 0xF0) {
692 strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
695 strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
698 strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
701 strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
704 strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
707 strlcat(msg, ". ", MAXERRDETAIL);
/* part 3: SCSI status byte returned by the device */
710 snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
711 strlcat(msg, msgchunk, MAXERRDETAIL);
712 // SCSI 3 status codes
713 switch (hdr->status) {
715 strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
718 strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
721 strlcat(msg, "BUSY", MAXERRDETAIL);
724 strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
727 strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
730 strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
733 strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
736 strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
739 strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
742 strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
745 strlcat(msg, "Unknown", MAXERRDETAIL);
748 strlcat(msg, ". ", MAXERRDETAIL);
/* part 4: raw sense bytes written by the driver, dumped as hex */
750 if (hdr->sb_len_wr) {
751 snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
752 strlcat(msg, msgchunk, MAXERRDETAIL);
753 for (i = 0; i < hdr->sb_len_wr; i++) {
754 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
755 strlcat(msg, msgchunk, MAXERRDETAIL);
757 strlcat(msg, ". ", MAXERRDETAIL);
/* part 5: residual count, i.e. bytes of the request that were not transferred */
759 if (hdr->resid != 0) {
760 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
761 strlcat(msg, msgchunk, MAXERRDETAIL);
/* no check condition flagged: return a fixed explanatory string instead */
767 ret = strdup("SG Driver did not report a Host, Driver or Device check");
773 * get max file size from read capacity.
/*
 * get_file_size hook: derive the file size from the device capacity.
 *
 * If the size is already known nothing more is computed. Otherwise
 * fio_sgio_read_capacity() supplies the block size and last LBA, and
 * real_file_size is set to (max_lba + 1) * bs before the file is marked
 * as size-known.
 *
 * NOTE(review): the declarations of bs/ret and the return statements are
 * not visible here.
 */
775 static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
778 * get_file_size is being called even before sgio_init is
779 * called, so none of the sg_io structures are
780 * initialized in the thread_data yet. So we need to do the
781 * ReadCapacity without any of those helpers. One of the effects
782 * is that ReadCapacity may get called 4 times on each open:
783 * readcap(10) followed by readcap(16) if needed - just to get
784 * the file size after the init occurs - it will be called
785 * again when "type_check" is called during structure
786 * initialization I'm not sure how to prevent this little
790 unsigned long long max_lba = 0;
793 if (fio_file_size_known(f))
796 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
798 td_verror(td, td->error, "fio_sgio_read_capacity");
799 log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
/* max_lba is the last addressable block, so the block count is max_lba + 1 */
803 f->real_file_size = (max_lba + 1) * bs;
804 fio_file_set_size_known(f);
/*
 * Operations table for this engine, wiring the hooks defined in this file.
 * close_file uses the generic helper; get_file_size is engine-specific.
 * fio_sgio_type_check() clears getevents and event for block devices.
 * NOTE(review): the .name initializer is not visible here.
 */
809 static struct ioengine_ops ioengine = {
811 .version = FIO_IOOPS_VERSION,
812 .init = fio_sgio_init,
813 .prep = fio_sgio_prep,
814 .queue = fio_sgio_queue,
815 .getevents = fio_sgio_getevents,
816 .errdetails = fio_sgio_errdetails,
817 .event = fio_sgio_event,
818 .cleanup = fio_sgio_cleanup,
819 .open_file = fio_sgio_open,
820 .close_file = generic_close_file,
821 .get_file_size = fio_sgio_get_file_size, // generic_get_file_size
822 .flags = FIO_SYNCIO | FIO_RAWIO,
825 #else /* FIO_HAVE_SGIO */
828 * When we have a proper configure system in place, we simply won't build
829 * and install this io engine. For now install a crippled version that
830 * just complains and fails to load.
/*
 * Fallback init hook for builds without FIO_HAVE_SGIO: logs that the
 * engine is not available.
 * NOTE(review): the return statement is not visible here; presumably it
 * returns a failure code so the engine cannot be used - confirm.
 */
832 static int fio_sgio_init(struct thread_data fio_unused *td)
834 log_err("fio: ioengine sg not available\n");
/*
 * Reduced operations table for builds without FIO_HAVE_SGIO: only the
 * version and the failing init hook are visible.
 */
838 static struct ioengine_ops ioengine = {
840 .version = FIO_IOOPS_VERSION,
841 .init = fio_sgio_init,
/*
 * Register this engine's operations table with register_ioengine().
 * NOTE(review): fio_init is presumably a run-at-startup attribute macro
 * defined elsewhere - confirm.
 */
846 static void fio_init fio_sgio_register(void)
848 register_ioengine(&ioengine);
/*
 * Remove this engine's operations table with unregister_ioengine().
 * NOTE(review): fio_exit is presumably a run-at-exit attribute macro
 * defined elsewhere - confirm.
 */
851 static void fio_exit fio_sgio_unregister(void)
853 unregister_ioengine(&ioengine);