4 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
// Threshold between 10-byte and 16-byte addressing: fio_sgio_prep builds
// 10-byte CDBs for LBAs below this value, and a READ CAPACITY(10) result equal
// to it triggers the READ CAPACITY(16) retry in fio_sgio_read_capacity.
18 #define MAX_10B_LBA 0xFFFFFFFFULL
19 #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
20 #define MAX_SB 64 // sense block maximum return size
// Per-command buffers; sgio_hdr_init looks them up as sd->cmds[io_u->index],
// zeroes cdb and uses the sizes of both arrays for hdr->cmd_len / hdr->mx_sb_len.
23 unsigned char cdb[16]; // increase to support 16 byte commands
24 unsigned char sb[MAX_SB]; // add sense block to commands
// Array of command buffers indexed by io_u->index; fio_sgio_init allocates
// td->o.iodepth entries.
29 struct sgio_cmd *cmds;
/*
 * Reset an sg_io_hdr and the command buffer belonging to io_u, then fill in
 * the fields shared by every command: interface id, CDB length, sense buffer
 * size, pack_id (the io_u index) and the data buffer pointer/length.
 *
 * @sd:   engine data holding the per-io_u command buffers
 * @hdr:  header to initialise (fully zeroed first)
 * @io_u: I/O unit the header describes
 * @fs:   callers in fio_sgio_prep pass 1 for reads/writes and 0 for sync;
 *        presumably gates the dxferp/dxfer_len setup - the guarding line is
 *        not visible in this view, confirm.
 *
 * NOTE(review): cmd_len is always sizeof(sc->cdb) (16), including when
 * fio_sgio_prep selects a 10-byte opcode; confirm this is adjusted elsewhere
 * or accepted by the target.
 */
39 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
40 struct io_u *io_u, int fs)
42 struct sgio_cmd *sc = &sd->cmds[io_u->index];
44 memset(hdr, 0, sizeof(*hdr));
45 memset(sc->cdb, 0, sizeof(sc->cdb));
47 hdr->interface_id = 'S';
49 hdr->cmd_len = sizeof(sc->cdb);
51 hdr->mx_sb_len = sizeof(sc->sb);
52 hdr->pack_id = io_u->index;
56 hdr->dxferp = io_u->xfer_buf;
57 hdr->dxfer_len = io_u->xfer_buflen;
/*
 * Scan the first `fds` entries of pfds and test each revents field for
 * POLLIN. The return statements are not visible in this view; the caller in
 * fio_sgio_getevents uses the result as a truth value (presumably non-zero
 * when at least one descriptor is readable - confirm).
 */
61 static int pollin_events(struct pollfd *pfds, int fds)
65 for (i = 0; i < fds; i++)
66 if (pfds[i].revents & POLLIN)
/*
 * Reap completed sg commands for the files of this thread.
 *
 * Visible flow:
 *  - every file descriptor is entered in sd->pfds for POLLIN, and the value
 *    returned by fio_set_fd_nonblocking() is kept in sd->fd_flags;
 *  - poll() is called on all td->o.nr_files descriptors with a timeout of -1,
 *    and a poll failure is reported through td_verror();
 *  - completed sg_io_hdr structures are read() from each fd, with up to
 *    `left` read attempts per file; EAGAIN and EINTR are tested separately
 *    from other read errors, which are reported through td_verror();
 *  - each reaped header is mapped back to its io_u through hdr->usr_ptr and
 *    stored in sd->events[]; headers flagged SG_INFO_CHECK are copied into
 *    io_u->hdr and the io_u error is set to EIO (resid is ignored here);
 *  - fcntl flags are restored from sd->fd_flags, with entries equal to -1
 *    tested first (presumably skipped; that statement is not visible).
 *
 * @t is tagged fio_unused and is not consulted by the visible code.
 *
 * NOTE(review): `events` is declared without an initializer and later
 * incremented; its reset is not visible in this view - confirm it is zeroed
 * before the read loop.
 * NOTE(review): read() fills `p` while the final loop indexes `buf`
 * (sd->sgbuf); the assignment of p is not visible - confirm p walks sgbuf.
 * NOTE(review): loop exits, return statements and the handling of min/max
 * are not visible in this view.
 */
72 static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
74 const struct timespec fio_unused *t)
76 struct sgio_data *sd = td->io_ops->data;
77 int left = max, eventNum, ret, r = 0;
78 void *buf = sd->sgbuf;
79 unsigned int i, events;
83 * Fill in the file descriptors
85 for_each_file(td, f, i) {
87 * don't block for min events == 0
90 sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
94 sd->pfds[i].fd = f->fd;
95 sd->pfds[i].events = POLLIN;
101 dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
107 ret = poll(sd->pfds, td->o.nr_files, -1);
111 td_verror(td, errno, "poll");
116 if (pollin_events(sd->pfds, td->o.nr_files))
126 for_each_file(td, f, i) {
127 for (eventNum = 0; eventNum < left; eventNum++) {
128 ret = read(f->fd, p, sizeof(struct sg_io_hdr));
129 dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
132 * not sure if EINTR is needed,
133 * but seems like it should be.
135 if (errno == EAGAIN || errno == EINTR)
138 td_verror(td, errno, "read");
142 events += 1; /* ret / sizeof(struct sg_io_hdr); */
143 dprint(FD_IO, "sgio_getevents: events: %d\n", events);
158 for (i = 0; i < events; i++) {
159 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
160 sd->events[i] = hdr->usr_ptr;
162 /* record if an io error occurred, ignore resid */
163 if (hdr->info & SG_INFO_CHECK) {
165 io_u = (struct io_u *)(hdr->usr_ptr);
166 memcpy((void*)&(io_u->hdr), (void*)hdr, sizeof(struct sg_io_hdr));
167 sd->events[i]->error = EIO;
173 for_each_file(td, f, i) {
174 if (sd->fd_flags[i] == -1)
177 if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
178 log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
/*
 * Issue one command with the SG_IO ioctl on f->fd. The io_u is stored in
 * sd->events[0] before the ioctl. After the call the header is tested for
 * SG_INFO_CHECK (the statement that records the error is not visible in this
 * view), and FIO_Q_COMPLETED is returned on the visible return path.
 */
185 static int fio_sgio_ioctl_doio(struct thread_data *td,
186 struct fio_file *f, struct io_u *io_u)
188 struct sgio_data *sd = td->io_ops->data;
189 struct sg_io_hdr *hdr = &io_u->hdr;
192 sd->events[0] = io_u;
194 ret = ioctl(f->fd, SG_IO, hdr);
198 /* record if an io error occurred */
199 if (hdr->info & SG_INFO_CHECK)
202 return FIO_Q_COMPLETED;
/*
 * Submit one command through the sg read/write interface: the header is
 * written to f->fd and, on the visible path, read back from the same fd and
 * tested for SG_INFO_CHECK before FIO_Q_COMPLETED is returned.
 *
 * @do_sync: presumably selects whether the completion is read back
 *           immediately - the guarding condition and any other return value
 *           are not visible in this view, confirm.
 */
205 static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
207 struct sg_io_hdr *hdr = &io_u->hdr;
210 ret = write(f->fd, hdr, sizeof(*hdr));
215 ret = read(f->fd, hdr, sizeof(*hdr));
219 /* record if an io error occurred */
220 if (hdr->info & SG_INFO_CHECK)
223 return FIO_Q_COMPLETED;
/*
 * Dispatch an io_u to the matching submission path: block devices
 * (FIO_TYPE_BD) go through the SG_IO ioctl helper, everything else through
 * the write/read helper. In both branches io_u->error is copied to td->error
 * afterwards.
 */
229 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
231 struct fio_file *f = io_u->file;
234 if (f->filetype == FIO_TYPE_BD) {
235 ret = fio_sgio_ioctl_doio(td, f, io_u);
236 td->error = io_u->error;
238 ret = fio_sgio_rw_doio(f, io_u, do_sync);
240 td->error = io_u->error;
/*
 * Build the SCSI command for an io_u.
 *
 * The transfer length must be a multiple of sd->bs; the check uses a bit mask,
 * so it assumes sd->bs is a power of two. The byte offset and length are
 * converted to an LBA and a block count, then:
 *  - DDIR_READ  -> READ(10) 0x28 or READ(16) 0x88, data from device
 *  - DDIR_WRITE -> WRITE(10) 0x2a or WRITE(16) 0x8a, data to device
 *  - otherwise  -> SYNCHRONIZE CACHE(10) 0x35 or (16) 0x91, no data phase
 * The 10-byte form is chosen when lba < MAX_10B_LBA. For data commands the
 * LBA and block count are stored big-endian in the CDB; sync commands keep
 * them zero. Every command gets the fixed SCSI_TIMEOUT_MS timeout.
 *
 * NOTE(review): the 10-byte form stores only the low 16 bits of nr_blocks
 * (cmdp[7..8]); larger transfers are truncated by the visible code.
 * NOTE(review): the return statements are not visible in this view.
 */
246 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
248 struct sg_io_hdr *hdr = &io_u->hdr;
249 struct sgio_data *sd = td->io_ops->data;
250 long long nr_blocks, lba;
252 if (io_u->xfer_buflen & (sd->bs - 1)) {
253 log_err("read/write not sector aligned\n");
257 nr_blocks = io_u->xfer_buflen / sd->bs;
258 lba = io_u->offset / sd->bs;
260 if (io_u->ddir == DDIR_READ) {
261 sgio_hdr_init(sd, hdr, io_u, 1);
263 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
264 if (lba < MAX_10B_LBA)
265 hdr->cmdp[0] = 0x28; // read(10)
267 hdr->cmdp[0] = 0x88; // read(16)
268 } else if (io_u->ddir == DDIR_WRITE) {
269 sgio_hdr_init(sd, hdr, io_u, 1);
271 hdr->dxfer_direction = SG_DXFER_TO_DEV;
272 if (lba < MAX_10B_LBA)
273 hdr->cmdp[0] = 0x2a; // write(10)
275 hdr->cmdp[0] = 0x8a; // write(16)
277 sgio_hdr_init(sd, hdr, io_u, 0);
278 hdr->dxfer_direction = SG_DXFER_NONE;
279 if (lba < MAX_10B_LBA)
280 hdr->cmdp[0] = 0x35; // synccache(10)
282 hdr->cmdp[0] = 0x91; // synccache(16)
286 * for synccache, we leave lba and length to 0 to sync all
289 if (hdr->dxfer_direction != SG_DXFER_NONE) {
291 if (lba < MAX_10B_LBA) {
// 10-byte CDB: 32-bit LBA in bytes 2..5, 16-bit block count in bytes 7..8
292 hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
293 hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
294 hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff);
295 hdr->cmdp[5] = (unsigned char) (lba & 0xff);
296 hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
297 hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
// 16-byte CDB: 64-bit LBA in bytes 2..9, 32-bit block count in bytes 10..13
299 hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
300 hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
301 hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
302 hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
303 hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
304 hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
305 hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff);
306 hdr->cmdp[9] = (unsigned char) (lba & 0xff);
// NOTE(review): the shift below is 32 while the following bytes use 16, 8, 0;
// the top byte of a big-endian 32-bit count would be (nr_blocks >> 24).
// As written, bits 24..31 of nr_blocks are never stored - likely a bug.
307 hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff);
308 hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
309 hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
310 hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
314 hdr->timeout = SCSI_TIMEOUT_MS;
/*
 * Queue hook: run the read-only check, then submit the io_u through
 * fio_sgio_doio(). The condition on sync_io, odirect and sync-type data
 * directions feeds do_sync (the assignment itself is not visible in this
 * view). When the header reports a non-zero SCSI status the residual count is
 * copied to io_u->resid. The visible tail reports io_u->error through
 * td_verror() and returns FIO_Q_COMPLETED; the conditions guarding it and the
 * other return paths are not visible here.
 */
318 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
320 struct sg_io_hdr *hdr = &io_u->hdr;
321 int ret, do_sync = 0;
323 fio_ro_check(td, io_u);
325 if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
328 ret = fio_sgio_doio(td, io_u, do_sync);
332 else if (hdr->status) {
333 io_u->resid = hdr->resid;
338 td_verror(td, io_u->error, "xfer");
339 return FIO_Q_COMPLETED;
/*
 * Event hook: return the io_u stored at index `event` in sd->events[].
 * The index is used as given; no range check is visible.
 */
345 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
347 struct sgio_data *sd = td->io_ops->data;
349 return sd->events[event];
/*
 * Query block size and highest LBA of the device with READ CAPACITY.
 *
 * The function opens td->files[0] by name (read-only) on its own descriptor
 * and issues READ CAPACITY(10) (opcode 0x25) through the SG_IO ioctl. The
 * reply is decoded big-endian: bytes 0..3 -> *max_lba, bytes 4..7 -> *bs.
 * If the reported LBA equals MAX_10B_LBA the request is repeated as
 * READ CAPACITY(16) (opcode 0x9e, service action 0x10, allocation length in
 * CDB bytes 10..13) and decoded as bytes 0..7 -> *max_lba, bytes 8..11 -> *bs.
 *
 * @td:      thread whose first file is queried
 * @bs:      out, logical block size in bytes
 * @max_lba: out, highest addressable LBA
 *
 * NOTE(review): in the 32-bit assemblies the unsigned char operands are
 * promoted to int before shifting, so a top byte with bit 7 set shifts into
 * the sign bit and the int result is sign-extended when widened. That is the
 * sign extension the original author masks off on the max_lba line; casting
 * the bytes to an unsigned type before shifting would avoid it.
 * NOTE(review): the descriptor from open() is not closed in any visible
 * line, and the error returns are not visible in this view - confirm every
 * exit path closes fd.
 * NOTE(review): the visible SG_INFO_CHECK test follows only the 16-byte
 * request; confirm the 10-byte request is checked as well.
 */
352 static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
353 unsigned long long *max_lba)
356 * need to do read capacity operation w/o benefit of sd or
357 * io_u structures, which are not initialized until later.
359 struct sg_io_hdr hdr;
360 unsigned char cmd[16];
361 unsigned char sb[64];
362 unsigned char buf[32]; // read capacity return
366 struct fio_file *f = td->files[0];
368 /* open file independent of rest of application */
369 fd = open(f->file_name, O_RDONLY);
373 memset(&hdr, 0, sizeof(hdr));
374 memset(cmd, 0, sizeof(cmd));
375 memset(sb, 0, sizeof(sb));
376 memset(buf, 0, sizeof(buf));
378 /* First let's try a 10 byte read capacity. */
379 hdr.interface_id = 'S';
383 hdr.mx_sb_len = sizeof(sb);
384 hdr.timeout = SCSI_TIMEOUT_MS;
385 hdr.cmdp[0] = 0x25; // Read Capacity(10)
386 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
388 hdr.dxfer_len = sizeof(buf);
390 ret = ioctl(fd, SG_IO, &hdr);
396 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
397 *max_lba = ((buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & 0x00000000FFFFFFFFULL; // for some reason max_lba is being sign extended even though unsigned.
401 * If max lba is 0xFFFFFFFF, then need to retry with
402 * 16 byteread capacity
404 if (*max_lba == MAX_10B_LBA) {
406 hdr.cmdp[0] = 0x9e; // Read Capacity(16)
407 hdr.cmdp[1] = 0x10; // service action
408 hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
409 hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
410 hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
411 hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
413 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
415 hdr.dxfer_len = sizeof(buf);
417 ret = ioctl(fd, SG_IO, &hdr);
423 /* record if an io error occurred */
424 if (hdr.info & SG_INFO_CHECK)
425 td_verror(td, EIO, "fio_sgio_read_capacity");
427 *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11];
428 *max_lba = ((unsigned long long)buf[0] << 56) |
429 ((unsigned long long)buf[1] << 48) |
430 ((unsigned long long)buf[2] << 40) |
431 ((unsigned long long)buf[3] << 32) |
432 ((unsigned long long)buf[4] << 24) |
433 ((unsigned long long)buf[5] << 16) |
434 ((unsigned long long)buf[6] << 8) |
435 (unsigned long long)buf[7];
/*
 * Cleanup hook. Only the lookup of the per-thread sgio_data is visible in
 * this view; presumably the arrays allocated in fio_sgio_init are released
 * here - confirm against the full function.
 */
442 static void fio_sgio_cleanup(struct thread_data *td)
444 struct sgio_data *sd = td->io_ops->data;
/*
 * Init hook: allocate and zero the per-thread engine state and publish it in
 * td->io_ops->data.
 *
 * Arrays sized by td->o.iodepth: cmds (command buffers), events (completed
 * io_u pointers) and sgbuf (sg_io_hdr reap buffer). Arrays sized by
 * td->o.nr_files: pfds (poll descriptors) and fd_flags (saved fcntl flags).
 * type_checked starts at 0 so the first open runs the device type check.
 * override_sync is forced on regardless of the odirect setting.
 *
 * NOTE(review): each malloc() result is passed straight to memset() with no
 * NULL check, so an allocation failure dereferences NULL; calloc() plus a
 * check would cover both steps.
 */
456 static int fio_sgio_init(struct thread_data *td)
458 struct sgio_data *sd;
460 sd = malloc(sizeof(*sd));
461 memset(sd, 0, sizeof(*sd));
462 sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
463 memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
464 sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
465 memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
466 sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
467 memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
468 sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
469 memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
470 sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
471 memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
472 sd->type_checked = 0;
473 td->io_ops->data = sd;
476 * we want to do it, regardless of whether odirect is set or not
478 td->o.override_sync = 1;
/*
 * Determine how to talk to the opened file and record device geometry.
 *
 *  - FIO_TYPE_BD:   the sector size is fetched with the BLKSSZGET ioctl.
 *  - FIO_TYPE_CHAR: the sg driver is probed with SG_GET_VERSION_NUM, then
 *                   block size and highest LBA come from
 *                   fio_sgio_read_capacity().
 *  - anything else: rejected with EINVAL.
 *
 * The highest LBA is stored in sd->max_lba (it stays 0 on the block-device
 * path, where nothing visible updates it). For block devices the getevents
 * and event hooks of td->io_ops are cleared. sd->type_checked is set so the
 * check runs once.
 *
 * NOTE(review): the "only works on block devices" message is emitted for
 * unsupported types although the character-device branch above is accepted
 * too; the wording looks stale.
 * NOTE(review): clearing getevents/event modifies the ops table reached via
 * td->io_ops; whether that table is shared between threads is not visible
 * here - confirm.
 * NOTE(review): the declaration of bs, the store into sd->bs and the return
 * statements are not visible in this view.
 */
482 static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
484 struct sgio_data *sd = td->io_ops->data;
486 unsigned long long max_lba = 0;
489 if (f->filetype == FIO_TYPE_BD) {
490 if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
491 td_verror(td, errno, "ioctl");
494 } else if (f->filetype == FIO_TYPE_CHAR) {
497 if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
498 td_verror(td, errno, "ioctl");
502 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
504 td_verror(td, td->error, "fio_sgio_read_capacity");
505 log_err("ioengine sg unable to read capacity successfully\n");
509 td_verror(td, EINVAL, "wrong file type");
510 log_err("ioengine sg only works on block devices\n");
515 // Determine size of commands needed based on max_lba
516 sd->max_lba = max_lba;
517 if (max_lba > MAX_10B_LBA) {
518 dprint(FD_IO, "sgio_type_check: using 16 byte operations: max_lba = 0x%016llx\n", max_lba);
522 if (f->filetype == FIO_TYPE_BD) {
523 td->io_ops->getevents = NULL;
524 td->io_ops->event = NULL;
526 sd->type_checked = 1;
/*
 * Open hook: open the file with generic_open_file(), then run the one-time
 * device type check when engine data exists and the check has not run yet.
 * If the type check reports failure the file is closed again with
 * generic_close_file(). The return statements are not visible in this view.
 */
531 static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
533 struct sgio_data *sd = td->io_ops->data;
536 ret = generic_open_file(td, f);
540 if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
541 ret = generic_close_file(td, f);
549 * Build an error string with details about the driver, host or scsi
550 * error contained in the sg header Caller will use as necessary.
// Sections are appended to a zeroed MAXERRDETAIL-byte buffer in this order:
// host status, driver status (low nibble = error class, high nibble =
// suggestion), SCSI status, sense bytes in hex, residual byte count.
// Each piece is formatted into msgchunk and appended with strlcat, which
// bounds the total at MAXERRDETAIL.
// NOTE(review): the case labels that map numeric codes to the names below are
// not visible in this view, so the value-to-name mapping cannot be checked here.
// NOTE(review): no check of the calloc result is visible, and the hand-over of
// msg to ret (or its release on the strdup path) is not visible - confirm.
552 static char *fio_sgio_errdetails(struct io_u *io_u)
554 struct sg_io_hdr *hdr = &io_u->hdr;
555 #define MAXERRDETAIL 1024
556 #define MAXMSGCHUNK 128
557 char *msg, msgchunk[MAXMSGCHUNK], *ret = NULL;
560 msg = calloc(MAXERRDETAIL, 1);
563 * can't seem to find sg_err.h, so I'll just echo the define values
564 * so others can search on internet to find clearer clues of meaning.
566 if (hdr->info & SG_INFO_CHECK) {
// host adapter status
568 if (hdr->host_status) {
569 snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
570 strlcat(msg, msgchunk, MAXERRDETAIL);
571 switch (hdr->host_status) {
573 strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
576 strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
579 strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
582 strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
585 strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
588 strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
591 strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
594 strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
597 strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
600 strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
603 strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
606 strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
609 strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
612 strlcat(msg, "Unknown", MAXERRDETAIL);
615 strlcat(msg, ". ", MAXERRDETAIL);
// driver status: low nibble decoded first, then the high-nibble suggestion
617 if (hdr->driver_status) {
618 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
619 strlcat(msg, msgchunk, MAXERRDETAIL);
620 switch (hdr->driver_status & 0x0F) {
622 strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
625 strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
628 strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
631 strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
634 strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
637 strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
640 strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
643 strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
646 strlcat(msg, "Unknown", MAXERRDETAIL);
649 strlcat(msg, "; ", MAXERRDETAIL);
650 switch (hdr->driver_status & 0xF0) {
652 strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
655 strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
658 strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
661 strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
664 strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
667 strlcat(msg, ". ", MAXERRDETAIL);
// SCSI status byte returned by the device (the guarding condition is not visible)
670 snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
671 strlcat(msg, msgchunk, MAXERRDETAIL);
672 // SCSI 3 status codes
673 switch (hdr->status) {
675 strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
678 strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
681 strlcat(msg, "BUSY", MAXERRDETAIL);
684 strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
687 strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
690 strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
693 strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
696 strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
699 strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
702 strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
705 strlcat(msg, "Unknown", MAXERRDETAIL);
708 strlcat(msg, ". ", MAXERRDETAIL);
// raw sense bytes written by the driver, dumped as hex
710 if (hdr->sb_len_wr) {
711 snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
712 strlcat(msg, msgchunk, MAXERRDETAIL);
713 for (i = 0; i < hdr->sb_len_wr; i++) {
714 snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
715 strlcat(msg, msgchunk, MAXERRDETAIL);
717 strlcat(msg, ". ", MAXERRDETAIL);
// residual: bytes requested but not transferred
719 if (hdr->resid != 0) {
720 snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
721 strlcat(msg, msgchunk, MAXERRDETAIL);
// fallback text; the condition selecting it is not visible in this view
727 ret = strdup("SG Driver did not report a Host, Driver or Device check");
733 * get max file size from read capacity.
// Size hook: derives f->real_file_size from READ CAPACITY as
// (max_lba + 1) * bs and marks the size as known; a file whose size is
// already known is tested for first (the early return is not visible here).
// NOTE(review): fio_sgio_read_capacity() opens td->files[0] rather than `f`,
// so with several files every file would get the size of the first one - confirm.
// NOTE(review): the declaration of bs is not visible; if it is a 32-bit
// unsigned the product is still computed in unsigned long long because
// max_lba has that type.
735 static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
738 * get_file_size is being called even before sgio_init is
739 * called, so none of the sg_io structures are
740 * initialized in the thread_data yet. So we need to do the
741 * ReadCapacity without any of those helpers. One of the effects
742 * is that ReadCapacity may get called 4 times on each open:
743 * readcap(10) followed by readcap(16) if needed - just to get
744 * the file size after the init occurs - it will be called
745 * again when "type_check" is called during structure
746 * initialization I'm not sure how to prevent this little
750 unsigned long long max_lba = 0;
753 if (fio_file_size_known(f))
756 ret = fio_sgio_read_capacity(td, &bs, &max_lba);
758 td_verror(td, td->error, "fio_sgio_read_capacity");
759 log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
763 f->real_file_size = (max_lba + 1) * bs;
764 fio_file_set_size_known(f);
/*
 * Operations table for the build with SG support. The getevents and event
 * hooks listed here are cleared at run time by fio_sgio_type_check() when the
 * target is a block device. The engine name entry is not visible in this view.
 */
769 static struct ioengine_ops ioengine = {
771 .version = FIO_IOOPS_VERSION,
772 .init = fio_sgio_init,
773 .prep = fio_sgio_prep,
774 .queue = fio_sgio_queue,
775 .getevents = fio_sgio_getevents,
776 .errdetails = fio_sgio_errdetails,
777 .event = fio_sgio_event,
778 .cleanup = fio_sgio_cleanup,
779 .open_file = fio_sgio_open,
780 .close_file = generic_close_file,
781 .get_file_size = fio_sgio_get_file_size, // generic_get_file_size
782 .flags = FIO_SYNCIO | FIO_RAWIO,
785 #else /* FIO_HAVE_SGIO */
788 * When we have a proper configure system in place, we simply won't build
789 * and install this io engine. For now install a crippled version that
790 * just complains and fails to load.
/*
 * Stub init hook for builds without FIO_HAVE_SGIO: logs that the sg engine
 * is unavailable. The return statement is not visible in this view;
 * presumably it reports failure so the engine refuses to load - confirm.
 */
792 static int fio_sgio_init(struct thread_data fio_unused *td)
794 log_err("fio: ioengine sg not available\n");
/*
 * Operations table for builds without SG support: only the version and the
 * failing init hook are visible here.
 */
798 static struct ioengine_ops ioengine = {
800 .version = FIO_IOOPS_VERSION,
801 .init = fio_sgio_init,
/*
 * Register whichever ioengine table was compiled in. The function is tagged
 * with fio_init (presumably a constructor attribute that runs it at program
 * start - the macro definition is not visible here).
 */
806 static void fio_init fio_sgio_register(void)
808 register_ioengine(&ioengine);
/*
 * Unregister the ioengine table. The function is tagged with fio_exit
 * (presumably a destructor attribute that runs it at program exit - the
 * macro definition is not visible here).
 */
811 static void fio_exit fio_sgio_unregister(void)
813 unregister_ioengine(&ioengine);