/*
 * sg engine
 *
 * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
 *
 * This ioengine can operate in two modes:
 *	sync	with block devices (/dev/sdX), or with character devices
 *		(/dev/sgY) when direct=1 or sync=1
 *	async	with character devices (/dev/sgY) when direct=0 and sync=0
 *
 * What value does queue() return for the different cases?
 *				queue() return value
 * In sync mode:
 *  /dev/sdX		RWT	FIO_Q_COMPLETED
 *  /dev/sgY		RWT	FIO_Q_COMPLETED
 *   with direct=1 or sync=1
 *
 * In async mode:
 *  /dev/sgY		RWT	FIO_Q_QUEUED
 *   direct=0 and sync=0
 *
 * Because FIO_SYNCIO is set for this ioengine, td_io_queue() will fill in
 * issue_time *before* each IO is sent to queue()
 *
 * Where are the IO counting functions called for the different cases?
 *
 * In sync mode:
 *  /dev/sdX (commit==NULL)
 *   RWT
 *    io_u_mark_depth()			called in td_io_queue()
 *    io_u_mark_submit/complete()	called in td_io_queue()
 *    issue_time			set in td_io_queue()
 *
 *  /dev/sgY with direct=1 or sync=1 (commit does nothing)
 *   RWT
 *    io_u_mark_depth()			called in td_io_queue()
 *    io_u_mark_submit/complete()	called in queue()
 *    issue_time			set in td_io_queue()
 *
 * In async mode:
 *  /dev/sgY with direct=0 and sync=0
 *   RW: read and write operations are submitted in queue()
 *    io_u_mark_depth()			called in td_io_commit()
 *    io_u_mark_submit()		called in queue()
 *    issue_time			set in td_io_queue()
 *   T: trim operations are queued in queue() and submitted in commit()
 *    io_u_mark_depth()			called in td_io_commit()
 *    io_u_mark_submit()		called in commit()
 *    issue_time			set in commit()
 *
 * A minimal job-file sketch of both modes follows this comment.
 */
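
/*
 * For illustration only: a minimal fio job sketch exercising the two modes
 * described above. The device paths are placeholders; any SCSI block device
 * (/dev/sdX) or SCSI generic character device (/dev/sgY) would do.
 *
 *	; sync mode: SG_IO ioctl against a block device,
 *	; queue() returns FIO_Q_COMPLETED
 *	[sg-sync]
 *	ioengine=sg
 *	filename=/dev/sdX
 *	rw=randread
 *	bs=4k
 *
 *	; async mode: write()/read() of sg_io_hdr against a char device,
 *	; direct=0 and sync=0 (the defaults), queue() returns FIO_Q_QUEUED
 *	[sg-async]
 *	ioengine=sg
 *	filename=/dev/sgY
 *	rw=randwrite
 *	bs=4k
 *	iodepth=16
 */
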
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <poll.h>

#include "../fio.h"
#include "../optgroup.h"

#ifdef FIO_HAVE_SGIO

#ifndef SGV4_FLAG_HIPRI
#define SGV4_FLAG_HIPRI 0x800
#endif

enum {
	FIO_SG_WRITE		= 1,
	FIO_SG_WRITE_VERIFY	= 2,
	FIO_SG_WRITE_SAME	= 3
};

struct sg_options {
	void *pad;
	unsigned int hipri;
	unsigned int readfua;
	unsigned int writefua;
	unsigned int write_mode;
};

static struct fio_option options[] = {
	{
		.name	= "hipri",
		.lname	= "High Priority",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct sg_options, hipri),
		.help	= "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_SG,
	},
	{
		.name	= "readfua",
		.lname	= "sg engine read fua flag support",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct sg_options, readfua),
		.help	= "Set FUA flag (force unit access) for all Read operations",
		.def	= "0",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_SG,
	},
	{
		.name	= "writefua",
		.lname	= "sg engine write fua flag support",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct sg_options, writefua),
		.help	= "Set FUA flag (force unit access) for all Write operations",
		.def	= "0",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_SG,
	},
	{
		.name	= "sg_write_mode",
		.lname	= "specify sg write mode",
		.type	= FIO_OPT_STR,
		.off1	= offsetof(struct sg_options, write_mode),
		.help	= "Specify SCSI WRITE mode",
		.def	= "write",
		.posval = {
			  { .ival = "write",
			    .oval = FIO_SG_WRITE,
			    .help = "Issue standard SCSI WRITE commands",
			  },
			  { .ival = "verify",
			    .oval = FIO_SG_WRITE_VERIFY,
			    .help = "Issue SCSI WRITE AND VERIFY commands",
			  },
			  { .ival = "same",
			    .oval = FIO_SG_WRITE_SAME,
			    .help = "Issue SCSI WRITE SAME commands",
			  },
		},
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_SG,
	},
	{
		.name	= NULL,
	},
};
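
/*
 * For illustration: how the engine options above might appear in a job
 * file. This is a sketch, not a recommendation; the sg_write_mode values
 * map to the FIO_SG_WRITE* constants above.
 *
 *	[sg-options]
 *	ioengine=sg
 *	filename=/dev/sgY
 *	rw=write
 *	writefua=1
 *	sg_write_mode=verify	; issue WRITE AND VERIFY instead of WRITE
 */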

#define MAX_10B_LBA 0xFFFFFFFFULL
#define SCSI_TIMEOUT_MS 30000	// 30 second timeout; currently no method to override
#define MAX_SB 64		// sense block maximum return size
/*
#define FIO_SGIO_DEBUG
*/

struct sgio_cmd {
	unsigned char cdb[16];		// enhanced from 10 to support 16 byte commands
	unsigned char sb[MAX_SB];	// add sense block to commands
	int nr;
};

struct sgio_trim {
	uint8_t *unmap_param;
	unsigned int unmap_range_count;
	struct io_u **trim_io_us;
};

struct sgio_data {
	struct sgio_cmd *cmds;
	struct io_u **events;
	struct pollfd *pfds;
	int *fd_flags;
	void *sgbuf;
	unsigned int bs;
	int type_checked;
	struct sgio_trim **trim_queues;
	int current_queue;
#ifdef FIO_SGIO_DEBUG
	unsigned int *trim_queue_map;
#endif
};

static inline uint32_t sgio_get_be32(uint8_t *buf)
{
	return be32_to_cpu(*((uint32_t *) buf));
}

static inline uint64_t sgio_get_be64(uint8_t *buf)
{
	return be64_to_cpu(*((uint64_t *) buf));
}

static inline void sgio_set_be16(uint16_t val, uint8_t *buf)
{
	uint16_t t = cpu_to_be16(val);

	memcpy(buf, &t, sizeof(uint16_t));
}

static inline void sgio_set_be32(uint32_t val, uint8_t *buf)
{
	uint32_t t = cpu_to_be32(val);

	memcpy(buf, &t, sizeof(uint32_t));
}

static inline void sgio_set_be64(uint64_t val, uint8_t *buf)
{
	uint64_t t = cpu_to_be64(val);

	memcpy(buf, &t, sizeof(uint64_t));
}
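
/*
 * Usage sketch for the helpers above: SCSI CDB fields are big-endian, so
 * packing an LBA into bytes 2..5 of a READ(10) CDB would look like this
 * (the LBA value is made up for illustration):
 *
 *	uint8_t cdb[10] = { 0 };
 *
 *	cdb[0] = 0x28;				// READ(10)
 *	sgio_set_be32(0x12345678, &cdb[2]);
 *	// cdb[2..5] now hold 0x12 0x34 0x56 0x78, MSB first
 *	assert(sgio_get_be32(&cdb[2]) == 0x12345678);
 */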

static inline bool sgio_unbuffered(struct thread_data *td)
{
	return (td->o.odirect || td->o.sync_io);
}

static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
			  struct io_u *io_u, int fs)
{
	struct sgio_cmd *sc = &sd->cmds[io_u->index];

	memset(hdr, 0, sizeof(*hdr));
	memset(sc->cdb, 0, sizeof(sc->cdb));

	hdr->interface_id = 'S';
	hdr->cmdp = sc->cdb;
	hdr->cmd_len = sizeof(sc->cdb);
	hdr->sbp = sc->sb;
	hdr->mx_sb_len = sizeof(sc->sb);
	hdr->pack_id = io_u->index;
	hdr->usr_ptr = io_u;
	hdr->timeout = SCSI_TIMEOUT_MS;

	if (fs) {
		hdr->dxferp = io_u->xfer_buf;
		hdr->dxfer_len = io_u->xfer_buflen;
	}
}

static int pollin_events(struct pollfd *pfds, int fds)
{
	int i;

	for (i = 0; i < fds; i++)
		if (pfds[i].revents & POLLIN)
			return 1;

	return 0;
}

static int sg_fd_read(int fd, void *data, size_t size)
{
	int err = 0;

	while (size) {
		ssize_t ret;

		ret = read(fd, data, size);
		if (ret < 0) {
			if (errno == EAGAIN || errno == EINTR)
				continue;
			err = errno;
			break;
		} else if (!ret)
			break;
		else {
			data += ret;
			size -= ret;
		}
	}

	if (err)
		return err;
	if (size)
		return EAGAIN;

	return 0;
}

static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
			      unsigned int max,
			      const struct timespec fio_unused *t)
{
	struct sgio_data *sd = td->io_ops_data;
	int left = max, eventNum, ret, r = 0, trims = 0;
	void *buf = sd->sgbuf;
	unsigned int i, j, events;
	struct fio_file *f;
	struct io_u *io_u;

	/*
	 * Fill in the file descriptors
	 */
	for_each_file(td, f, i) {
		/*
		 * don't block for min events == 0
		 */
		if (!min)
			sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
		else
			sd->fd_flags[i] = -1;

		sd->pfds[i].fd = f->fd;
		sd->pfds[i].events = POLLIN;
	}

	/*
	** There are two counters here:
	**  - number of SCSI commands completed
	**  - number of io_us completed
	**
	** These are the same with reads and writes, but
	** could differ with trim/unmap commands because
	** a single unmap can include multiple io_us
	*/
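
	/*
	 * Worked example of that distinction (hypothetical numbers): if one
	 * UNMAP carried three trim io_us, reaping its single sg_io_hdr
	 * completes one SCSI command but three io_us, so the loop below
	 * advances events by 3 while consuming only one header from the fd.
	 */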

	while (left > 0) {
		char *p;

		dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left);

		do {
			if (!min)
				break;

			ret = poll(sd->pfds, td->o.nr_files, -1);
			if (ret < 0) {
				if (!r)
					r = -errno;
				td_verror(td, errno, "poll");
				break;
			} else if (!ret)
				continue;

			if (pollin_events(sd->pfds, td->o.nr_files))
				break;
		} while (1);

		if (r < 0)
			break;

re_read:
		p = buf;
		events = 0;
		for_each_file(td, f, i) {
			for (eventNum = 0; eventNum < left; eventNum++) {
				ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
				dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret);
				if (ret) {
					r = -ret;
					td_verror(td, r, "sg_read");
					break;
				}
				io_u = ((struct sg_io_hdr *)p)->usr_ptr;
				if (io_u->ddir == DDIR_TRIM) {
					events += sd->trim_queues[io_u->index]->unmap_range_count;
					eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1;
				} else
					events++;

				p += sizeof(struct sg_io_hdr);
				dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left);
			}
		}

		if (r < 0 && !events)
			break;
		if (!events) {
			usleep(1000);
			goto re_read;
		}

		left -= events;
		r += events;

		for (i = 0; i < events; i++) {
			struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
			sd->events[i + trims] = hdr->usr_ptr;
			io_u = (struct io_u *)(hdr->usr_ptr);

			if (hdr->info & SG_INFO_CHECK) {
				/* record if an io error occurred, ignore resid */
				memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
				sd->events[i + trims]->error = EIO;
			}

			if (io_u->ddir == DDIR_TRIM) {
				struct sgio_trim *st = sd->trim_queues[io_u->index];
#ifdef FIO_SGIO_DEBUG
				assert(st->trim_io_us[0] == io_u);
				assert(sd->trim_queue_map[io_u->index] == io_u->index);
				dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index);
				dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims);
#endif
				for (j = 1; j < st->unmap_range_count; j++) {
					++trims;
					sd->events[i + trims] = st->trim_io_us[j];
#ifdef FIO_SGIO_DEBUG
					dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims);
					assert(sd->trim_queue_map[st->trim_io_us[j]->index] == io_u->index);
#endif
					if (hdr->info & SG_INFO_CHECK) {
						/* record if an io error occurred, ignore resid */
						memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr));
						sd->events[i + trims]->error = EIO;
					}
				}
				events -= st->unmap_range_count - 1;
				st->unmap_range_count = 0;
			}
		}
	}

	if (!min) {
		for_each_file(td, f, i) {
			if (sd->fd_flags[i] == -1)
				continue;

			if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
				log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
		}
	}

	return r;
}

static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td,
					     struct fio_file *f,
					     struct io_u *io_u)
{
	struct sgio_data *sd = td->io_ops_data;
	struct sg_io_hdr *hdr = &io_u->hdr;
	int ret;

	sd->events[0] = io_u;

	ret = ioctl(f->fd, SG_IO, hdr);
	if (ret < 0)
		return ret;

	/* record if an io error occurred */
	if (hdr->info & SG_INFO_CHECK)
		io_u->error = EIO;

	return FIO_Q_COMPLETED;
}

static enum fio_q_status fio_sgio_rw_doio(struct thread_data *td,
					  struct fio_file *f,
					  struct io_u *io_u, int do_sync)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	int ret;

	ret = write(f->fd, hdr, sizeof(*hdr));
	if (ret < 0)
		return ret;

	if (do_sync) {
		/*
		 * We can't just read back the first command that completes
		 * and assume it's the one we need, it could be any command
		 * that is inflight.
		 */
		do {
			struct io_u *__io_u;

			ret = read(f->fd, hdr, sizeof(*hdr));
			if (ret < 0)
				return ret;

			__io_u = hdr->usr_ptr;

			/* record if an io error occurred */
			if (hdr->info & SG_INFO_CHECK)
				__io_u->error = EIO;

			if (__io_u == io_u)
				break;

			if (io_u_sync_complete(td, __io_u)) {
				ret = -1;
				break;
			}
		} while (1);

		return FIO_Q_COMPLETED;
	}

	return FIO_Q_QUEUED;
}

static enum fio_q_status fio_sgio_doio(struct thread_data *td,
				       struct io_u *io_u, int do_sync)
{
	struct fio_file *f = io_u->file;
	enum fio_q_status ret;

	if (f->filetype == FIO_TYPE_BLOCK) {
		ret = fio_sgio_ioctl_doio(td, f, io_u);
		if (io_u->error)
			td_verror(td, io_u->error, __func__);
	} else {
		ret = fio_sgio_rw_doio(td, f, io_u, do_sync);
		if (io_u->error && do_sync)
			td_verror(td, io_u->error, __func__);
	}

	return ret;
}

static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
			    unsigned long long nr_blocks)
{
	if (lba < MAX_10B_LBA) {
		sgio_set_be32((uint32_t) lba, &hdr->cmdp[2]);
		sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[7]);
	} else {
		sgio_set_be64(lba, &hdr->cmdp[2]);
		sgio_set_be32((uint32_t) nr_blocks, &hdr->cmdp[10]);
	}

	return;
}
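
/*
 * For reference, the CDB fields the helper above fills in (layouts per
 * SBC; byte offsets match the sgio_set_be*() calls):
 *
 *	READ/WRITE(10):	byte 0		opcode (0x28/0x2a)
 *			bytes 2..5	LBA, 32-bit big-endian
 *			bytes 7..8	transfer length in blocks, 16-bit BE
 *
 *	READ/WRITE(16):	byte 0		opcode (0x88/0x8a)
 *			bytes 2..9	LBA, 64-bit big-endian
 *			bytes 10..13	transfer length in blocks, 32-bit BE
 */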

static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sg_options *o = td->eo;
	struct sgio_data *sd = td->io_ops_data;
	unsigned long long nr_blocks, lba;
	int offset;

	if (io_u->xfer_buflen & (sd->bs - 1)) {
		log_err("read/write not sector aligned\n");
		return EINVAL;
	}

	nr_blocks = io_u->xfer_buflen / sd->bs;
	lba = io_u->offset / sd->bs;

	if (io_u->ddir == DDIR_READ) {
		sgio_hdr_init(sd, hdr, io_u, 1);

		hdr->dxfer_direction = SG_DXFER_FROM_DEV;
		if (lba < MAX_10B_LBA)
			hdr->cmdp[0] = 0x28; // read(10)
		else
			hdr->cmdp[0] = 0x88; // read(16)

		if (o->hipri)
			hdr->flags |= SGV4_FLAG_HIPRI;
		if (o->readfua)
			hdr->cmdp[1] |= 0x08;

		fio_sgio_rw_lba(hdr, lba, nr_blocks);

	} else if (io_u->ddir == DDIR_WRITE) {
		sgio_hdr_init(sd, hdr, io_u, 1);

		hdr->dxfer_direction = SG_DXFER_TO_DEV;
		switch(o->write_mode) {
		case FIO_SG_WRITE:
			if (lba < MAX_10B_LBA)
				hdr->cmdp[0] = 0x2a; // write(10)
			else
				hdr->cmdp[0] = 0x8a; // write(16)
			if (o->hipri)
				hdr->flags |= SGV4_FLAG_HIPRI;
			if (o->writefua)
				hdr->cmdp[1] |= 0x08;
			break;
		case FIO_SG_WRITE_VERIFY:
			if (lba < MAX_10B_LBA)
				hdr->cmdp[0] = 0x2e; // write and verify(10)
			else
				hdr->cmdp[0] = 0x8e; // write and verify(16)
			// BYTCHK is disabled by virtue of the memset in sgio_hdr_init
			break;
		case FIO_SG_WRITE_SAME:
			hdr->dxfer_len = sd->bs;
			if (lba < MAX_10B_LBA)
				hdr->cmdp[0] = 0x41; // write same(10)
			else
				hdr->cmdp[0] = 0x93; // write same(16)
			break;
		}

		fio_sgio_rw_lba(hdr, lba, nr_blocks);

	} else if (io_u->ddir == DDIR_TRIM) {
		struct sgio_trim *st;

		if (sd->current_queue == -1) {
			sgio_hdr_init(sd, hdr, io_u, 0);

			hdr->cmd_len = 10;
			hdr->dxfer_direction = SG_DXFER_TO_DEV;
			hdr->cmdp[0] = 0x42; // unmap
			sd->current_queue = io_u->index;
			st = sd->trim_queues[sd->current_queue];
			hdr->dxferp = st->unmap_param;
#ifdef FIO_SGIO_DEBUG
			assert(sd->trim_queues[io_u->index]->unmap_range_count == 0);
			dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index);
#endif
		} else
			st = sd->trim_queues[sd->current_queue];

		dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue);
		st->trim_io_us[st->unmap_range_count] = io_u;
#ifdef FIO_SGIO_DEBUG
		sd->trim_queue_map[io_u->index] = sd->current_queue;
#endif

		offset = 8 + 16 * st->unmap_range_count;
		sgio_set_be64(lba, &st->unmap_param[offset]);
		sgio_set_be32((uint32_t) nr_blocks, &st->unmap_param[offset + 8]);

		st->unmap_range_count++;

	} else if (ddir_sync(io_u->ddir)) {
		sgio_hdr_init(sd, hdr, io_u, 0);
		hdr->dxfer_direction = SG_DXFER_NONE;
		if (lba < MAX_10B_LBA)
			hdr->cmdp[0] = 0x35; // synccache(10)
		else
			hdr->cmdp[0] = 0x91; // synccache(16)
	} else
		assert(0);

	return 0;
}

static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st)
{
	uint16_t cnt = st->unmap_range_count * 16;

	hdr->dxfer_len = cnt + 8;
	sgio_set_be16(cnt + 8, &hdr->cmdp[7]);
	sgio_set_be16(cnt + 6, st->unmap_param);
	sgio_set_be16(cnt, &st->unmap_param[2]);

	return;
}
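
/*
 * Sketch of the UNMAP parameter list that fio_sgio_prep() and the function
 * above cooperate to build (per SBC; offsets match the sgio_set_be*()
 * calls). With N queued ranges, cnt = N * 16:
 *
 *	bytes 0..1	UNMAP data length		= cnt + 6
 *	bytes 2..3	block descriptor data length	= cnt
 *	bytes 4..7	reserved
 *	bytes 8..	N 16-byte descriptors, each:
 *			  bytes 0..7	starting LBA, 64-bit BE
 *			  bytes 8..11	number of blocks, 32-bit BE
 *			  bytes 12..15	reserved
 *
 * The CDB's parameter list length (cmdp[7..8]) is the full buffer size,
 * cnt + 8.
 */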

static enum fio_q_status fio_sgio_queue(struct thread_data *td,
					struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sgio_data *sd = td->io_ops_data;
	int ret, do_sync = 0;

	fio_ro_check(td, io_u);

	if (sgio_unbuffered(td) || ddir_sync(io_u->ddir))
		do_sync = 1;

	if (io_u->ddir == DDIR_TRIM) {
		if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) {
			struct sgio_trim *st = sd->trim_queues[sd->current_queue];

			/* finish cdb setup for unmap because we are
			** doing unmap commands synchronously */
#ifdef FIO_SGIO_DEBUG
			assert(st->unmap_range_count == 1);
			assert(io_u == st->trim_io_us[0]);
#endif
			hdr = &io_u->hdr;

			fio_sgio_unmap_setup(hdr, st);

			st->unmap_range_count = 0;
			sd->current_queue = -1;
		} else
			/* queue up trim ranges and submit in commit() */
			return FIO_Q_QUEUED;
	}

	ret = fio_sgio_doio(td, io_u, do_sync);

	if (ret < 0)
		io_u->error = errno;
	else if (hdr->status) {
		io_u->resid = hdr->resid;
		io_u->error = EIO;
	} else if (td->io_ops->commit != NULL) {
		if (do_sync && !ddir_sync(io_u->ddir)) {
			io_u_mark_submit(td, 1);
			io_u_mark_complete(td, 1);
		} else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
			io_u_mark_submit(td, 1);
			io_u_queued(td, io_u);
		}
	}

	if (io_u->error) {
		td_verror(td, io_u->error, "xfer");
		return FIO_Q_COMPLETED;
	}

	return ret;
}

static int fio_sgio_commit(struct thread_data *td)
{
	struct sgio_data *sd = td->io_ops_data;
	struct sgio_trim *st;
	struct io_u *io_u;
	struct sg_io_hdr *hdr;
	struct timespec now;
	unsigned int i;
	int ret;

	if (sd->current_queue == -1)
		return 0;

	st = sd->trim_queues[sd->current_queue];
	io_u = st->trim_io_us[0];
	hdr = &io_u->hdr;

	fio_sgio_unmap_setup(hdr, st);

	sd->current_queue = -1;

	ret = fio_sgio_rw_doio(td, io_u->file, io_u, 0);

	if (ret < 0 || hdr->status) {
		int error;

		if (ret < 0)
			error = errno;
		else {
			error = EIO;
			ret = -EIO;
		}

		for (i = 0; i < st->unmap_range_count; i++) {
			st->trim_io_us[i]->error = error;
			clear_io_u(td, st->trim_io_us[i]);
			if (hdr->status)
				st->trim_io_us[i]->resid = hdr->resid;
		}

		td_verror(td, error, "xfer");
		return ret;
	}

	if (fio_fill_issue_time(td)) {
		fio_gettime(&now, NULL);
		for (i = 0; i < st->unmap_range_count; i++) {
			memcpy(&st->trim_io_us[i]->issue_time, &now, sizeof(now));
			io_u_queued(td, st->trim_io_us[i]);
		}
	}
	io_u_mark_submit(td, st->unmap_range_count);

	return 0;
}

static struct io_u *fio_sgio_event(struct thread_data *td, int event)
{
	struct sgio_data *sd = td->io_ops_data;

	return sd->events[event];
}

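/*
 * Layout of the READ CAPACITY data parsed below (per SBC; offsets match
 * the sgio_get_be*() calls):
 *
 *	RCAP(10), 8 bytes:	bytes 0..3	max LBA, 32-bit BE
 *				bytes 4..7	block size in bytes, 32-bit BE
 *
 *	RCAP(16), 32 bytes:	bytes 0..7	max LBA, 64-bit BE
 *				bytes 8..11	block size in bytes, 32-bit BE
 */
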
static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
				  unsigned long long *max_lba)
{
	/*
	 * need to do read capacity operation w/o benefit of sd or
	 * io_u structures, which are not initialized until later.
	 */
	struct sg_io_hdr hdr;
	unsigned long long hlba;
	unsigned int blksz = 0;
	unsigned char cmd[16];
	unsigned char sb[64];
	unsigned char buf[32]; // read capacity return
	int ret;
	int fd = -1;

	struct fio_file *f = td->files[0];

	/* open file independent of rest of application */
	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	memset(&hdr, 0, sizeof(hdr));
	memset(cmd, 0, sizeof(cmd));
	memset(sb, 0, sizeof(sb));
	memset(buf, 0, sizeof(buf));

	/* First let's try a 10 byte read capacity. */
	hdr.interface_id = 'S';
	hdr.cmdp = cmd;
	hdr.cmd_len = 10;
	hdr.sbp = sb;
	hdr.mx_sb_len = sizeof(sb);
	hdr.timeout = SCSI_TIMEOUT_MS;
	hdr.cmdp[0] = 0x25; // Read Capacity(10)
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.dxferp = buf;
	hdr.dxfer_len = sizeof(buf);

	ret = ioctl(fd, SG_IO, &hdr);
	if (ret < 0) {
		close(fd);
		return ret;
	}

	if (hdr.info & SG_INFO_CHECK) {
		/* RCAP(10) might be unsupported by device. Force RCAP(16) */
		hlba = MAX_10B_LBA;
	} else {
		blksz = sgio_get_be32(&buf[4]);
		hlba = sgio_get_be32(buf);
	}

	/*
	 * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA,
	 * then need to retry with 16 byte Read Capacity command.
	 */
	if (hlba == MAX_10B_LBA) {
		hdr.cmd_len = 16;
		hdr.cmdp[0] = 0x9e; // service action
		hdr.cmdp[1] = 0x10; // Read Capacity(16)
		sgio_set_be32(sizeof(buf), &hdr.cmdp[10]);

		hdr.dxfer_direction = SG_DXFER_FROM_DEV;
		hdr.dxferp = buf;
		hdr.dxfer_len = sizeof(buf);

		ret = ioctl(fd, SG_IO, &hdr);
		if (ret < 0) {
			close(fd);
			return ret;
		}

		/* record if an io error occurred */
		if (hdr.info & SG_INFO_CHECK)
			td_verror(td, EIO, "fio_sgio_read_capacity");

		blksz = sgio_get_be32(&buf[8]);
		hlba = sgio_get_be64(buf);
	}

	if (blksz) {
		*bs = blksz;
		*max_lba = hlba;
		ret = 0;
	} else {
		ret = EIO;
	}

	close(fd);
	return ret;
}

static void fio_sgio_cleanup(struct thread_data *td)
{
	struct sgio_data *sd = td->io_ops_data;
	int i;

	if (sd) {
		free(sd->events);
		free(sd->cmds);
		free(sd->fd_flags);
		free(sd->pfds);
		free(sd->sgbuf);
#ifdef FIO_SGIO_DEBUG
		free(sd->trim_queue_map);
#endif

		for (i = 0; i < td->o.iodepth; i++) {
			free(sd->trim_queues[i]->unmap_param);
			free(sd->trim_queues[i]->trim_io_us);
			free(sd->trim_queues[i]);
		}

		free(sd->trim_queues);
		free(sd);
	}
}

static int fio_sgio_init(struct thread_data *td)
{
	struct sgio_data *sd;
	struct sgio_trim *st;
	struct sg_io_hdr *h3p;
	int i;

	sd = calloc(1, sizeof(*sd));
	sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd));
	sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr));
	sd->events = calloc(td->o.iodepth, sizeof(struct io_u *));
	sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
	sd->fd_flags = calloc(td->o.nr_files, sizeof(int));
	sd->type_checked = 0;

	sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *));
	sd->current_queue = -1;
#ifdef FIO_SGIO_DEBUG
	sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int));
#endif
	for (i = 0, h3p = sd->sgbuf; i < td->o.iodepth; i++, ++h3p) {
		sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim));
		st = sd->trim_queues[i];
		st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16]));
		st->unmap_range_count = 0;
		st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
		h3p->interface_id = 'S';
	}

	td->io_ops_data = sd;

	/*
	 * we always want override_sync set for this engine, regardless of
	 * whether odirect is set or not
	 */
	td->o.override_sync = 1;
	return 0;
}

static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
{
	struct sgio_data *sd = td->io_ops_data;
	unsigned int bs = 0;
	unsigned long long max_lba = 0;

	if (f->filetype == FIO_TYPE_BLOCK) {
		if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
			td_verror(td, errno, "ioctl");
			return 1;
		}
	} else if (f->filetype == FIO_TYPE_CHAR) {
		int version, ret;

		if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
			td_verror(td, errno, "ioctl");
			return 1;
		}

		ret = fio_sgio_read_capacity(td, &bs, &max_lba);
		if (ret) {
			td_verror(td, td->error, "fio_sgio_read_capacity");
			log_err("ioengine sg unable to read capacity successfully\n");
			return 1;
		}
	} else {
		td_verror(td, EINVAL, "wrong file type");
		log_err("ioengine sg only works on block or character devices\n");
		return 1;
	}

	sd->bs = bs;
	// Determine size of commands needed based on max_lba
	if (max_lba >= MAX_10B_LBA) {
		dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
			"commands for lba above 0x%016llx/0x%016llx\n",
			MAX_10B_LBA, max_lba);
	}

	if (f->filetype == FIO_TYPE_BLOCK) {
		td->io_ops->getevents = NULL;
		td->io_ops->event = NULL;
		td->io_ops->commit = NULL;
		/*
		** Setting these functions to null may cause problems
		** with filename=/dev/sda:/dev/sg0 since we are only
		** considering a single file
		*/
	}
	sd->type_checked = 1;

	return 0;
}

static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
{
	struct sgio_data *sd = td->io_ops_data;
	int ret;

	ret = generic_open_file(td, f);
	if (ret)
		return ret;

	if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
		ret = generic_close_file(td, f);
		return ret;
	}

	return 0;
}

/*
 * Build an error string with details about the driver, host or scsi
 * error contained in the sg header. Caller will use as necessary.
 */
static char *fio_sgio_errdetails(struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
#define MAXERRDETAIL 1024
#define MAXMSGCHUNK 128
	char *msg, msgchunk[MAXMSGCHUNK];
	int i;

	msg = calloc(1, MAXERRDETAIL);
	strcpy(msg, "");

	/*
	 * can't seem to find sg_err.h, so just echo the define values
	 * so others can search the internet for clearer clues of meaning.
	 */
	if (hdr->info & SG_INFO_CHECK) {
		if (hdr->host_status) {
			snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
			strlcat(msg, msgchunk, MAXERRDETAIL);
			switch (hdr->host_status) {
			case 0x01:
				strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
				break;
			case 0x02:
				strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
				break;
			case 0x03:
				strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
				break;
			case 0x04:
				strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
				break;
			case 0x05:
				strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
				break;
			case 0x06:
				strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
				break;
			case 0x07:
				strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
				break;
			case 0x08:
				strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
				break;
			case 0x09:
				strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
				break;
			case 0x0a:
				strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
				break;
			case 0x0b:
				strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
				break;
			case 0x0c:
				strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
				break;
			case 0x0d:
				strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
				break;
			case 0x0e:
				strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
				break;
			case 0x0f:
				strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
				break;
			case 0x10:
				strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
				break;
			case 0x11:
				strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
				break;
			case 0x12:
				strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
				break;
			case 0x13:
				strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
				break;
			default:
				strlcat(msg, "Unknown", MAXERRDETAIL);
				break;
			}
			strlcat(msg, ". ", MAXERRDETAIL);
		}
		if (hdr->driver_status) {
			snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
			strlcat(msg, msgchunk, MAXERRDETAIL);
			switch (hdr->driver_status & 0x0F) {
			case 0x01:
				strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
				break;
			case 0x02:
				strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
				break;
			case 0x03:
				strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
				break;
			case 0x04:
				strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
				break;
			case 0x05:
				strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
				break;
			case 0x06:
				strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
				break;
			case 0x07:
				strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
				break;
			case 0x08:
				strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
				break;
			default:
				strlcat(msg, "Unknown", MAXERRDETAIL);
				break;
			}
			strlcat(msg, "; ", MAXERRDETAIL);
			switch (hdr->driver_status & 0xF0) {
			case 0x10:
				strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
				break;
			case 0x20:
				strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
				break;
			case 0x30:
				strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
				break;
			case 0x40:
				strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
				break;
			case 0x80:
				strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
				break;
			}
			strlcat(msg, ". ", MAXERRDETAIL);
		}
		if (hdr->status) {
			snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
			strlcat(msg, msgchunk, MAXERRDETAIL);
			// SCSI 3 status codes
			switch (hdr->status) {
			case 0x02:
				strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
				break;
			case 0x04:
				strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
				break;
			case 0x08:
				strlcat(msg, "BUSY", MAXERRDETAIL);
				break;
			case 0x10:
				strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
				break;
			case 0x14:
				strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
				break;
			case 0x18:
				strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
				break;
			case 0x22:
				strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
				break;
			case 0x28:
				strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
				break;
			case 0x30:
				strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
				break;
			case 0x40:
				strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
				break;
			default:
				strlcat(msg, "Unknown", MAXERRDETAIL);
				break;
			}
			strlcat(msg, ". ", MAXERRDETAIL);
		}
		if (hdr->sb_len_wr) {
			snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
			strlcat(msg, msgchunk, MAXERRDETAIL);
			for (i = 0; i < hdr->sb_len_wr; i++) {
				snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
				strlcat(msg, msgchunk, MAXERRDETAIL);
			}
			strlcat(msg, ". ", MAXERRDETAIL);
		}
		if (hdr->resid != 0) {
			snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
			strlcat(msg, msgchunk, MAXERRDETAIL);
		}
		if (hdr->cmdp) {
			strlcat(msg, "cdb:", MAXERRDETAIL);
			for (i = 0; i < hdr->cmd_len; i++) {
				snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->cmdp[i]);
				strlcat(msg, msgchunk, MAXERRDETAIL);
			}
			strlcat(msg, ". ", MAXERRDETAIL);
			if (io_u->ddir == DDIR_TRIM) {
				unsigned char *param_list = hdr->dxferp;
				strlcat(msg, "dxferp:", MAXERRDETAIL);
				for (i = 0; i < hdr->dxfer_len; i++) {
					snprintf(msgchunk, MAXMSGCHUNK, " %02x", param_list[i]);
					strlcat(msg, msgchunk, MAXERRDETAIL);
				}
				strlcat(msg, ". ", MAXERRDETAIL);
			}
		}
	}

	if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
		snprintf(msg, MAXERRDETAIL, "%s",
			 "SG Driver did not report a Host, Driver or Device check");

	return msg;
}

/*
 * get max file size from read capacity.
 */
static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
{
	/*
	 * get_file_size is being called even before sgio_init is
	 * called, so none of the sg_io structures are
	 * initialized in the thread_data yet. So we need to do the
	 * ReadCapacity without any of those helpers. One of the effects
	 * is that ReadCapacity may get called 4 times on each open:
	 * readcap(10) followed by readcap(16) if needed - just to get
	 * the file size after the init occurs - it will be called
	 * again when "type_check" is called during structure
	 * initialization. I'm not sure how to prevent this little
	 * inefficiency.
	 */
	unsigned int bs = 0;
	unsigned long long max_lba = 0;
	int ret;

	if (fio_file_size_known(f))
		return 0;

	if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) {
		td_verror(td, EINVAL, "wrong file type");
		log_err("ioengine sg only works on block or character devices\n");
		return 1;
	}

	ret = fio_sgio_read_capacity(td, &bs, &max_lba);
	if (ret) {
		td_verror(td, td->error, "fio_sgio_read_capacity");
		log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
		return 1;
	}

	f->real_file_size = (max_lba + 1) * bs;
	fio_file_set_size_known(f);
	return 0;
}


static struct ioengine_ops ioengine = {
	.name		= "sg",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_sgio_init,
	.prep		= fio_sgio_prep,
	.queue		= fio_sgio_queue,
	.commit		= fio_sgio_commit,
	.getevents	= fio_sgio_getevents,
	.errdetails	= fio_sgio_errdetails,
	.event		= fio_sgio_event,
	.cleanup	= fio_sgio_cleanup,
	.open_file	= fio_sgio_open,
	.close_file	= generic_close_file,
	.get_file_size	= fio_sgio_get_file_size,
	.flags		= FIO_SYNCIO | FIO_RAWIO,
	.options	= options,
	.option_struct_size = sizeof(struct sg_options)
};

#else /* FIO_HAVE_SGIO */

/*
 * When we have a proper configure system in place, we simply won't build
 * and install this io engine. For now install a crippled version that
 * just complains and fails to load.
 */
static int fio_sgio_init(struct thread_data fio_unused *td)
{
	log_err("fio: ioengine sg not available\n");
	return 1;
}

static struct ioengine_ops ioengine = {
	.name		= "sg",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_sgio_init,
};

#endif

static void fio_init fio_sgio_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_sgio_unregister(void)
{
	unregister_ioengine(&ioengine);
}