2 * The io parts of the fio tool, includes workers for sync and mmap'ed
3 * io, as well as both posix and linux libaio support.
5 * sync io is implemented on top of aio.
7 * This is not really specific to fio; if the get_io_u/put_io_u and
8 * structures were pulled into this as well, it would be a perfectly
9 * generic io engine that could be used for other projects.
23 #ifdef FIO_HAVE_LIBAIO
/*
 * Turn an event pointer into the io_u it refers to by casting the event's
 * obj field, via unsigned long, to a struct io_u pointer.
 * NOTE(review): the expansion has no outer parentheses, so a postfix
 * operator written right after the macro call would bind to the inner
 * expression before the cast is applied.
 */
25 #define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
27 static int fio_io_sync(struct thread_data *td)
/*
 * Read the CLOCK_MONOTONIC clock into *ts using clock_gettime().
 * perror("clock_gettime") follows the success check, presumably on the
 * failure path.
 * NOTE(review): the return statements are not visible here; callers in
 * this file appear to treat a non-zero result as failure -- confirm.
 */
32 static int fill_timespec(struct timespec *ts)
35 if (!clock_gettime(CLOCK_MONOTONIC, ts))
38 perror("clock_gettime");
/*
 * Measure the time elapsed between *t and the current time obtained from
 * fill_timespec(). The seconds and nanoseconds differences are computed
 * separately, and the (sec > 0 && nsec < 0) case is special-cased,
 * presumably to borrow one second when the nanosecond part went negative.
 * NOTE(review): the name and return type suggest a result in microseconds;
 * the final conversion and the error path are not visible here -- confirm.
 */
43 static unsigned long long ts_utime_since_now(struct timespec *t)
48 if (fill_timespec(&now))
51 sec = now.tv_sec - t->tv_sec;
52 nsec = now.tv_nsec - t->tv_nsec;
53 if (sec > 0 && nsec < 0) {
65 struct io_event *aio_events;
/*
 * Prepare the iocb embedded in io_u for a transfer of io_u->buflen bytes
 * between io_u->buf and td->fd at file offset io_u->offset.
 * DDIR_READ selects io_prep_pread(); io_prep_pwrite() is presumably the
 * else branch (the else line itself is not visible here).
 */
68 static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
70 if (io_u->ddir == DDIR_READ)
71 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
73 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
/*
 * Return the io_u belonging to entry number 'event' of the aio_events
 * array kept in the libaio private data (td->io_data).
 */
78 static struct io_u *fio_libaio_event(struct thread_data *td, int event)
80 struct libaio_data *ld = td->io_data;
82 return ev_to_iou(ld->aio_events + event);
/*
 * Fetch completion events from the libaio context with io_getevents(),
 * passing min and max as the event count bounds, ld->aio_events as the
 * destination array and t as the timeout argument.
 * A result of -EINTR gets its own branch.
 * NOTE(review): presumably -EINTR leads to a retry; the surrounding loop
 * and the return value are not visible here -- confirm.
 */
85 static int fio_libaio_getevents(struct thread_data *td, int min, int max,
88 struct libaio_data *ld = td->io_data;
92 r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
96 } else if (r == -EINTR)
/*
 * Submit the single iocb embedded in io_u to the libaio context with
 * io_submit(). The -EAGAIN and -EINTR results each have their own branch.
 * NOTE(review): presumably both are retried; the handling and the return
 * value are not visible here -- confirm.
 */
105 static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
107 struct libaio_data *ld = td->io_data;
108 struct iocb *iocb = &io_u->iocb;
112 ret = io_submit(ld->aio_ctx, 1, &iocb);
115 else if (ret == -EAGAIN)
117 else if (ret == -EINTR)
/*
 * Cancel io_u's iocb through io_cancel() on the libaio context and return
 * io_cancel()'s result unchanged. ld->aio_events is passed as the buffer
 * for the resulting event.
 */
127 static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
129 struct libaio_data *ld = td->io_data;
131 return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
/*
 * Release the libaio engine state held in td->io_data: destroy the aio
 * context and free the event array.
 * NOTE(review): whether ld itself is freed and td->io_data cleared is not
 * visible here -- confirm.
 */
134 static void fio_libaio_cleanup(struct thread_data *td)
136 struct libaio_data *ld = td->io_data;
139 io_destroy(ld->aio_ctx);
141 free(ld->aio_events);
/*
 * Set up the libaio engine for td: allocate and zero the private data,
 * create an aio context with io_queue_init() sized by td->iodepth, install
 * the libaio handlers in td (io_sync uses the shared fio_io_sync), and
 * allocate one struct io_event per queue depth slot.
 * A failing io_queue_init() is reported through td_verror(td, errno).
 * NOTE(review): neither malloc() result is checked in the lines visible
 * here, and errno is reported although libaio calls usually return a
 * negative error code instead of setting errno -- confirm both.
 */
148 int fio_libaio_init(struct thread_data *td)
150 struct libaio_data *ld = malloc(sizeof(*ld));
152 memset(ld, 0, sizeof(*ld));
153 if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
154 td_verror(td, errno);
158 td->io_prep = fio_libaio_io_prep;
159 td->io_queue = fio_libaio_queue;
160 td->io_getevents = fio_libaio_getevents;
161 td->io_event = fio_libaio_event;
162 td->io_cancel = fio_libaio_cancel;
163 td->io_cleanup = fio_libaio_cleanup;
164 td->io_sync = fio_io_sync;
166 ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
171 #else /* FIO_HAVE_LIBAIO */
173 int fio_libaio_init(struct thread_data *td)
178 #endif /* FIO_HAVE_LIBAIO */
180 #ifdef FIO_HAVE_POSIXAIO
/* Private state of the POSIX aio engine, stored in td->io_data. */
182 struct posixaio_data {
/* io_u pointers recorded by fio_posixaio_getevents() and looked up by index in fio_posixaio_event() */
183 struct io_u **aio_events;
/*
 * Try to cancel io_u's request with aio_cancel() on td->fd.
 * A result of 1 or AIO_CANCELED is singled out.
 * NOTE(review): presumably that case is reported as success; the return
 * statements are not visible here -- confirm.
 */
186 static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
188 int r = aio_cancel(td->fd, &io_u->aiocb);
190 if (r == 1 || r == AIO_CANCELED)
/*
 * Fill io_u's aiocb from the io_u: the file descriptor comes from td->fd,
 * and the buffer, byte count and file offset come from io_u->buf,
 * io_u->buflen and io_u->offset.
 */
196 static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
198 struct aiocb *aiocb = &io_u->aiocb;
200 aiocb->aio_fildes = td->fd;
201 aiocb->aio_buf = io_u->buf;
202 aiocb->aio_nbytes = io_u->buflen;
203 aiocb->aio_offset = io_u->offset;
/*
 * Gather finished POSIX aio requests. Walks td->io_u_busylist, queries
 * each io_u with aio_error(), and stores io_u pointers in pd->aio_events
 * while counting them in r (the test deciding which ones are stored is not
 * visible here). When a timeout t is given and the start time could be
 * read, the time elapsed since start is compared against t converted to
 * microseconds.
 * NOTE(review): assuming t is a struct timespec pointer, tv_sec * 1000000
 * is evaluated in the type of tv_sec before widening to unsigned long long
 * and may overflow where that type is 32 bits wide -- confirm.
 */
209 static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
212 struct posixaio_data *pd = td->io_data;
213 struct list_head *entry;
214 struct timespec start;
215 int r, have_timeout = 0;
217 if (t && !fill_timespec(&start))
222 list_for_each(entry, &td->io_u_busylist) {
223 struct io_u *io_u = list_entry(entry, struct io_u, list);
229 err = aio_error(&io_u->aiocb);
235 pd->aio_events[r++] = io_u;
250 unsigned long long usec;
252 usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
253 if (ts_utime_since_now(&start) > usec)
258 * hrmpf, we need to wait for more. we should use aio_suspend, for
259 * now just sleep a little and recheck status of busy-and-not-seen
/*
 * Return the io_u stored at index 'event' of the aio_events array in the
 * POSIX aio private data (td->io_data).
 */
265 static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
267 struct posixaio_data *pd = td->io_data;
269 return pd->aio_events[event];
/*
 * Submit io_u's aiocb: aio_read() for DDIR_READ; aio_write() is presumably
 * the else branch (the else line itself is not visible here).
 * NOTE(review): how ret is turned into the return value is not visible.
 */
272 static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
274 struct aiocb *aiocb = &io_u->aiocb;
277 if (io_u->ddir == DDIR_READ)
278 ret = aio_read(aiocb);
280 ret = aio_write(aiocb);
/*
 * Release the POSIX aio engine state held in td->io_data by freeing the
 * event pointer array.
 * NOTE(review): whether pd itself is freed is not visible here -- confirm.
 */
288 static void fio_posixaio_cleanup(struct thread_data *td)
290 struct posixaio_data *pd = td->io_data;
293 free(pd->aio_events);
/*
 * Set up the POSIX aio engine for td: allocate the private data and an
 * array of td->iodepth io_u pointers, then install the POSIX aio handlers
 * in td (io_sync uses the shared fio_io_sync).
 * NOTE(review): neither malloc() result is checked in the lines visible
 * here, and the assignment of td->io_data is not visible -- confirm.
 */
299 int fio_posixaio_init(struct thread_data *td)
301 struct posixaio_data *pd = malloc(sizeof(*pd));
303 pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
305 td->io_prep = fio_posixaio_prep;
306 td->io_queue = fio_posixaio_queue;
307 td->io_getevents = fio_posixaio_getevents;
308 td->io_event = fio_posixaio_event;
309 td->io_cancel = fio_posixaio_cancel;
310 td->io_cleanup = fio_posixaio_cleanup;
311 td->io_sync = fio_io_sync;
317 #else /* FIO_HAVE_POSIXAIO */
319 int fio_posixaio_init(struct thread_data *td)
324 #endif /* FIO_HAVE_POSIXAIO */
327 struct io_u *last_io_u;
/*
 * Completion query for the synchronous engines. The visible logic only
 * tests whether td->io_u_busylist is empty.
 * NOTE(review): presumably returns 0 for an empty list and 1 otherwise,
 * matching the "one finished io_u" remark below; the return statements are
 * not visible here -- confirm.
 */
330 static int fio_syncio_getevents(struct thread_data *td, int min, int max,
336 * we can only have one finished io_u for sync io, since the depth
339 if (list_empty(&td->io_u_busylist))
/*
 * Return the io_u most recently recorded in sd->last_io_u by the queue
 * function. The 'event' index is not used in the lines visible here.
 */
345 static struct io_u *fio_syncio_event(struct thread_data *td, int event)
347 struct syncio_data *sd = td->io_data;
351 return sd->last_io_u;
/*
 * Position td->fd at io_u->offset with lseek(SEEK_SET) ahead of the
 * read/write. An lseek() result of -1 is reported via td_verror(td, errno).
 * NOTE(review): the return values are not visible here.
 */
354 static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
356 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
357 td_verror(td, errno);
/*
 * Perform the io_u synchronously: read() for DDIR_READ, write() presumably
 * otherwise, for io_u->buflen bytes on td->fd. When the transferred count
 * differs from buflen, the visible lines record the remainder in
 * io_u->resid and set io_u->error to ENODATA. The io_u is remembered in
 * sd->last_io_u so the event handler can return it.
 * NOTE(review): a negative ret (call failure) also takes the mismatch
 * branch because of the unsigned cast; the condition guarding the resid
 * computation is not visible here -- confirm it excludes ret < 0.
 */
364 static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
366 struct syncio_data *sd = td->io_data;
369 if (io_u->ddir == DDIR_READ)
370 ret = read(td->fd, io_u->buf, io_u->buflen);
372 ret = write(td->fd, io_u->buf, io_u->buflen);
374 if ((unsigned int) ret != io_u->buflen) {
376 io_u->resid = io_u->buflen - ret;
377 io_u->error = ENODATA;
383 sd->last_io_u = io_u;
388 static void fio_syncio_cleanup(struct thread_data *td)
/*
 * Set up the synchronous read/write engine for td: allocate the private
 * data, install the syncio handlers (no cancel handler; io_sync uses the
 * shared fio_io_sync) and start with no last io_u recorded.
 * NOTE(review): the malloc() result is not checked in the lines visible
 * here, and the assignment of td->io_data is not visible -- confirm.
 */
396 int fio_syncio_init(struct thread_data *td)
398 struct syncio_data *sd = malloc(sizeof(*sd));
400 td->io_prep = fio_syncio_prep;
401 td->io_queue = fio_syncio_queue;
402 td->io_getevents = fio_syncio_getevents;
403 td->io_event = fio_syncio_event;
404 td->io_cancel = NULL;
405 td->io_cleanup = fio_syncio_cleanup;
406 td->io_sync = fio_io_sync;
408 sd->last_io_u = NULL;
/*
 * Perform the io_u against the memory mapping td->mmap. The position in
 * the mapping is io_u->offset minus td->file_offset. DDIR_READ copies from
 * the mapping into io_u->buf; the other memcpy() copies io_u->buf into the
 * mapping (presumably the else branch). The touched range is then flushed
 * with msync(MS_SYNC) and dropped with madvise(MADV_DONTNEED); the
 * condition guarding that step is not visible here. The io_u is remembered
 * in sd->last_io_u so the event handler can return it.
 */
413 static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
415 unsigned long long real_off = io_u->offset - td->file_offset;
416 struct syncio_data *sd = td->io_data;
418 if (io_u->ddir == DDIR_READ)
419 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
421 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
424 * not really direct, but should drop the pages from the cache
427 if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
429 if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
434 sd->last_io_u = io_u;
/*
 * Flush the mapping with msync(MS_SYNC), covering td->file_size bytes
 * starting at td->mmap, and return msync()'s result.
 */
439 static int fio_mmapio_sync(struct thread_data *td)
441 return msync(td->mmap, td->file_size, MS_SYNC);
/*
 * Set up the mmap engine for td: allocate the same private data type as
 * the sync engine, install the mmap queue and sync handlers, and reuse the
 * syncio getevents/event/cleanup handlers. There is no cancel handler.
 * NOTE(review): the malloc() result is not checked in the lines visible
 * here; the io_prep assignment and td->io_data setup are not visible.
 */
444 int fio_mmapio_init(struct thread_data *td)
446 struct syncio_data *sd = malloc(sizeof(*sd));
449 td->io_queue = fio_mmapio_queue;
450 td->io_getevents = fio_syncio_getevents;
451 td->io_event = fio_syncio_event;
452 td->io_cancel = NULL;
453 td->io_cleanup = fio_syncio_cleanup;
454 td->io_sync = fio_mmapio_sync;
456 sd->last_io_u = NULL;
464 unsigned char cdb[10];
469 struct sgio_cmd *cmds;
470 struct io_u **events;
/*
 * Reset an sg_io_hdr for io_u. Zeroes the header and the command block
 * reserved for this io_u (sd->cmds[io_u->index]), sets interface_id to 'S',
 * cmd_len to the command block size and pack_id to the io_u index, and
 * points the data transfer fields at io_u->buf / io_u->buflen.
 * NOTE(review): the data transfer fields are presumably only filled in
 * when 'fs' is non-zero; that condition is not visible here -- confirm.
 */
474 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
475 struct io_u *io_u, int fs)
477 struct sgio_cmd *sc = &sd->cmds[io_u->index];
479 memset(hdr, 0, sizeof(*hdr));
480 memset(sc->cdb, 0, sizeof(sc->cdb));
482 hdr->interface_id = 'S';
484 hdr->cmd_len = sizeof(sc->cdb);
485 hdr->pack_id = io_u->index;
489 hdr->dxferp = io_u->buf;
490 hdr->dxfer_len = io_u->buflen;
/*
 * Collect completed sg requests by reading sg_io_hdr structures back from
 * td->fd into a temporary buffer sized for 'max' headers. The descriptor
 * can be switched to O_NONBLOCK first and has its saved flags restored
 * afterwards (the conditions are not visible here). The number of events
 * is the byte count read divided by the header size, and each header's
 * usr_ptr is stored in sd->events.
 * NOTE(review): the malloc() and fcntl() results are not checked in the
 * lines visible here, and the release of buf is not visible. sd->events is
 * indexed by i alone; if the read is repeated in a loop, later batches
 * would overwrite earlier entries -- confirm.
 */
494 static int fio_sgio_getevents(struct thread_data *td, int min, int max,
497 struct sgio_data *sd = td->io_data;
498 struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
499 void *buf = malloc(max * sizeof(struct sg_io_hdr));
500 int left = max, ret, events, i, r = 0, fl = 0;
503 * don't block for !events
506 fl = fcntl(td->fd, F_GETFL);
507 fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
515 if (pfd.revents & POLLIN)
519 ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
523 td_verror(td, errno);
529 events = ret / sizeof(struct sg_io_hdr);
533 for (i = 0; i < events; i++) {
534 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
536 sd->events[i] = hdr->usr_ptr;
541 fcntl(td->fd, F_SETFL, fl);
/*
 * Issue io_u's sg_io_hdr with the SG_IO ioctl on td->fd and return the
 * ioctl() result. The io_u is recorded as event 0 beforehand so that
 * fio_sgio_event() can hand it back.
 */
547 static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
549 struct sgio_data *sd = td->io_data;
550 struct sg_io_hdr *hdr = &io_u->hdr;
552 sd->events[0] = io_u;
554 return ioctl(td->fd, SG_IO, hdr);
/*
 * Issue io_u's sg_io_hdr by write()-ing it to td->fd; the header is also
 * read() back from td->fd.
 * NOTE(review): the read-back is presumably done only when 'sync' is set;
 * the conditions and return values are not visible here -- confirm.
 */
557 static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
559 struct sg_io_hdr *hdr = &io_u->hdr;
562 ret = write(td->fd, hdr, sizeof(*hdr));
567 ret = read(td->fd, hdr, sizeof(*hdr));
/*
 * Dispatch an sg request by file type: FIO_TYPE_BD goes through the SG_IO
 * ioctl path, everything else through the write/read path, which also
 * receives the 'sync' flag.
 */
575 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
577 if (td->filetype == FIO_TYPE_BD)
578 return fio_sgio_ioctl_doio(td, io_u);
580 return fio_sgio_rw_doio(td, io_u, sync);
/*
 * Sync handler of the sg engine. Takes an io_u from __get_io_u(), resets
 * its header with sgio_hdr_init() (fs = 0), marks the command as having no
 * data transfer (SG_DXFER_NONE) and issues it with the sync flag set.
 * NOTE(review): the command bytes written to the cdb, the handling of a
 * failed __get_io_u() and the release of the io_u are not visible here.
 */
583 static int fio_sgio_sync(struct thread_data *td)
585 struct sgio_data *sd = td->io_data;
586 struct sg_io_hdr *hdr;
590 io_u = __get_io_u(td);
595 sgio_hdr_init(sd, hdr, io_u, 0);
596 hdr->dxfer_direction = SG_DXFER_NONE;
600 ret = fio_sgio_doio(td, io_u, 1);
/*
 * Build the sg command for io_u. Rejects buffers whose length is not a
 * multiple of sd->bs (the mask test assumes sd->bs is a power of two),
 * resets the header with sgio_hdr_init() (fs = 1), picks the transfer
 * direction from io_u->ddir, and encodes the starting block and the block
 * count, both in units of sd->bs, into the command bytes in big-endian
 * order: lba in bytes 2..5 and nr_blocks in bytes 7..8.
 * NOTE(review): only the low 32 bits of lba and the low 16 bits of
 * nr_blocks are encoded; larger values are silently truncated. The opcode
 * byte assignment is not visible here.
 */
605 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
607 struct sg_io_hdr *hdr = &io_u->hdr;
608 struct sgio_data *sd = td->io_data;
611 if (io_u->buflen & (sd->bs - 1)) {
612 fprintf(stderr, "read/write not sector aligned\n");
616 sgio_hdr_init(sd, hdr, io_u, 1);
618 if (io_u->ddir == DDIR_READ) {
619 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
622 hdr->dxfer_direction = SG_DXFER_TO_DEV;
626 nr_blocks = io_u->buflen / sd->bs;
627 lba = io_u->offset / sd->bs;
628 hdr->cmdp[2] = (lba >> 24) & 0xff;
629 hdr->cmdp[3] = (lba >> 16) & 0xff;
630 hdr->cmdp[4] = (lba >> 8) & 0xff;
631 hdr->cmdp[5] = lba & 0xff;
632 hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
633 hdr->cmdp[8] = nr_blocks & 0xff;
/*
 * Queue io_u on the sg device through fio_sgio_doio() with sync = 0.
 * When the header reports a non-zero status, the header's residual count
 * is copied into io_u->resid.
 * NOTE(review): the branch preceding the status check and the values
 * returned are not visible here.
 */
637 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
639 struct sg_io_hdr *hdr = &io_u->hdr;
642 ret = fio_sgio_doio(td, io_u, 0);
646 else if (hdr->status) {
647 io_u->resid = hdr->resid;
/*
 * Return the io_u stored at index 'event' of the events array in the sg
 * private data (td->io_data).
 */
654 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
656 struct sgio_data *sd = td->io_data;
658 return sd->events[event];
/*
 * Query the device for its block size and store it in *bs. Takes an io_u
 * from __get_io_u(), resets its header, requests an 8-byte transfer from
 * the device into a zeroed local buffer, issues it synchronously, and
 * assembles *bs from bytes 4..7 of the reply in big-endian order.
 * NOTE(review): the command opcode is not visible here (presumably a READ
 * CAPACITY style request -- confirm). buf[4] << 24 is evaluated as int, so
 * a byte value of 0x80 or above would shift into the sign bit.
 */
661 static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
663 struct sgio_data *sd = td->io_data;
665 struct sg_io_hdr *hdr;
666 unsigned char buf[8];
669 io_u = __get_io_u(td);
673 sgio_hdr_init(sd, hdr, io_u, 0);
674 memset(buf, 0, sizeof(buf));
677 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
679 hdr->dxfer_len = sizeof(buf);
681 ret = fio_sgio_doio(td, io_u, 1);
687 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
/*
 * Set up the sg engine for td. Allocates the private data plus one command
 * block and one event slot per td->iodepth entry, then determines the
 * block size: BLKSSZGET for block devices; for character devices the
 * SG_GET_VERSION_NUM ioctl is checked first and fio_sgio_get_bs() is used.
 * Any other file type is rejected with a message on stderr. Finally the sg
 * handlers are installed: block devices reuse fio_syncio_getevents, other
 * types use fio_sgio_getevents, and override_sync is set.
 * NOTE(review): the malloc() results are not checked in the lines visible
 * here. The rejection message mentions only block devices although
 * character devices are accepted above.
 */
692 int fio_sgio_init(struct thread_data *td)
694 struct sgio_data *sd;
698 sd = malloc(sizeof(*sd));
699 sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
700 sd->events = malloc(td->iodepth * sizeof(struct io_u *));
703 if (td->filetype == FIO_TYPE_BD) {
704 if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
705 td_verror(td, errno);
708 } else if (td->filetype == FIO_TYPE_CHAR) {
711 if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
712 td_verror(td, errno);
716 ret = fio_sgio_get_bs(td, &bs);
720 fprintf(stderr, "ioengine sgio only works on block devices\n");
726 td->io_prep = fio_sgio_prep;
727 td->io_queue = fio_sgio_queue;
729 if (td->filetype == FIO_TYPE_BD)
730 td->io_getevents = fio_syncio_getevents;
732 td->io_getevents = fio_sgio_getevents;
734 td->io_event = fio_sgio_event;
735 td->io_cancel = NULL;
736 td->io_cleanup = fio_syncio_cleanup;
737 td->io_sync = fio_sgio_sync;
740 * we want to do it, regardless of whether odirect is set or not
742 td->override_sync = 1;
746 #else /* FIO_HAVE_SGIO */
748 int fio_sgio_init(struct thread_data *td)
753 #endif /* FIO_HAVE_SGIO */