2 * The io parts of the fio tool, includes workers for sync and mmap'ed
3 * io, as well as both posix and linux libaio support.
5 * sync io is implemented on top of aio.
7 * This is not really specific to fio, if the get_io_u/put_io_u and
8 * structures were pulled into this as well it would be a perfectly
9 * generic io engine that could be used for other projects.
/*
 * Fill *ts with the current CLOCK_MONOTONIC time. Returns 0 on success;
 * on failure prints the clock_gettime error (error return path not visible).
 */
24 static int fill_timespec(struct timespec *ts)
27 	if (!clock_gettime(CLOCK_MONOTONIC, ts))
30 	perror("clock_gettime");
/*
 * Microseconds elapsed between *t and "now" (same clock as fill_timespec).
 * The sec > 0 && nsec < 0 branch normalizes a nanosecond borrow.
 */
35 static unsigned long long ts_utime_since_now(struct timespec *t)
40 	if (fill_timespec(&now))
43 	sec = now.tv_sec - t->tv_sec;
44 	nsec = now.tv_nsec - t->tv_nsec;
45 	if (sec > 0 && nsec < 0) {
/*
 * Shared io_sync hook installed by the libaio, posixaio, sync and splice
 * engines below (body not visible here; presumably fsync()s td->fd — TODO confirm).
 */
55 static int fio_io_sync(struct thread_data *td)
60 #ifdef FIO_HAVE_LIBAIO
/*
 * Map a completed io_event back to its io_u. NOTE(review): casting ev->obj
 * (the iocb pointer) straight to io_u assumes the iocb is the first member
 * of struct io_u — confirm against the io_u definition.
 */
62 #define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
/* Per-thread libaio state; event buffer sized to iodepth in fio_libaio_init(). */
66 struct io_event *aio_events;
/*
 * Prep hook: fill io_u->iocb for a pread or pwrite on td->fd depending on
 * the io_u data direction.
 */
69 static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
71 if (io_u->ddir == DDIR_READ)
72 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
74 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
/* Event hook: return the io_u behind completion slot 'event'. */
79 static struct io_u *fio_libaio_event(struct thread_data *td, int event)
81 struct libaio_data *ld = td->io_data;
83 return ev_to_iou(ld->aio_events + event);
/*
 * Getevents hook: reap between min and max completions into ld->aio_events,
 * honoring timeout t. -EINTR has its own branch (handling not visible —
 * presumably a retry).
 */
86 static int fio_libaio_getevents(struct thread_data *td, int min, int max,
89 struct libaio_data *ld = td->io_data;
93 r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
97 } else if (r == -EINTR)
/*
 * Queue hook: submit a single iocb. io_submit returns a negative errno
 * directly; -EAGAIN (queue full) and -EINTR get dedicated branches
 * (their handling is not visible here).
 */
106 static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
108 struct libaio_data *ld = td->io_data;
109 struct iocb *iocb = &io_u->iocb;
113 ret = io_submit(ld->aio_ctx, 1, &iocb);
116 else if (ret == -EAGAIN)
118 else if (ret == -EINTR)
/* Cancel hook: ask the kernel to cancel this io_u's in-flight iocb. */
128 static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
130 struct libaio_data *ld = td->io_data;
132 return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
/*
 * Cleanup hook: tear down the aio context and free the event buffer.
 * NOTE(review): a free(ld) / td->io_data reset is not visible in this view —
 * confirm ld itself is released.
 */
135 static void fio_libaio_cleanup(struct thread_data *td)
137 struct libaio_data *ld = td->io_data;
140 io_destroy(ld->aio_ctx);
142 free(ld->aio_events);
/*
 * Engine init: allocate per-thread libaio state, create an aio context sized
 * to the job's iodepth, and install the libaio io_ops hooks.
 * NOTE(review): malloc results are used unchecked.
 * NOTE(review): io_queue_init() returns a negative error code and does not
 * set errno, so td_verror(td, errno) may report a stale value — verify.
 */
149 int fio_libaio_init(struct thread_data *td)
151 struct libaio_data *ld = malloc(sizeof(*ld));
153 memset(ld, 0, sizeof(*ld));
154 if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
155 td_verror(td, errno);
159 td->io_prep = fio_libaio_io_prep;
160 td->io_queue = fio_libaio_queue;
161 td->io_getevents = fio_libaio_getevents;
162 td->io_event = fio_libaio_event;
163 td->io_cancel = fio_libaio_cancel;
164 td->io_cleanup = fio_libaio_cleanup;
165 td->io_sync = fio_io_sync;
167 ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
172 #else /* FIO_HAVE_LIBAIO */
/* Stub when built without libaio support (presumably errors out — body not visible). */
174 int fio_libaio_init(struct thread_data *td)
179 #endif /* FIO_HAVE_LIBAIO */
181 #ifdef FIO_HAVE_POSIXAIO
/* Per-thread POSIX aio state: array of completed io_u pointers, iodepth deep. */
183 struct posixaio_data {
184 struct io_u **aio_events;
/*
 * Cancel hook. NOTE(review): the r == 1 test looks redundant with
 * AIO_CANCELED (aio_cancel returns AIO_CANCELED/AIO_NOTCANCELED/
 * AIO_ALLDONE) — possibly a portability guard; confirm.
 */
187 static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
189 int r = aio_cancel(td->fd, &io_u->aiocb);
191 if (r == 1 || r == AIO_CANCELED)
/* Prep hook: populate the aiocb with fd, buffer, length and file offset. */
197 static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
199 struct aiocb *aiocb = &io_u->aiocb;
201 aiocb->aio_fildes = td->fd;
202 aiocb->aio_buf = io_u->buf;
203 aiocb->aio_nbytes = io_u->buflen;
204 aiocb->aio_offset = io_u->offset;
/*
 * Getevents hook: poll aio_error() on every busy io_u, collecting finished
 * ones into pd->aio_events. If a timeout was given, converts it to usecs and
 * bails once ts_utime_since_now() exceeds it; otherwise loops (sleep/recheck,
 * per the in-code comment) until at least min events are reaped.
 * NOTE(review): have_timeout is set via the !fill_timespec(&start) branch —
 * lines not visible; also t->tv_sec * 1000000 can overflow on 32-bit — verify.
 */
210 static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
213 struct posixaio_data *pd = td->io_data;
214 struct list_head *entry;
215 struct timespec start;
216 int r, have_timeout = 0;
218 if (t && !fill_timespec(&start))
223 list_for_each(entry, &td->io_u_busylist) {
224 struct io_u *io_u = list_entry(entry, struct io_u, list);
230 err = aio_error(&io_u->aiocb);
236 pd->aio_events[r++] = io_u;
251 unsigned long long usec;
253 usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
254 if (ts_utime_since_now(&start) > usec)
259 * hrmpf, we need to wait for more. we should use aio_suspend, for
260 * now just sleep a little and recheck status of busy-and-not-seen
/* Event hook: return the io_u stored in completion slot 'event'. */
266 static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
268 struct posixaio_data *pd = td->io_data;
270 return pd->aio_events[event];
/* Queue hook: kick off the async read or write for this io_u's aiocb. */
273 static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
275 struct aiocb *aiocb = &io_u->aiocb;
278 if (io_u->ddir == DDIR_READ)
279 ret = aio_read(aiocb);
281 ret = aio_write(aiocb);
/*
 * Cleanup hook: free the event array.
 * NOTE(review): free(pd) itself is not visible in this view — confirm.
 */
289 static void fio_posixaio_cleanup(struct thread_data *td)
291 struct posixaio_data *pd = td->io_data;
294 free(pd->aio_events);
/*
 * Engine init: allocate per-thread state plus an iodepth-sized event array
 * and install the posixaio hooks.
 * NOTE(review): both mallocs are unchecked.
 */
300 int fio_posixaio_init(struct thread_data *td)
302 struct posixaio_data *pd = malloc(sizeof(*pd));
304 pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
306 td->io_prep = fio_posixaio_prep;
307 td->io_queue = fio_posixaio_queue;
308 td->io_getevents = fio_posixaio_getevents;
309 td->io_event = fio_posixaio_event;
310 td->io_cancel = fio_posixaio_cancel;
311 td->io_cleanup = fio_posixaio_cleanup;
312 td->io_sync = fio_io_sync;
318 #else /* FIO_HAVE_POSIXAIO */
/* Stub when built without POSIX aio support (body not visible). */
320 int fio_posixaio_init(struct thread_data *td)
325 #endif /* FIO_HAVE_POSIXAIO */
/* Sync-engine state: only the single most recently completed io_u (depth 1). */
328 struct io_u *last_io_u;
/*
 * Getevents hook for sync io: at most one io_u can be "finished" since the
 * effective depth is 1; an empty busylist means nothing to report.
 */
331 static int fio_syncio_getevents(struct thread_data *td, int min, int max,
337 * we can only have one finished io_u for sync io, since the depth
340 if (list_empty(&td->io_u_busylist))
/* Event hook: always the last completed io_u ('event' can only be slot 0). */
346 static struct io_u *fio_syncio_event(struct thread_data *td, int event)
348 struct syncio_data *sd = td->io_data;
352 return sd->last_io_u;
/*
 * Prep hook: position the fd at the io_u's offset; read/write in the queue
 * hook then run from the current file position.
 */
355 static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
357 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
358 td_verror(td, errno);
/*
 * Queue hook: do the blocking read/write immediately; a short transfer is
 * recorded as a residual. Remembers the io_u for fio_syncio_event().
 * NOTE(review): the (unsigned int) cast means ret == -1 also takes the
 * short-transfer path, making resid = buflen + 1 — confirm an error branch
 * exists in the elided lines.
 */
365 static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
367 struct syncio_data *sd = td->io_data;
370 if (io_u->ddir == DDIR_READ)
371 ret = read(td->fd, io_u->buf, io_u->buflen);
373 ret = write(td->fd, io_u->buf, io_u->buflen);
375 if ((unsigned int) ret != io_u->buflen) {
377 io_u->resid = io_u->buflen - ret;
384 sd->last_io_u = io_u;
/* Cleanup hook shared by the sync, mmap and sg engines (body not visible). */
389 static void fio_syncio_cleanup(struct thread_data *td)
/*
 * Engine init for plain sync io: install hooks; no cancel support.
 * NOTE(review): malloc is unchecked.
 */
397 int fio_syncio_init(struct thread_data *td)
399 struct syncio_data *sd = malloc(sizeof(*sd));
401 td->io_prep = fio_syncio_prep;
402 td->io_queue = fio_syncio_queue;
403 td->io_getevents = fio_syncio_getevents;
404 td->io_event = fio_syncio_event;
405 td->io_cancel = NULL;
406 td->io_cleanup = fio_syncio_cleanup;
407 td->io_sync = fio_io_sync;
409 sd->last_io_u = NULL;
/*
 * Queue hook for mmap io: memcpy between the io_u buffer and the mapping at
 * the file-relative offset. For "direct"-ish behavior, msync + MADV_DONTNEED
 * push the pages out of the cache (the conditions' bodies are elided).
 */
414 static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
416 unsigned long long real_off = io_u->offset - td->file_offset;
417 struct syncio_data *sd = td->io_data;
419 if (io_u->ddir == DDIR_READ)
420 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
422 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
425 * not really direct, but should drop the pages from the cache
428 if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
430 if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
435 sd->last_io_u = io_u;
/* Sync hook for mmap io: flush the whole mapping synchronously. */
440 static int fio_mmapio_sync(struct thread_data *td)
442 return msync(td->mmap, td->file_size, MS_SYNC);
/*
 * Engine init for mmap io: reuses the syncio state struct and getevents/
 * event/cleanup hooks; no prep hook is needed (no lseek for memcpy io).
 * NOTE(review): malloc is unchecked.
 */
445 int fio_mmapio_init(struct thread_data *td)
447 struct syncio_data *sd = malloc(sizeof(*sd));
450 td->io_queue = fio_mmapio_queue;
451 td->io_getevents = fio_syncio_getevents;
452 td->io_event = fio_syncio_event;
453 td->io_cancel = NULL;
454 td->io_cleanup = fio_syncio_cleanup;
455 td->io_sync = fio_mmapio_sync;
457 sd->last_io_u = NULL;
/* Per-command 10-byte SCSI CDB storage, one per queued io_u (indexed by io_u->index). */
465 unsigned char cdb[10];
/* Per-thread sg state: CDB array and completed-event array, both iodepth deep. */
470 struct sgio_cmd *cmds;
471 struct io_u **events;
/*
 * Initialize an sg_io_hdr for io_u: zero it, point it at this io_u's CDB
 * slot, and tag it with the io_u index as pack_id so completions can be
 * matched back. 'fs' selects whether a data transfer is attached (the
 * branch using it is elided).
 */
475 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
476 struct io_u *io_u, int fs)
478 struct sgio_cmd *sc = &sd->cmds[io_u->index];
480 memset(hdr, 0, sizeof(*hdr));
481 memset(sc->cdb, 0, sizeof(sc->cdb));
483 hdr->interface_id = 'S';
485 hdr->cmd_len = sizeof(sc->cdb);
486 hdr->pack_id = io_u->index;
490 hdr->dxferp = io_u->buf;
491 hdr->dxfer_len = io_u->buflen;
/*
 * Getevents hook for the sg char-device path: poll() the fd, then read()
 * back completed sg_io_hdrs and recover each io_u from hdr->usr_ptr.
 * The fd is flipped to O_NONBLOCK while reaping (so a read with nothing
 * pending doesn't block) and restored before returning.
 * NOTE(review): 'buf' is malloc'd here — the free() is in elided lines;
 * sd->events indexing with 'i' rather than 'r' assumes a single read batch.
 */
495 static int fio_sgio_getevents(struct thread_data *td, int min, int max,
498 struct sgio_data *sd = td->io_data;
499 struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
500 void *buf = malloc(max * sizeof(struct sg_io_hdr));
501 int left = max, ret, events, i, r = 0, fl = 0;
504 * don't block for !events
507 fl = fcntl(td->fd, F_GETFL);
508 fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
516 if (pfd.revents & POLLIN)
520 ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
524 td_verror(td, errno);
530 events = ret / sizeof(struct sg_io_hdr);
534 for (i = 0; i < events; i++) {
535 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
537 sd->events[i] = hdr->usr_ptr;
542 fcntl(td->fd, F_SETFL, fl);
/*
 * Synchronous SG_IO ioctl path (used for block devices): the command
 * completes inside the ioctl, so record the io_u as event slot 0 up front.
 */
548 static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
550 struct sgio_data *sd = td->io_data;
551 struct sg_io_hdr *hdr = &io_u->hdr;
553 sd->events[0] = io_u;
555 return ioctl(td->fd, SG_IO, hdr);
/*
 * sg char-device read/write path: write() the header to submit; when 'sync'
 * is set, read() the header back to wait for completion inline.
 */
558 static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
560 struct sg_io_hdr *hdr = &io_u->hdr;
563 ret = write(td->fd, hdr, sizeof(*hdr));
568 ret = read(td->fd, hdr, sizeof(*hdr));
/* Dispatch: block devices use the SG_IO ioctl, char devices the rw path. */
576 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
578 if (td->filetype == FIO_TYPE_BD)
579 return fio_sgio_ioctl_doio(td, io_u);
581 return fio_sgio_rw_doio(td, io_u, sync);
/*
 * Sync hook: grab a spare io_u, build a data-less command (presumably
 * SYNCHRONIZE CACHE — opcode setup is in elided lines) and run it
 * synchronously.
 */
584 static int fio_sgio_sync(struct thread_data *td)
586 struct sgio_data *sd = td->io_data;
587 struct sg_io_hdr *hdr;
591 io_u = __get_io_u(td);
596 sgio_hdr_init(sd, hdr, io_u, 0);
597 hdr->dxfer_direction = SG_DXFER_NONE;
601 ret = fio_sgio_doio(td, io_u, 1);
/*
 * Prep hook: reject transfers not aligned to the device sector size, then
 * build a 10-byte READ/WRITE CDB — 32-bit LBA in bytes 2-5 (big-endian),
 * 16-bit block count in bytes 7-8. The opcode assignment is in elided lines.
 * NOTE(review): a 10-byte CDB caps the LBA at 2^32 blocks; large devices
 * would need READ(16)/WRITE(16).
 */
606 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
608 struct sg_io_hdr *hdr = &io_u->hdr;
609 struct sgio_data *sd = td->io_data;
612 if (io_u->buflen & (sd->bs - 1)) {
613 log_err("read/write not sector aligned\n");
617 sgio_hdr_init(sd, hdr, io_u, 1);
619 if (io_u->ddir == DDIR_READ) {
620 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
623 hdr->dxfer_direction = SG_DXFER_TO_DEV;
627 nr_blocks = io_u->buflen / sd->bs;
628 lba = io_u->offset / sd->bs;
629 hdr->cmdp[2] = (lba >> 24) & 0xff;
630 hdr->cmdp[3] = (lba >> 16) & 0xff;
631 hdr->cmdp[4] = (lba >> 8) & 0xff;
632 hdr->cmdp[5] = lba & 0xff;
633 hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
634 hdr->cmdp[8] = nr_blocks & 0xff;
/*
 * Queue hook: issue the prepared command; a nonzero SCSI status is
 * surfaced via the io_u residual (error assignment is in elided lines).
 */
638 static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
640 struct sg_io_hdr *hdr = &io_u->hdr;
643 ret = fio_sgio_doio(td, io_u, 0);
647 else if (hdr->status) {
648 io_u->resid = hdr->resid;
/* Event hook: return the io_u stored in completion slot 'event'. */
655 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
657 struct sgio_data *sd = td->io_data;
659 return sd->events[event];
/*
 * Query the device sector size via a SCSI READ CAPACITY-style command
 * (opcode setup elided): the block length is the big-endian 32-bit value
 * in response bytes 4-7.
 */
662 static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
664 struct sgio_data *sd = td->io_data;
666 struct sg_io_hdr *hdr;
667 unsigned char buf[8];
670 io_u = __get_io_u(td);
674 sgio_hdr_init(sd, hdr, io_u, 0);
675 memset(buf, 0, sizeof(buf));
678 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
680 hdr->dxfer_len = sizeof(buf);
682 ret = fio_sgio_doio(td, io_u, 1);
688 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
/*
 * Engine init for sg io. Determines the sector size: BLKSSZGET ioctl for
 * block devices, SG version probe + fio_sgio_get_bs() for sg char devices;
 * anything else is rejected. Block devices get the synchronous getevents
 * (SG_IO completes inline), char devices the polling fio_sgio_getevents.
 * Sync is forced on regardless of O_DIRECT (override_sync).
 * NOTE(review): the three mallocs are unchecked.
 */
693 int fio_sgio_init(struct thread_data *td)
695 struct sgio_data *sd;
699 sd = malloc(sizeof(*sd));
700 sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
701 sd->events = malloc(td->iodepth * sizeof(struct io_u *));
704 if (td->filetype == FIO_TYPE_BD) {
705 if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
706 td_verror(td, errno);
709 } else if (td->filetype == FIO_TYPE_CHAR) {
712 if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
713 td_verror(td, errno);
717 ret = fio_sgio_get_bs(td, &bs);
721 log_err("ioengine sgio only works on block devices\n");
727 td->io_prep = fio_sgio_prep;
728 td->io_queue = fio_sgio_queue;
730 if (td->filetype == FIO_TYPE_BD)
731 td->io_getevents = fio_syncio_getevents;
733 td->io_getevents = fio_sgio_getevents;
735 td->io_event = fio_sgio_event;
736 td->io_cancel = NULL;
737 td->io_cleanup = fio_syncio_cleanup;
738 td->io_sync = fio_sgio_sync;
741 * we want to do it, regardless of whether odirect is set or not
743 td->override_sync = 1;
747 #else /* FIO_HAVE_SGIO */
/* Stub when built without sg support (body not visible). */
749 int fio_sgio_init(struct thread_data *td)
754 #endif /* FIO_HAVE_SGIO */
756 #ifdef FIO_HAVE_SPLICE
/* Splice-engine state: last completed io_u (depth 1) plus the pipe pair. */
757 struct spliceio_data {
758 struct io_u *last_io_u;
/* Event hook: always the last completed io_u ('event' can only be slot 0). */
762 static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
764 struct spliceio_data *sd = td->io_data;
768 return sd->last_io_u;
772 * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
773 * So just splice the data from the file into the pipe, and use regular
774 * read to fill the buffer. Doesn't make a lot of sense, but...
/*
 * Splice at most SPLICE_DEF_SIZE bytes per iteration from the file into
 * the pipe, then read() the same amount out into the io_u buffer;
 * ENODATA/EAGAIN on the splice is treated as a retry/continue condition
 * (the branch body is elided). Loop termination and the advance of 'p'
 * are also in elided lines.
 */
776 static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
778 struct spliceio_data *sd = td->io_data;
779 int ret, ret2, buflen;
783 offset = io_u->offset;
784 buflen = io_u->buflen;
787 int this_len = buflen;
789 if (this_len > SPLICE_DEF_SIZE)
790 this_len = SPLICE_DEF_SIZE;
792 ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
794 if (errno == ENODATA || errno == EAGAIN)
803 ret2 = read(sd->pipe[0], p, ret);
816 * For splice writing, we can vmsplice our data buffer directly into a
817 * pipe and then splice that to a file.
/*
 * Loop: wait for pipe writability, vmsplice the remaining iovec into the
 * pipe (non-blocking), advance the iovec by what was accepted, then splice
 * that many bytes from the pipe to the file at 'off'. Error branches for
 * poll/vmsplice/splice failures are in elided lines.
 */
819 static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
821 struct spliceio_data *sd = td->io_data;
822 struct iovec iov[1] = {
824 .iov_base = io_u->buf,
825 .iov_len = io_u->buflen,
828 struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
829 off_t off = io_u->offset;
832 while (iov[0].iov_len) {
833 if (poll(&pfd, 1, -1) < 0)
836 ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
840 iov[0].iov_len -= ret;
841 iov[0].iov_base += ret;
844 ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
/*
 * Queue hook: run the blocking splice read or write; a short transfer is
 * recorded as a residual with ENODATA. Remembers the io_u for the event hook.
 * NOTE(review): as with fio_syncio_queue, the (unsigned int) cast folds a
 * -1 error return into the short-transfer path — confirm elided handling.
 */
855 static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
857 struct spliceio_data *sd = td->io_data;
860 if (io_u->ddir == DDIR_READ)
861 ret = fio_splice_read(td, io_u);
863 ret = fio_splice_write(td, io_u);
865 if ((unsigned int) ret != io_u->buflen) {
867 io_u->resid = io_u->buflen - ret;
868 io_u->error = ENODATA;
874 sd->last_io_u = io_u;
/* Cleanup hook: presumably closes the pipe fds and frees sd (lines elided). */
879 static void fio_spliceio_cleanup(struct thread_data *td)
881 struct spliceio_data *sd = td->io_data;
/*
 * Engine init for splice io: reuses the syncio getevents hook (depth 1),
 * creates the pipe used as the splice intermediary, and errors out via
 * td_verror if pipe() fails. No prep hook: splice/vmsplice take explicit
 * offsets, so no lseek is needed.
 * NOTE(review): malloc is unchecked.
 */
891 int fio_spliceio_init(struct thread_data *td)
893 struct spliceio_data *sd = malloc(sizeof(*sd));
895 td->io_queue = fio_spliceio_queue;
896 td->io_getevents = fio_syncio_getevents;
897 td->io_event = fio_spliceio_event;
898 td->io_cancel = NULL;
899 td->io_cleanup = fio_spliceio_cleanup;
900 td->io_sync = fio_io_sync;
902 sd->last_io_u = NULL;
903 if (pipe(sd->pipe) < 0) {
904 td_verror(td, errno);
913 #else /* FIO_HAVE_SPLICE */
/* Stub when built without splice support (body not visible). */
915 int fio_spliceio_init(struct thread_data *td)
920 #endif /* FIO_HAVE_SPLICE */