/*
 * The io parts of the fio tool, including workers for sync and mmap'ed
 * io, as well as both posix and linux libaio support.
 *
 * sync io is implemented on top of aio.
 *
 * This is not really specific to fio; if the get_io_u/put_io_u helpers
 * and structures were pulled into this as well, it would be a perfectly
 * generic io engine that could be used for other projects.
 */
#ifdef FIO_HAVE_LIBAIO

/*
 * the iocb is embedded at the start of the io_u, so the event's obj
 * pointer can be cast straight back to the owning io_u
 */
#define ev_to_iou(ev)	(struct io_u *) ((unsigned long) (ev)->obj)
static int fio_io_sync(struct thread_data *td)
{
	return fsync(td->fd);
}
static int fill_timespec(struct timespec *ts)
{
	if (!clock_gettime(CLOCK_MONOTONIC, ts))
		return 0;

	perror("clock_gettime");
	return 1;
}
/*
 * microseconds elapsed between *t and now
 */
static unsigned long long ts_utime_since_now(struct timespec *t)
{
	long long sec, nsec;
	struct timespec now;

	if (fill_timespec(&now))
		return 0;

	sec = now.tv_sec - t->tv_sec;
	nsec = now.tv_nsec - t->tv_nsec;
	if (sec > 0 && nsec < 0) {
		sec--;
		nsec += 1000000000;
	}

	return sec * 1000000 + nsec / 1000;
}
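/*
 * Illustrative use of the two helpers above; io_complete() and the
 * 100 msec budget are made up for the example:
 *
 *	struct timespec start;
 *
 *	if (!fill_timespec(&start))
 *		while (!io_complete())
 *			if (ts_utime_since_now(&start) > 100000ULL)
 *				break;
 */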
struct libaio_data {
	io_context_t aio_ctx;
	struct io_event *aio_events;
};
static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
{
	if (io_u->ddir == DDIR_READ)
		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
	else
		io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);

	return 0;
}
static struct io_u *fio_libaio_event(struct thread_data *td, int event)
{
	struct libaio_data *ld = td->io_data;

	return ev_to_iou(ld->aio_events + event);
}
static int fio_libaio_getevents(struct thread_data *td, int min, int max,
				struct timespec *t)
{
	struct libaio_data *ld = td->io_data;
	long r;

	/*
	 * libaio returns negative errno values; retry the transient ones
	 */
	do {
		r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
		if (r == -EAGAIN) {
			usleep(100);
			continue;
		} else if (r == -EINTR)
			continue;

		break;
	} while (1);

	return (int) r;
}
static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *ld = td->io_data;
	struct iocb *iocb = &io_u->iocb;
	long ret;

	do {
		ret = io_submit(ld->aio_ctx, 1, &iocb);
		if (ret == 1)
			return 0;
		else if (ret == -EAGAIN)
			usleep(100);
		else if (ret == -EINTR)
			continue;
		else
			break;
	} while (1);

	return (int) ret;
}
static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *ld = td->io_data;

	return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
}
static void fio_libaio_cleanup(struct thread_data *td)
{
	struct libaio_data *ld = td->io_data;

	if (ld) {
		io_destroy(ld->aio_ctx);
		free(ld->aio_events);
		free(ld);
		td->io_data = NULL;
	}
}
int fio_libaio_init(struct thread_data *td)
{
	struct libaio_data *ld = malloc(sizeof(*ld));
	int err;

	memset(ld, 0, sizeof(*ld));
	err = io_queue_init(td->iodepth, &ld->aio_ctx);
	if (err) {
		/*
		 * io_queue_init() returns a negative errno and does not
		 * set errno itself
		 */
		td_verror(td, -err);
		free(ld);
		return 1;
	}
	td->io_prep = fio_libaio_io_prep;
	td->io_queue = fio_libaio_queue;
	td->io_getevents = fio_libaio_getevents;
	td->io_event = fio_libaio_event;
	td->io_cancel = fio_libaio_cancel;
	td->io_cleanup = fio_libaio_cleanup;
	td->io_sync = fio_io_sync;
	ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
	td->io_data = ld;
	return 0;
}
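/*
 * Sketch of how the hooks installed above are meant to be driven by
 * the per-thread io loop elsewhere in fio (illustrative only, not
 * the actual caller):
 *
 *	td->io_prep(td, io_u);				// build the iocb
 *	td->io_queue(td, io_u);				// io_submit() it
 *	n = td->io_getevents(td, 1, td->iodepth, NULL);	// reap
 *	for (i = 0; i < n; i++)
 *		io_u = td->io_event(td, i);		// event -> io_u
 */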
#else /* FIO_HAVE_LIBAIO */
int fio_libaio_init(struct thread_data *td)
{
	fprintf(stderr, "fio: libaio not available\n");
	return 1;
}
#endif /* FIO_HAVE_LIBAIO */

#ifdef FIO_HAVE_POSIXAIO
struct posixaio_data {
	struct io_u **aio_events;
};
static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
{
	int r = aio_cancel(td->fd, &io_u->aiocb);

	if (r == 1 || r == AIO_CANCELED)
		return 0;

	return 1;
}
static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct aiocb *aiocb = &io_u->aiocb;

	aiocb->aio_fildes = td->fd;
	aiocb->aio_buf = io_u->buf;
	aiocb->aio_nbytes = io_u->buflen;
	aiocb->aio_offset = io_u->offset;

	io_u->seen = 0;
	return 0;
}
static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
				  struct timespec *t)
{
	struct posixaio_data *pd = td->io_data;
	struct list_head *entry;
	struct timespec start;
	int r, have_timeout = 0;

	if (t && !fill_timespec(&start))
		have_timeout = 1;

	r = 0;
restart:
	list_for_each(entry, &td->io_u_busylist) {
		struct io_u *io_u = list_entry(entry, struct io_u, list);
		int err;

		/*
		 * already collected on an earlier pass
		 */
		if (io_u->seen)
			continue;

		err = aio_error(&io_u->aiocb);
		switch (err) {
		default:
			io_u->error = err;
			/* fall through, treat as completed */
		case ECANCELED:
		case 0:
			pd->aio_events[r++] = io_u;
			io_u->seen = 1;
			break;
		case EINPROGRESS:
			break;
		}

		if (r >= max)
			break;
	}

	if (r >= min)
		return r;
	if (have_timeout) {
		unsigned long long usec;

		usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
		if (ts_utime_since_now(&start) > usec)
			return r;
	}
	/*
	 * hrmpf, we need to wait for more. we should use aio_suspend, for
	 * now just sleep a little and recheck status of busy-and-not-seen
	 * io units.
	 */
	usleep(1000);
	goto restart;
}
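/*
 * A rough sketch of the aio_suspend() alternative mentioned above
 * (untested, illustrative): collect the in-flight aiocbs and block
 * until one completes or the timeout expires, instead of sleeping
 * and rescanning.
 *
 *	const struct aiocb *list[td->iodepth];
 *	int nr = 0;
 *
 *	list_for_each(entry, &td->io_u_busylist)
 *		list[nr++] = &list_entry(entry, struct io_u, list)->aiocb;
 *
 *	aio_suspend(list, nr, t);
 */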
static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
{
	struct posixaio_data *pd = td->io_data;

	return pd->aio_events[event];
}
static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct aiocb *aiocb = &io_u->aiocb;
	int ret;

	if (io_u->ddir == DDIR_READ)
		ret = aio_read(aiocb);
	else
		ret = aio_write(aiocb);

	if (ret)
		io_u->error = errno;

	return io_u->error;
}
static void fio_posixaio_cleanup(struct thread_data *td)
{
	struct posixaio_data *pd = td->io_data;

	if (pd) {
		free(pd->aio_events);
		free(pd);
		td->io_data = NULL;
	}
}
int fio_posixaio_init(struct thread_data *td)
{
	struct posixaio_data *pd = malloc(sizeof(*pd));

	pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
	td->io_prep = fio_posixaio_prep;
	td->io_queue = fio_posixaio_queue;
	td->io_getevents = fio_posixaio_getevents;
	td->io_event = fio_posixaio_event;
	td->io_cancel = fio_posixaio_cancel;
	td->io_cleanup = fio_posixaio_cleanup;
	td->io_sync = fio_io_sync;

	td->io_data = pd;
	return 0;
}
#else /* FIO_HAVE_POSIXAIO */
int fio_posixaio_init(struct thread_data *td)
{
	fprintf(stderr, "fio: posixaio not available\n");
	return 1;
}
#endif /* FIO_HAVE_POSIXAIO */
/*
 * shared by the sync and mmap engines, both of which complete at most
 * one io_u at a time
 */
struct syncio_data {
	struct io_u *last_io_u;
};
static int fio_syncio_getevents(struct thread_data *td, int min, int max,
				struct timespec *t)
{
	/*
	 * we can only have one finished io_u for sync io, since the depth
	 * is always 1
	 */
	if (list_empty(&td->io_u_busylist))
		return 0;

	return 1;
}
static struct io_u *fio_syncio_event(struct thread_data *td, int event)
{
	struct syncio_data *sd = td->io_data;

	assert(event == 0);

	return sd->last_io_u;
}
static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
{
	if (td->cur_off != io_u->offset) {
		if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
			td_verror(td, errno);
			return 1;
		}
	}

	return 0;
}
static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_data;
	int ret;

	if (io_u->ddir == DDIR_READ)
		ret = read(td->fd, io_u->buf, io_u->buflen);
	else
		ret = write(td->fd, io_u->buf, io_u->buflen);

	if ((unsigned int) ret != io_u->buflen) {
		if (ret > 0) {
			io_u->resid = io_u->buflen - ret;
			io_u->error = ENODATA;
		} else
			io_u->error = errno;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}
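/*
 * Note that for the sync engine, the prep/queue pair above is just a
 * seek-then-read (or write), i.e. roughly equivalent to:
 *
 *	pread(td->fd, io_u->buf, io_u->buflen, io_u->offset);
 */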
static void fio_syncio_cleanup(struct thread_data *td)
{
	if (td->io_data) {
		free(td->io_data);
		td->io_data = NULL;
	}
}
int fio_syncio_init(struct thread_data *td)
{
	struct syncio_data *sd = malloc(sizeof(*sd));
	td->io_prep = fio_syncio_prep;
	td->io_queue = fio_syncio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_syncio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_io_sync;
	sd->last_io_u = NULL;

	td->io_data = sd;
	return 0;
}
static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
{
	unsigned long long real_off = io_u->offset - td->file_offset;
	struct syncio_data *sd = td->io_data;

	if (io_u->ddir == DDIR_READ)
		memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
	else
		memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);

	/*
	 * not really direct, but should drop the pages from the cache
	 */
	if (td->odirect) {
		if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
			io_u->error = errno;
		if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
			io_u->error = errno;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}
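/*
 * td->mmap itself is established at file setup time, outside this
 * file; roughly (sketch only, protection flags depend on the job):
 *
 *	td->mmap = mmap(NULL, td->file_size, PROT_READ | PROT_WRITE,
 *			MAP_SHARED, td->fd, td->file_offset);
 */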
static int fio_mmapio_sync(struct thread_data *td)
{
	return msync(td->mmap, td->file_size, MS_SYNC);
}
int fio_mmapio_init(struct thread_data *td)
{
	struct syncio_data *sd = malloc(sizeof(*sd));
	td->io_prep = NULL;
	td->io_queue = fio_mmapio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_syncio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_mmapio_sync;
	sd->last_io_u = NULL;

	td->io_data = sd;
	return 0;
}
#ifdef FIO_HAVE_SGIO

struct sgio_data {
	struct io_u *last_io_u;
	unsigned char cdb[10];
	unsigned int bs;
};
static inline void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
				 struct io_u *io_u)
{
	memset(hdr, 0, sizeof(*hdr));
	memset(sd->cdb, 0, sizeof(sd->cdb));

	hdr->interface_id = 'S';
	hdr->cmdp = sd->cdb;
	hdr->cmd_len = sizeof(sd->cdb);

	if (io_u) {
		hdr->dxferp = io_u->buf;
		hdr->dxfer_len = io_u->buflen;
	}
}
static int fio_sgio_sync(struct thread_data *td)
{
	struct sgio_data *sd = td->io_data;
	struct sg_io_hdr hdr;

	sgio_hdr_init(sd, &hdr, NULL);
	hdr.dxfer_direction = SG_DXFER_NONE;

	hdr.cmdp[0] = 0x35;	/* SYNCHRONIZE CACHE(10) */

	return ioctl(td->fd, SG_IO, &hdr);
}
static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sgio_data *sd = td->io_data;
	int nr_blocks, lba;

	if (io_u->buflen & (sd->bs - 1)) {
		fprintf(stderr, "read/write not sector aligned\n");
		return EINVAL;
	}
	sgio_hdr_init(sd, hdr, io_u);

	if (io_u->ddir == DDIR_READ) {
		hdr->dxfer_direction = SG_DXFER_FROM_DEV;
		hdr->cmdp[0] = 0x28;	/* READ(10) */
	} else {
		hdr->dxfer_direction = SG_DXFER_TO_DEV;
		hdr->cmdp[0] = 0x2a;	/* WRITE(10) */
	}
	/*
	 * READ/WRITE(10) take a 32-bit big-endian lba in bytes 2-5 and a
	 * 16-bit big-endian block count in bytes 7-8
	 */
	nr_blocks = io_u->buflen / sd->bs;
	lba = io_u->offset / sd->bs;
	hdr->cmdp[2] = (lba >> 24) & 0xff;
	hdr->cmdp[3] = (lba >> 16) & 0xff;
	hdr->cmdp[4] = (lba >> 8) & 0xff;
	hdr->cmdp[5] = lba & 0xff;
	hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
	hdr->cmdp[8] = nr_blocks & 0xff;

	return 0;
}
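/*
 * Worked example (illustrative): with bs = 512, offset = 1 MiB and
 * buflen = 4 KiB, lba = 2048 (0x800) and nr_blocks = 8, so a read
 * builds the READ(10) cdb
 *
 *	28 00 00 00 08 00 00 00 08 00
 *
 * i.e. opcode 0x28, lba 0x00000800 in bytes 2-5, transfer length
 * 0x0008 in bytes 7-8.
 */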
static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sgio_data *sd = td->io_data;
	int ret;

	ret = ioctl(td->fd, SG_IO, hdr);
	if (ret < 0)
		io_u->error = errno;
	else if (hdr->status) {
		io_u->resid = hdr->resid;
		io_u->error = EIO;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}
static struct io_u *fio_sgio_event(struct thread_data *td, int event)
{
	struct sgio_data *sd = td->io_data;

	assert(event == 0);

	return sd->last_io_u;
}
int fio_sgio_init(struct thread_data *td)
{
	struct sgio_data *sd;
	unsigned int bs;

	if (td->filetype != FIO_TYPE_BD) {
		fprintf(stderr, "ioengine sgio only works on block devices\n");
		return 1;
	}

	if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
		td_verror(td, errno);
		return 1;
	}
	sd = malloc(sizeof(*sd));
	sd->bs = bs;
	td->io_prep = fio_sgio_prep;
	td->io_queue = fio_sgio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_sgio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_sgio_sync;
	/*
	 * we always want to sync the device, regardless of whether
	 * odirect is set or not
	 */
	td->override_sync = 1;
	sd->last_io_u = NULL;

	td->io_data = sd;
	return 0;
}
#else /* FIO_HAVE_SGIO */
int fio_sgio_init(struct thread_data *td)
{
	fprintf(stderr, "fio: sgio not available\n");
	return 1;
}
#endif /* FIO_HAVE_SGIO */