[PATCH] Update version string, fix offset bug
[fio.git] / fio-io.c
CommitLineData
ebac4655
JA
1/*
2 * The io parts of the fio tool, includes workers for sync and mmap'ed
3 * io, as well as both posix and linux libaio support.
4 *
5 * sync io is implemented on top of aio.
6 *
7 * This is not really specific to fio, if the get_io_u/put_io_u and
8 * structures was pulled into this as well it would be a perfectly
9 * generic io engine that could be used for other projects.
10 *
11 */
12#include <stdio.h>
13#include <stdlib.h>
14#include <unistd.h>
15#include <errno.h>
16#include <assert.h>
17#include <time.h>
18#include <sys/mman.h>
b1ff3403 19#include <sys/poll.h>
ebac4655
JA
20#include "fio.h"
21#include "os.h"
22
23#ifdef FIO_HAVE_LIBAIO
24
/* Recover the io_u pointer libaio hands back in an io_event's obj field. */
#define ev_to_iou(ev)	(struct io_u *) ((unsigned long) (ev)->obj)
26
27static int fio_io_sync(struct thread_data *td)
28{
29 return fsync(td->fd);
30}
31
32static int fill_timespec(struct timespec *ts)
33{
34#ifdef _POSIX_TIMERS
35 if (!clock_gettime(CLOCK_MONOTONIC, ts))
36 return 0;
37
38 perror("clock_gettime");
39#endif
40 return 1;
41}
42
/*
 * Microseconds elapsed between *t and the current monotonic time.
 * Returns 0 when the current time cannot be read.
 */
static unsigned long long ts_utime_since_now(struct timespec *t)
{
	struct timespec now;
	long long secs, nsecs;

	if (fill_timespec(&now))
		return 0;

	secs = now.tv_sec - t->tv_sec;
	nsecs = now.tv_nsec - t->tv_nsec;

	/* borrow a second when the nanosecond difference went negative */
	if (secs > 0 && nsecs < 0) {
		secs--;
		nsecs += 1000000000;
	}

	secs *= 1000000;
	nsecs /= 1000;

	return secs + nsecs;
}
62
/*
 * Per-thread private state for the Linux libaio engine, hung off
 * td->io_data by fio_libaio_init().
 */
struct libaio_data {
	io_context_t aio_ctx;		/* kernel aio context handle */
	struct io_event *aio_events;	/* completion buffer, td->iodepth entries */
};
67
68static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
69{
70 if (io_u->ddir == DDIR_READ)
71 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
72 else
73 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
74
75 return 0;
76}
77
78static struct io_u *fio_libaio_event(struct thread_data *td, int event)
79{
80 struct libaio_data *ld = td->io_data;
81
82 return ev_to_iou(ld->aio_events + event);
83}
84
85static int fio_libaio_getevents(struct thread_data *td, int min, int max,
86 struct timespec *t)
87{
88 struct libaio_data *ld = td->io_data;
89 int r;
90
91 do {
92 r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
93 if (r == -EAGAIN) {
94 usleep(100);
95 continue;
96 } else if (r == -EINTR)
97 continue;
98 else
99 break;
100 } while (1);
101
102 return r;
103}
104
105static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
106{
107 struct libaio_data *ld = td->io_data;
108 struct iocb *iocb = &io_u->iocb;
109 int ret;
110
111 do {
112 ret = io_submit(ld->aio_ctx, 1, &iocb);
113 if (ret == 1)
114 return 0;
115 else if (ret == -EAGAIN)
116 usleep(100);
117 else if (ret == -EINTR)
118 continue;
119 else
120 break;
121 } while (1);
122
123 return ret;
124
125}
126
127static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
128{
129 struct libaio_data *ld = td->io_data;
130
131 return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
132}
133
134static void fio_libaio_cleanup(struct thread_data *td)
135{
136 struct libaio_data *ld = td->io_data;
137
138 if (ld) {
139 io_destroy(ld->aio_ctx);
140 if (ld->aio_events)
141 free(ld->aio_events);
142
143 free(ld);
144 td->io_data = NULL;
145 }
146}
147
148int fio_libaio_init(struct thread_data *td)
149{
150 struct libaio_data *ld = malloc(sizeof(*ld));
151
152 memset(ld, 0, sizeof(*ld));
153 if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
154 td_verror(td, errno);
155 return 1;
156 }
157
158 td->io_prep = fio_libaio_io_prep;
159 td->io_queue = fio_libaio_queue;
160 td->io_getevents = fio_libaio_getevents;
161 td->io_event = fio_libaio_event;
162 td->io_cancel = fio_libaio_cancel;
163 td->io_cleanup = fio_libaio_cleanup;
164 td->io_sync = fio_io_sync;
165
166 ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
167 td->io_data = ld;
168 return 0;
169}
170
171#else /* FIO_HAVE_LIBAIO */
172
/*
 * Built without libaio support: selecting this engine must fail.
 */
int fio_libaio_init(struct thread_data *td)
{
	return EINVAL;
}
177
178#endif /* FIO_HAVE_LIBAIO */
179
180#ifdef FIO_HAVE_POSIXAIO
181
/*
 * Per-thread private state for the POSIX aio engine.
 */
struct posixaio_data {
	struct io_u **aio_events;	/* completed io_us, td->iodepth entries */
};
185
186static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
187{
188 int r = aio_cancel(td->fd, &io_u->aiocb);
189
190 if (r == 1 || r == AIO_CANCELED)
191 return 0;
192
193 return 1;
194}
195
196static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
197{
198 struct aiocb *aiocb = &io_u->aiocb;
199
200 aiocb->aio_fildes = td->fd;
201 aiocb->aio_buf = io_u->buf;
202 aiocb->aio_nbytes = io_u->buflen;
203 aiocb->aio_offset = io_u->offset;
204
205 io_u->seen = 0;
206 return 0;
207}
208
/*
 * td->io_getevents hook for posix aio: walk the busy list, polling each
 * not-yet-reported io_u with aio_error(), and collect completed (or
 * errored/cancelled) ones into pd->aio_events. Keeps rescanning until at
 * least 'min' events are gathered (never more than 'max'), or the optional
 * timeout 't' expires. Returns the number of events collected.
 */
static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
				  struct timespec *t)
{
	struct posixaio_data *pd = td->io_data;
	struct list_head *entry;
	struct timespec start;
	int r, have_timeout = 0;

	/* only arm the timeout if we can actually read the clock */
	if (t && !fill_timespec(&start))
		have_timeout = 1;

	r = 0;
restart:
	list_for_each(entry, &td->io_u_busylist) {
		struct io_u *io_u = list_entry(entry, struct io_u, list);
		int err;

		/* already reported on an earlier pass */
		if (io_u->seen)
			continue;

		err = aio_error(&io_u->aiocb);
		switch (err) {
		default:
			io_u->error = err;
			/* fallthrough: an errored io_u still counts as an
			 * event, same as completed/cancelled ones */
		case ECANCELED:
		case 0:
			pd->aio_events[r++] = io_u;
			io_u->seen = 1;
			break;
		case EINPROGRESS:
			break;
		}

		if (r >= max)
			break;
	}

	if (r >= min)
		return r;

	if (have_timeout) {
		unsigned long long usec;

		usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
		if (ts_utime_since_now(&start) > usec)
			return r;
	}

	/*
	 * hrmpf, we need to wait for more. we should use aio_suspend, for
	 * now just sleep a little and recheck status of busy-and-not-seen
	 */
	usleep(1000);
	goto restart;
}
264
265static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
266{
267 struct posixaio_data *pd = td->io_data;
268
269 return pd->aio_events[event];
270}
271
272static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
273{
274 struct aiocb *aiocb = &io_u->aiocb;
275 int ret;
276
277 if (io_u->ddir == DDIR_READ)
278 ret = aio_read(aiocb);
279 else
280 ret = aio_write(aiocb);
281
282 if (ret)
283 io_u->error = errno;
284
285 return io_u->error;
286}
287
288static void fio_posixaio_cleanup(struct thread_data *td)
289{
290 struct posixaio_data *pd = td->io_data;
291
292 if (pd) {
293 free(pd->aio_events);
294 free(pd);
295 td->io_data = NULL;
296 }
297}
298
299int fio_posixaio_init(struct thread_data *td)
300{
301 struct posixaio_data *pd = malloc(sizeof(*pd));
302
303 pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
304
305 td->io_prep = fio_posixaio_prep;
306 td->io_queue = fio_posixaio_queue;
307 td->io_getevents = fio_posixaio_getevents;
308 td->io_event = fio_posixaio_event;
309 td->io_cancel = fio_posixaio_cancel;
310 td->io_cleanup = fio_posixaio_cleanup;
311 td->io_sync = fio_io_sync;
312
313 td->io_data = pd;
314 return 0;
315}
316
317#else /* FIO_HAVE_POSIXAIO */
318
/*
 * Built without posix aio support: selecting this engine must fail.
 */
int fio_posixaio_init(struct thread_data *td)
{
	return EINVAL;
}
323
324#endif /* FIO_HAVE_POSIXAIO */
325
/*
 * Private state for the sync and mmap engines: with a depth of 1 there is
 * at most one completed io_u to remember.
 */
struct syncio_data {
	struct io_u *last_io_u;		/* most recently completed io_u */
};
329
330static int fio_syncio_getevents(struct thread_data *td, int min, int max,
331 struct timespec *t)
332{
333 assert(max <= 1);
334
335 /*
336 * we can only have one finished io_u for sync io, since the depth
337 * is always 1
338 */
339 if (list_empty(&td->io_u_busylist))
340 return 0;
341
342 return 1;
343}
344
345static struct io_u *fio_syncio_event(struct thread_data *td, int event)
346{
347 struct syncio_data *sd = td->io_data;
348
349 assert(event == 0);
350
351 return sd->last_io_u;
352}
353
354static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
355{
77cf8455
JA
356 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
357 td_verror(td, errno);
358 return 1;
ebac4655
JA
359 }
360
361 return 0;
362}
363
364static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
365{
366 struct syncio_data *sd = td->io_data;
367 int ret;
368
369 if (io_u->ddir == DDIR_READ)
370 ret = read(td->fd, io_u->buf, io_u->buflen);
371 else
372 ret = write(td->fd, io_u->buf, io_u->buflen);
373
374 if ((unsigned int) ret != io_u->buflen) {
375 if (ret > 0) {
376 io_u->resid = io_u->buflen - ret;
377 io_u->error = ENODATA;
378 } else
379 io_u->error = errno;
380 }
381
382 if (!io_u->error)
383 sd->last_io_u = io_u;
384
385 return io_u->error;
386}
387
388static void fio_syncio_cleanup(struct thread_data *td)
389{
390 if (td->io_data) {
391 free(td->io_data);
392 td->io_data = NULL;
393 }
394}
395
396int fio_syncio_init(struct thread_data *td)
397{
398 struct syncio_data *sd = malloc(sizeof(*sd));
399
400 td->io_prep = fio_syncio_prep;
401 td->io_queue = fio_syncio_queue;
402 td->io_getevents = fio_syncio_getevents;
403 td->io_event = fio_syncio_event;
404 td->io_cancel = NULL;
405 td->io_cleanup = fio_syncio_cleanup;
406 td->io_sync = fio_io_sync;
407
408 sd->last_io_u = NULL;
409 td->io_data = sd;
410 return 0;
411}
412
413static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
414{
415 unsigned long long real_off = io_u->offset - td->file_offset;
416 struct syncio_data *sd = td->io_data;
417
418 if (io_u->ddir == DDIR_READ)
419 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
420 else
421 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
422
423 /*
424 * not really direct, but should drop the pages from the cache
425 */
426 if (td->odirect) {
427 if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
428 io_u->error = errno;
429 if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
430 io_u->error = errno;
431 }
432
433 if (!io_u->error)
434 sd->last_io_u = io_u;
435
436 return io_u->error;
437}
438
/*
 * td->io_sync hook for mmap io: flush the entire mapping to disk.
 * Returns msync(2)'s result.
 */
static int fio_mmapio_sync(struct thread_data *td)
{
	return msync(td->mmap, td->file_size, MS_SYNC);
}
443
444int fio_mmapio_init(struct thread_data *td)
445{
446 struct syncio_data *sd = malloc(sizeof(*sd));
447
448 td->io_prep = NULL;
449 td->io_queue = fio_mmapio_queue;
450 td->io_getevents = fio_syncio_getevents;
451 td->io_event = fio_syncio_event;
452 td->io_cancel = NULL;
453 td->io_cleanup = fio_syncio_cleanup;
454 td->io_sync = fio_mmapio_sync;
455
456 sd->last_io_u = NULL;
457 td->io_data = sd;
458 return 0;
459}
460
461#ifdef FIO_HAVE_SGIO
462
/*
 * One SCSI command slot per in-flight io_u, indexed by io_u->index
 * (see sgio_hdr_init).
 */
struct sgio_cmd {
	unsigned char cdb[10];	/* 10-byte CDB the sg_io_hdr points at */
	int nr;
};
467
/*
 * Per-thread private state for the SCSI generic (sg) engine.
 */
struct sgio_data {
	struct sgio_cmd *cmds;	/* command slots, td->iodepth entries */
	struct io_u **events;	/* completed io_us, td->iodepth entries */
	unsigned int bs;	/* device block size, from BLKSSZGET or READ CAPACITY */
};
473
b1ff3403
JA
/*
 * Reset and initialize the sg_io_hdr for io_u, wiring it to the per-io_u
 * command slot. 'fs' nonzero means a data transfer is involved, so the
 * io_u's buffer is attached; direction and opcode are filled in by the
 * caller afterwards.
 */
static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
			  struct io_u *io_u, int fs)
{
	struct sgio_cmd *sc = &sd->cmds[io_u->index];

	memset(hdr, 0, sizeof(*hdr));
	memset(sc->cdb, 0, sizeof(sc->cdb));

	hdr->interface_id = 'S';
	hdr->cmdp = sc->cdb;
	hdr->cmd_len = sizeof(sc->cdb);
	/* pack_id lets completions be matched back to the io_u slot */
	hdr->pack_id = io_u->index;
	hdr->usr_ptr = io_u;

	if (fs) {
		hdr->dxferp = io_u->buf;
		hdr->dxfer_len = io_u->buflen;
	}
}
493
b1ff3403
JA
494static int fio_sgio_getevents(struct thread_data *td, int min, int max,
495 struct timespec *t)
496{
497 struct sgio_data *sd = td->io_data;
498 struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
499 void *buf = malloc(max * sizeof(struct sg_io_hdr));
5f2b7b5e 500 int left = max, ret, events, i, r = 0, fl = 0;
b1ff3403
JA
501
502 /*
503 * don't block for !events
504 */
505 if (!min) {
506 fl = fcntl(td->fd, F_GETFL);
507 fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
508 }
509
510 while (left) {
511 do {
512 if (!min)
513 break;
514 poll(&pfd, 1, -1);
515 if (pfd.revents & POLLIN)
516 break;
517 } while (1);
518
519 ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
520 if (ret < 0) {
521 if (errno == EAGAIN)
522 break;
523 td_verror(td, errno);
524 r = -1;
525 break;
526 } else if (!ret)
527 break;
528
529 events = ret / sizeof(struct sg_io_hdr);
530 left -= events;
531 r += events;
532
533 for (i = 0; i < events; i++) {
534 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
535
536 sd->events[i] = hdr->usr_ptr;
537 }
538 }
539
540 if (!min)
541 fcntl(td->fd, F_SETFL, fl);
542
543 free(buf);
544 return r;
545}
546
/*
 * Issue the prepared sg_io_hdr for io_u. Block devices go through the
 * SG_IO ioctl (always synchronous); sg character devices get the header
 * written to the fd, optionally reading the reply back immediately when
 * 'sync' is set.
 *
 * NOTE(review): the error conventions are mixed — the block-device path
 * returns the raw ioctl() result (-1 with errno set on failure), while the
 * write/read path returns a positive errno value. Callers must cope with
 * both; verify against fio_sgio_queue/fio_sgio_sync.
 */
static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
{
	struct sgio_data *sd = td->io_data;
	struct sg_io_hdr *hdr = &io_u->hdr;
	int ret;

	if (td->filetype == FIO_TYPE_BD) {
		/* SG_IO completes inline, so record the event here */
		sd->events[0] = io_u;
		return ioctl(td->fd, SG_IO, hdr);
	}

	ret = write(td->fd, hdr, sizeof(*hdr));
	if (ret < 0)
		return errno;

	if (sync) {
		ret = read(td->fd, hdr, sizeof(*hdr));
		if (ret < 0)
			return errno;
	}

	return 0;
}
570
ebac4655
JA
571static int fio_sgio_sync(struct thread_data *td)
572{
573 struct sgio_data *sd = td->io_data;
b1ff3403
JA
574 struct sg_io_hdr *hdr;
575 struct io_u *io_u;
576 int ret;
ebac4655 577
b1ff3403
JA
578 io_u = __get_io_u(td);
579 if (!io_u)
580 return ENOMEM;
ebac4655 581
b1ff3403
JA
582 hdr = &io_u->hdr;
583 sgio_hdr_init(sd, hdr, io_u, 0);
584 hdr->dxfer_direction = SG_DXFER_NONE;
ebac4655 585
b1ff3403
JA
586 hdr->cmdp[0] = 0x35;
587
588 ret = fio_sgio_doio(td, io_u, 1);
589 put_io_u(td, io_u);
590 return ret;
ebac4655
JA
591}
592
593static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
594{
595 struct sg_io_hdr *hdr = &io_u->hdr;
596 struct sgio_data *sd = td->io_data;
597 int nr_blocks, lba;
598
599 if (io_u->buflen & (sd->bs - 1)) {
600 fprintf(stderr, "read/write not sector aligned\n");
601 return EINVAL;
602 }
603
b1ff3403 604 sgio_hdr_init(sd, hdr, io_u, 1);
ebac4655
JA
605
606 if (io_u->ddir == DDIR_READ) {
607 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
608 hdr->cmdp[0] = 0x28;
609 } else {
610 hdr->dxfer_direction = SG_DXFER_TO_DEV;
611 hdr->cmdp[0] = 0x2a;
612 }
613
614 nr_blocks = io_u->buflen / sd->bs;
615 lba = io_u->offset / sd->bs;
616 hdr->cmdp[2] = (lba >> 24) & 0xff;
617 hdr->cmdp[3] = (lba >> 16) & 0xff;
618 hdr->cmdp[4] = (lba >> 8) & 0xff;
619 hdr->cmdp[5] = lba & 0xff;
620 hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
621 hdr->cmdp[8] = nr_blocks & 0xff;
622 return 0;
623}
624
625static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
626{
627 struct sg_io_hdr *hdr = &io_u->hdr;
ebac4655
JA
628 int ret;
629
b1ff3403 630 ret = fio_sgio_doio(td, io_u, 0);
0af7b542 631
ebac4655
JA
632 if (ret < 0)
633 io_u->error = errno;
634 else if (hdr->status) {
635 io_u->resid = hdr->resid;
636 io_u->error = EIO;
637 }
638
ebac4655
JA
639 return io_u->error;
640}
641
642static struct io_u *fio_sgio_event(struct thread_data *td, int event)
643{
644 struct sgio_data *sd = td->io_data;
645
b1ff3403 646 return sd->events[event];
ebac4655
JA
647}
648
0af7b542
JA
649static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
650{
651 struct sgio_data *sd = td->io_data;
b1ff3403
JA
652 struct io_u *io_u;
653 struct sg_io_hdr *hdr;
0af7b542
JA
654 unsigned char buf[8];
655 int ret;
656
b1ff3403
JA
657 io_u = __get_io_u(td);
658 assert(io_u);
659
660 hdr = &io_u->hdr;
661 sgio_hdr_init(sd, hdr, io_u, 0);
0af7b542
JA
662 memset(buf, 0, sizeof(buf));
663
b1ff3403
JA
664 hdr->cmdp[0] = 0x25;
665 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
666 hdr->dxferp = buf;
667 hdr->dxfer_len = sizeof(buf);
0af7b542 668
b1ff3403
JA
669 ret = fio_sgio_doio(td, io_u, 1);
670 if (ret) {
671 put_io_u(td, io_u);
0af7b542 672 return ret;
b1ff3403 673 }
0af7b542
JA
674
675 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
b1ff3403 676 put_io_u(td, io_u);
0af7b542
JA
677 return 0;
678}
679
ebac4655
JA
680int fio_sgio_init(struct thread_data *td)
681{
682 struct sgio_data *sd;
0af7b542
JA
683 unsigned int bs;
684 int ret;
ebac4655 685
0af7b542 686 sd = malloc(sizeof(*sd));
b1ff3403
JA
687 sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
688 sd->events = malloc(td->iodepth * sizeof(struct io_u *));
0af7b542 689 td->io_data = sd;
ebac4655 690
0af7b542
JA
691 if (td->filetype == FIO_TYPE_BD) {
692 if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
693 td_verror(td, errno);
694 return 1;
695 }
696 } else if (td->filetype == FIO_TYPE_CHAR) {
697 int version;
698
699 if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
700 td_verror(td, errno);
701 return 1;
702 }
703
704 ret = fio_sgio_get_bs(td, &bs);
705 if (ret)
706 return ret;
707 } else {
708 fprintf(stderr, "ioengine sgio only works on block devices\n");
ebac4655
JA
709 return 1;
710 }
711
ebac4655
JA
712 sd->bs = bs;
713
714 td->io_prep = fio_sgio_prep;
715 td->io_queue = fio_sgio_queue;
b1ff3403
JA
716
717 if (td->filetype == FIO_TYPE_BD)
718 td->io_getevents = fio_syncio_getevents;
719 else
720 td->io_getevents = fio_sgio_getevents;
721
ebac4655
JA
722 td->io_event = fio_sgio_event;
723 td->io_cancel = NULL;
724 td->io_cleanup = fio_syncio_cleanup;
725 td->io_sync = fio_sgio_sync;
726
727 /*
728 * we want to do it, regardless of whether odirect is set or not
729 */
730 td->override_sync = 1;
ebac4655
JA
731 return 0;
732}
733
734#else /* FIO_HAVE_SGIO */
735
/*
 * Built without sg support: selecting this engine must fail.
 */
int fio_sgio_init(struct thread_data *td)
{
	return EINVAL;
}
740
741#endif /* FIO_HAVE_SGIO */