Move fio to separate repo
[fio.git] / fio-io.c
CommitLineData
ebac4655
JA
/*
 * The io parts of the fio tool, includes workers for sync and mmap'ed
 * io, as well as both posix and linux libaio support.
 *
 * sync io is implemented on top of aio.
 *
 * This is not really specific to fio; if get_io_u/put_io_u and the
 * structures were pulled into this as well, it would be a perfectly
 * generic io engine that could be used for other projects.
 *
 */
12#include <stdio.h>
13#include <stdlib.h>
14#include <unistd.h>
15#include <errno.h>
16#include <assert.h>
17#include <time.h>
18#include <sys/mman.h>
19#include "fio.h"
20#include "os.h"
21
22#ifdef FIO_HAVE_LIBAIO
23
24#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
25
26static int fio_io_sync(struct thread_data *td)
27{
28 return fsync(td->fd);
29}
30
31static int fill_timespec(struct timespec *ts)
32{
33#ifdef _POSIX_TIMERS
34 if (!clock_gettime(CLOCK_MONOTONIC, ts))
35 return 0;
36
37 perror("clock_gettime");
38#endif
39 return 1;
40}
41
42static unsigned long long ts_utime_since_now(struct timespec *t)
43{
44 long long sec, nsec;
45 struct timespec now;
46
47 if (fill_timespec(&now))
48 return 0;
49
50 sec = now.tv_sec - t->tv_sec;
51 nsec = now.tv_nsec - t->tv_nsec;
52 if (sec > 0 && nsec < 0) {
53 sec--;
54 nsec += 1000000000;
55 }
56
57 sec *= 1000000;
58 nsec /= 1000;
59 return sec + nsec;
60}
61
62struct libaio_data {
63 io_context_t aio_ctx;
64 struct io_event *aio_events;
65};
66
67static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
68{
69 if (io_u->ddir == DDIR_READ)
70 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
71 else
72 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
73
74 return 0;
75}
76
77static struct io_u *fio_libaio_event(struct thread_data *td, int event)
78{
79 struct libaio_data *ld = td->io_data;
80
81 return ev_to_iou(ld->aio_events + event);
82}
83
84static int fio_libaio_getevents(struct thread_data *td, int min, int max,
85 struct timespec *t)
86{
87 struct libaio_data *ld = td->io_data;
88 int r;
89
90 do {
91 r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
92 if (r == -EAGAIN) {
93 usleep(100);
94 continue;
95 } else if (r == -EINTR)
96 continue;
97 else
98 break;
99 } while (1);
100
101 return r;
102}
103
104static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
105{
106 struct libaio_data *ld = td->io_data;
107 struct iocb *iocb = &io_u->iocb;
108 int ret;
109
110 do {
111 ret = io_submit(ld->aio_ctx, 1, &iocb);
112 if (ret == 1)
113 return 0;
114 else if (ret == -EAGAIN)
115 usleep(100);
116 else if (ret == -EINTR)
117 continue;
118 else
119 break;
120 } while (1);
121
122 return ret;
123
124}
125
126static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
127{
128 struct libaio_data *ld = td->io_data;
129
130 return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
131}
132
133static void fio_libaio_cleanup(struct thread_data *td)
134{
135 struct libaio_data *ld = td->io_data;
136
137 if (ld) {
138 io_destroy(ld->aio_ctx);
139 if (ld->aio_events)
140 free(ld->aio_events);
141
142 free(ld);
143 td->io_data = NULL;
144 }
145}
146
147int fio_libaio_init(struct thread_data *td)
148{
149 struct libaio_data *ld = malloc(sizeof(*ld));
150
151 memset(ld, 0, sizeof(*ld));
152 if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
153 td_verror(td, errno);
154 return 1;
155 }
156
157 td->io_prep = fio_libaio_io_prep;
158 td->io_queue = fio_libaio_queue;
159 td->io_getevents = fio_libaio_getevents;
160 td->io_event = fio_libaio_event;
161 td->io_cancel = fio_libaio_cancel;
162 td->io_cleanup = fio_libaio_cleanup;
163 td->io_sync = fio_io_sync;
164
165 ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
166 td->io_data = ld;
167 return 0;
168}
169
170#else /* FIO_HAVE_LIBAIO */
171
/*
 * Stand-in used when libaio support is compiled out: the engine cannot be
 * set up, so EINVAL is returned and td is left untouched.
 */
int fio_libaio_init(struct thread_data *td)
{
	(void) td;

	return EINVAL;
}
176
177#endif /* FIO_HAVE_LIBAIO */
178
179#ifdef FIO_HAVE_POSIXAIO
180
/*
 * Private state of the posix aio engine, hung off td->io_data.
 */
struct posixaio_data {
	/* completed io_u's collected by getevents, read back via ->io_event */
	struct io_u **aio_events;
};
184
185static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
186{
187 int r = aio_cancel(td->fd, &io_u->aiocb);
188
189 if (r == 1 || r == AIO_CANCELED)
190 return 0;
191
192 return 1;
193}
194
195static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
196{
197 struct aiocb *aiocb = &io_u->aiocb;
198
199 aiocb->aio_fildes = td->fd;
200 aiocb->aio_buf = io_u->buf;
201 aiocb->aio_nbytes = io_u->buflen;
202 aiocb->aio_offset = io_u->offset;
203
204 io_u->seen = 0;
205 return 0;
206}
207
208static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
209 struct timespec *t)
210{
211 struct posixaio_data *pd = td->io_data;
212 struct list_head *entry;
213 struct timespec start;
214 int r, have_timeout = 0;
215
216 if (t && !fill_timespec(&start))
217 have_timeout = 1;
218
219 r = 0;
220restart:
221 list_for_each(entry, &td->io_u_busylist) {
222 struct io_u *io_u = list_entry(entry, struct io_u, list);
223 int err;
224
225 if (io_u->seen)
226 continue;
227
228 err = aio_error(&io_u->aiocb);
229 switch (err) {
230 default:
231 io_u->error = err;
232 case ECANCELED:
233 case 0:
234 pd->aio_events[r++] = io_u;
235 io_u->seen = 1;
236 break;
237 case EINPROGRESS:
238 break;
239 }
240
241 if (r >= max)
242 break;
243 }
244
245 if (r >= min)
246 return r;
247
248 if (have_timeout) {
249 unsigned long long usec;
250
251 usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
252 if (ts_utime_since_now(&start) > usec)
253 return r;
254 }
255
256 /*
257 * hrmpf, we need to wait for more. we should use aio_suspend, for
258 * now just sleep a little and recheck status of busy-and-not-seen
259 */
260 usleep(1000);
261 goto restart;
262}
263
264static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
265{
266 struct posixaio_data *pd = td->io_data;
267
268 return pd->aio_events[event];
269}
270
271static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
272{
273 struct aiocb *aiocb = &io_u->aiocb;
274 int ret;
275
276 if (io_u->ddir == DDIR_READ)
277 ret = aio_read(aiocb);
278 else
279 ret = aio_write(aiocb);
280
281 if (ret)
282 io_u->error = errno;
283
284 return io_u->error;
285}
286
287static void fio_posixaio_cleanup(struct thread_data *td)
288{
289 struct posixaio_data *pd = td->io_data;
290
291 if (pd) {
292 free(pd->aio_events);
293 free(pd);
294 td->io_data = NULL;
295 }
296}
297
298int fio_posixaio_init(struct thread_data *td)
299{
300 struct posixaio_data *pd = malloc(sizeof(*pd));
301
302 pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
303
304 td->io_prep = fio_posixaio_prep;
305 td->io_queue = fio_posixaio_queue;
306 td->io_getevents = fio_posixaio_getevents;
307 td->io_event = fio_posixaio_event;
308 td->io_cancel = fio_posixaio_cancel;
309 td->io_cleanup = fio_posixaio_cleanup;
310 td->io_sync = fio_io_sync;
311
312 td->io_data = pd;
313 return 0;
314}
315
316#else /* FIO_HAVE_POSIXAIO */
317
/*
 * Stand-in used when posix aio support is compiled out: the engine cannot
 * be set up, so EINVAL is returned and td is left untouched.
 */
int fio_posixaio_init(struct thread_data *td)
{
	(void) td;

	return EINVAL;
}
322
323#endif /* FIO_HAVE_POSIXAIO */
324
/*
 * Private state shared by the sync and mmap engines, hung off td->io_data.
 */
struct syncio_data {
	/* most recent io_u that was queued without error */
	struct io_u *last_io_u;
};
328
329static int fio_syncio_getevents(struct thread_data *td, int min, int max,
330 struct timespec *t)
331{
332 assert(max <= 1);
333
334 /*
335 * we can only have one finished io_u for sync io, since the depth
336 * is always 1
337 */
338 if (list_empty(&td->io_u_busylist))
339 return 0;
340
341 return 1;
342}
343
344static struct io_u *fio_syncio_event(struct thread_data *td, int event)
345{
346 struct syncio_data *sd = td->io_data;
347
348 assert(event == 0);
349
350 return sd->last_io_u;
351}
352
353static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
354{
355 if (td->cur_off != io_u->offset) {
356 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
357 td_verror(td, errno);
358 return 1;
359 }
360 }
361
362 return 0;
363}
364
365static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
366{
367 struct syncio_data *sd = td->io_data;
368 int ret;
369
370 if (io_u->ddir == DDIR_READ)
371 ret = read(td->fd, io_u->buf, io_u->buflen);
372 else
373 ret = write(td->fd, io_u->buf, io_u->buflen);
374
375 if ((unsigned int) ret != io_u->buflen) {
376 if (ret > 0) {
377 io_u->resid = io_u->buflen - ret;
378 io_u->error = ENODATA;
379 } else
380 io_u->error = errno;
381 }
382
383 if (!io_u->error)
384 sd->last_io_u = io_u;
385
386 return io_u->error;
387}
388
389static void fio_syncio_cleanup(struct thread_data *td)
390{
391 if (td->io_data) {
392 free(td->io_data);
393 td->io_data = NULL;
394 }
395}
396
397int fio_syncio_init(struct thread_data *td)
398{
399 struct syncio_data *sd = malloc(sizeof(*sd));
400
401 td->io_prep = fio_syncio_prep;
402 td->io_queue = fio_syncio_queue;
403 td->io_getevents = fio_syncio_getevents;
404 td->io_event = fio_syncio_event;
405 td->io_cancel = NULL;
406 td->io_cleanup = fio_syncio_cleanup;
407 td->io_sync = fio_io_sync;
408
409 sd->last_io_u = NULL;
410 td->io_data = sd;
411 return 0;
412}
413
414static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
415{
416 unsigned long long real_off = io_u->offset - td->file_offset;
417 struct syncio_data *sd = td->io_data;
418
419 if (io_u->ddir == DDIR_READ)
420 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
421 else
422 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
423
424 /*
425 * not really direct, but should drop the pages from the cache
426 */
427 if (td->odirect) {
428 if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
429 io_u->error = errno;
430 if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
431 io_u->error = errno;
432 }
433
434 if (!io_u->error)
435 sd->last_io_u = io_u;
436
437 return io_u->error;
438}
439
440static int fio_mmapio_sync(struct thread_data *td)
441{
442 return msync(td->mmap, td->file_size, MS_SYNC);
443}
444
445int fio_mmapio_init(struct thread_data *td)
446{
447 struct syncio_data *sd = malloc(sizeof(*sd));
448
449 td->io_prep = NULL;
450 td->io_queue = fio_mmapio_queue;
451 td->io_getevents = fio_syncio_getevents;
452 td->io_event = fio_syncio_event;
453 td->io_cancel = NULL;
454 td->io_cleanup = fio_syncio_cleanup;
455 td->io_sync = fio_mmapio_sync;
456
457 sd->last_io_u = NULL;
458 td->io_data = sd;
459 return 0;
460}
461
462#ifdef FIO_HAVE_SGIO
463
/*
 * Private state of the sg engine, hung off td->io_data.
 */
struct sgio_data {
	/* most recent io_u that was queued without error */
	struct io_u *last_io_u;
	/* 10-byte command block that every sg header points at */
	unsigned char cdb[10];
	/* device sector size as reported by BLKSSZGET */
	unsigned int bs;
};
469
470static inline void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
471 struct io_u *io_u)
472{
473 memset(hdr, 0, sizeof(*hdr));
474 memset(sd->cdb, 0, sizeof(sd->cdb));
475
476 hdr->interface_id = 'S';
477 hdr->cmdp = sd->cdb;
478 hdr->cmd_len = sizeof(sd->cdb);
479
480 if (io_u) {
481 hdr->dxferp = io_u->buf;
482 hdr->dxfer_len = io_u->buflen;
483 }
484}
485
486static int fio_sgio_sync(struct thread_data *td)
487{
488 struct sgio_data *sd = td->io_data;
489 struct sg_io_hdr hdr;
490
491 sgio_hdr_init(sd, &hdr, NULL);
492 hdr.dxfer_direction = SG_DXFER_NONE;
493
494 hdr.cmdp[0] = 0x35;
495
496 return ioctl(td->fd, SG_IO, &hdr);
497}
498
499static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
500{
501 struct sg_io_hdr *hdr = &io_u->hdr;
502 struct sgio_data *sd = td->io_data;
503 int nr_blocks, lba;
504
505 if (io_u->buflen & (sd->bs - 1)) {
506 fprintf(stderr, "read/write not sector aligned\n");
507 return EINVAL;
508 }
509
510 sgio_hdr_init(sd, hdr, io_u);
511
512 if (io_u->ddir == DDIR_READ) {
513 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
514 hdr->cmdp[0] = 0x28;
515 } else {
516 hdr->dxfer_direction = SG_DXFER_TO_DEV;
517 hdr->cmdp[0] = 0x2a;
518 }
519
520 nr_blocks = io_u->buflen / sd->bs;
521 lba = io_u->offset / sd->bs;
522 hdr->cmdp[2] = (lba >> 24) & 0xff;
523 hdr->cmdp[3] = (lba >> 16) & 0xff;
524 hdr->cmdp[4] = (lba >> 8) & 0xff;
525 hdr->cmdp[5] = lba & 0xff;
526 hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
527 hdr->cmdp[8] = nr_blocks & 0xff;
528 return 0;
529}
530
531static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
532{
533 struct sg_io_hdr *hdr = &io_u->hdr;
534 struct sgio_data *sd = td->io_data;
535 int ret;
536
537 ret = ioctl(td->fd, SG_IO, hdr);
538 if (ret < 0)
539 io_u->error = errno;
540 else if (hdr->status) {
541 io_u->resid = hdr->resid;
542 io_u->error = EIO;
543 }
544
545 if (!io_u->error)
546 sd->last_io_u = io_u;
547
548 return io_u->error;
549}
550
551static struct io_u *fio_sgio_event(struct thread_data *td, int event)
552{
553 struct sgio_data *sd = td->io_data;
554
555 assert(event == 0);
556
557 return sd->last_io_u;
558}
559
560int fio_sgio_init(struct thread_data *td)
561{
562 struct sgio_data *sd;
563 int bs;
564
565 if (td->filetype != FIO_TYPE_BD) {
566 fprintf(stderr, "ioengine sgio only works on block devices\n");
567 return 1;
568 }
569
570 if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
571 td_verror(td, errno);
572 return 1;
573 }
574
575 sd = malloc(sizeof(*sd));
576 sd->bs = bs;
577
578 td->io_prep = fio_sgio_prep;
579 td->io_queue = fio_sgio_queue;
580 td->io_getevents = fio_syncio_getevents;
581 td->io_event = fio_sgio_event;
582 td->io_cancel = NULL;
583 td->io_cleanup = fio_syncio_cleanup;
584 td->io_sync = fio_sgio_sync;
585
586 /*
587 * we want to do it, regardless of whether odirect is set or not
588 */
589 td->override_sync = 1;
590
591 sd->last_io_u = NULL;
592 td->io_data = sd;
593 return 0;
594}
595
596#else /* FIO_HAVE_SGIO */
597
/*
 * Stand-in used when sg support is compiled out: the engine cannot be set
 * up, so EINVAL is returned and td is left untouched.
 */
int fio_sgio_init(struct thread_data *td)
{
	(void) td;

	return EINVAL;
}
602
603#endif /* FIO_HAVE_SGIO */