[PATCH] Add support for giving multiple job files
[fio.git] / ioengines.c
CommitLineData
ebac4655
JA
1/*
2 * The io parts of the fio tool, includes workers for sync and mmap'ed
3 * io, as well as both posix and linux libaio support.
4 *
5 * sync io is implemented on top of aio.
6 *
7 * This is not really specific to fio, if the get_io_u/put_io_u and
8 * structures was pulled into this as well it would be a perfectly
9 * generic io engine that could be used for other projects.
10 *
11 */
12#include <stdio.h>
13#include <stdlib.h>
14#include <unistd.h>
15#include <errno.h>
16#include <assert.h>
17#include <time.h>
5c4e1dbc 18#include <string.h>
ebac4655 19#include <sys/mman.h>
b1ff3403 20#include <sys/poll.h>
ebac4655
JA
21#include "fio.h"
22#include "os.h"
23
ebac4655
JA
24static int fill_timespec(struct timespec *ts)
25{
26#ifdef _POSIX_TIMERS
27 if (!clock_gettime(CLOCK_MONOTONIC, ts))
28 return 0;
29
30 perror("clock_gettime");
31#endif
32 return 1;
33}
34
35static unsigned long long ts_utime_since_now(struct timespec *t)
36{
37 long long sec, nsec;
38 struct timespec now;
39
40 if (fill_timespec(&now))
41 return 0;
42
43 sec = now.tv_sec - t->tv_sec;
44 nsec = now.tv_nsec - t->tv_nsec;
45 if (sec > 0 && nsec < 0) {
46 sec--;
47 nsec += 1000000000;
48 }
49
50 sec *= 1000000;
51 nsec /= 1000;
52 return sec + nsec;
53}
54
5c4e1dbc
JA
55static int fio_io_sync(struct thread_data *td)
56{
57 return fsync(td->fd);
58}
59
60#ifdef FIO_HAVE_LIBAIO
61
62#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
63
ebac4655
JA
64struct libaio_data {
65 io_context_t aio_ctx;
66 struct io_event *aio_events;
67};
68
69static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
70{
71 if (io_u->ddir == DDIR_READ)
72 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
73 else
74 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
75
76 return 0;
77}
78
79static struct io_u *fio_libaio_event(struct thread_data *td, int event)
80{
81 struct libaio_data *ld = td->io_data;
82
83 return ev_to_iou(ld->aio_events + event);
84}
85
86static int fio_libaio_getevents(struct thread_data *td, int min, int max,
87 struct timespec *t)
88{
89 struct libaio_data *ld = td->io_data;
90 int r;
91
92 do {
93 r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
94 if (r == -EAGAIN) {
95 usleep(100);
96 continue;
97 } else if (r == -EINTR)
98 continue;
99 else
100 break;
101 } while (1);
102
103 return r;
104}
105
106static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
107{
108 struct libaio_data *ld = td->io_data;
109 struct iocb *iocb = &io_u->iocb;
110 int ret;
111
112 do {
113 ret = io_submit(ld->aio_ctx, 1, &iocb);
114 if (ret == 1)
115 return 0;
116 else if (ret == -EAGAIN)
117 usleep(100);
118 else if (ret == -EINTR)
119 continue;
120 else
121 break;
122 } while (1);
123
124 return ret;
125
126}
127
128static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
129{
130 struct libaio_data *ld = td->io_data;
131
132 return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
133}
134
135static void fio_libaio_cleanup(struct thread_data *td)
136{
137 struct libaio_data *ld = td->io_data;
138
139 if (ld) {
140 io_destroy(ld->aio_ctx);
141 if (ld->aio_events)
142 free(ld->aio_events);
143
144 free(ld);
145 td->io_data = NULL;
146 }
147}
148
149int fio_libaio_init(struct thread_data *td)
150{
151 struct libaio_data *ld = malloc(sizeof(*ld));
152
153 memset(ld, 0, sizeof(*ld));
154 if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
155 td_verror(td, errno);
156 return 1;
157 }
158
159 td->io_prep = fio_libaio_io_prep;
160 td->io_queue = fio_libaio_queue;
161 td->io_getevents = fio_libaio_getevents;
162 td->io_event = fio_libaio_event;
163 td->io_cancel = fio_libaio_cancel;
164 td->io_cleanup = fio_libaio_cleanup;
165 td->io_sync = fio_io_sync;
166
167 ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
168 td->io_data = ld;
169 return 0;
170}
171
172#else /* FIO_HAVE_LIBAIO */
173
174int fio_libaio_init(struct thread_data *td)
175{
176 return EINVAL;
177}
178
179#endif /* FIO_HAVE_LIBAIO */
180
181#ifdef FIO_HAVE_POSIXAIO
182
183struct posixaio_data {
184 struct io_u **aio_events;
185};
186
187static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
188{
189 int r = aio_cancel(td->fd, &io_u->aiocb);
190
191 if (r == 1 || r == AIO_CANCELED)
192 return 0;
193
194 return 1;
195}
196
197static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
198{
199 struct aiocb *aiocb = &io_u->aiocb;
200
201 aiocb->aio_fildes = td->fd;
202 aiocb->aio_buf = io_u->buf;
203 aiocb->aio_nbytes = io_u->buflen;
204 aiocb->aio_offset = io_u->offset;
205
206 io_u->seen = 0;
207 return 0;
208}
209
210static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
211 struct timespec *t)
212{
213 struct posixaio_data *pd = td->io_data;
214 struct list_head *entry;
215 struct timespec start;
216 int r, have_timeout = 0;
217
218 if (t && !fill_timespec(&start))
219 have_timeout = 1;
220
221 r = 0;
222restart:
223 list_for_each(entry, &td->io_u_busylist) {
224 struct io_u *io_u = list_entry(entry, struct io_u, list);
225 int err;
226
227 if (io_u->seen)
228 continue;
229
230 err = aio_error(&io_u->aiocb);
231 switch (err) {
232 default:
233 io_u->error = err;
234 case ECANCELED:
235 case 0:
236 pd->aio_events[r++] = io_u;
237 io_u->seen = 1;
238 break;
239 case EINPROGRESS:
240 break;
241 }
242
243 if (r >= max)
244 break;
245 }
246
247 if (r >= min)
248 return r;
249
250 if (have_timeout) {
251 unsigned long long usec;
252
253 usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
254 if (ts_utime_since_now(&start) > usec)
255 return r;
256 }
257
258 /*
259 * hrmpf, we need to wait for more. we should use aio_suspend, for
260 * now just sleep a little and recheck status of busy-and-not-seen
261 */
262 usleep(1000);
263 goto restart;
264}
265
266static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
267{
268 struct posixaio_data *pd = td->io_data;
269
270 return pd->aio_events[event];
271}
272
273static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
274{
275 struct aiocb *aiocb = &io_u->aiocb;
276 int ret;
277
278 if (io_u->ddir == DDIR_READ)
279 ret = aio_read(aiocb);
280 else
281 ret = aio_write(aiocb);
282
283 if (ret)
284 io_u->error = errno;
285
286 return io_u->error;
287}
288
289static void fio_posixaio_cleanup(struct thread_data *td)
290{
291 struct posixaio_data *pd = td->io_data;
292
293 if (pd) {
294 free(pd->aio_events);
295 free(pd);
296 td->io_data = NULL;
297 }
298}
299
300int fio_posixaio_init(struct thread_data *td)
301{
302 struct posixaio_data *pd = malloc(sizeof(*pd));
303
304 pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
305
306 td->io_prep = fio_posixaio_prep;
307 td->io_queue = fio_posixaio_queue;
308 td->io_getevents = fio_posixaio_getevents;
309 td->io_event = fio_posixaio_event;
310 td->io_cancel = fio_posixaio_cancel;
311 td->io_cleanup = fio_posixaio_cleanup;
312 td->io_sync = fio_io_sync;
313
314 td->io_data = pd;
315 return 0;
316}
317
318#else /* FIO_HAVE_POSIXAIO */
319
320int fio_posixaio_init(struct thread_data *td)
321{
322 return EINVAL;
323}
324
325#endif /* FIO_HAVE_POSIXAIO */
326
327struct syncio_data {
328 struct io_u *last_io_u;
329};
330
331static int fio_syncio_getevents(struct thread_data *td, int min, int max,
332 struct timespec *t)
333{
334 assert(max <= 1);
335
336 /*
337 * we can only have one finished io_u for sync io, since the depth
338 * is always 1
339 */
340 if (list_empty(&td->io_u_busylist))
341 return 0;
342
343 return 1;
344}
345
346static struct io_u *fio_syncio_event(struct thread_data *td, int event)
347{
348 struct syncio_data *sd = td->io_data;
349
350 assert(event == 0);
351
352 return sd->last_io_u;
353}
354
355static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
356{
77cf8455
JA
357 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
358 td_verror(td, errno);
359 return 1;
ebac4655
JA
360 }
361
362 return 0;
363}
364
365static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
366{
367 struct syncio_data *sd = td->io_data;
368 int ret;
369
370 if (io_u->ddir == DDIR_READ)
371 ret = read(td->fd, io_u->buf, io_u->buflen);
372 else
373 ret = write(td->fd, io_u->buf, io_u->buflen);
374
375 if ((unsigned int) ret != io_u->buflen) {
376 if (ret > 0) {
377 io_u->resid = io_u->buflen - ret;
5c4e1dbc 378 io_u->error = EIO;
ebac4655
JA
379 } else
380 io_u->error = errno;
381 }
382
383 if (!io_u->error)
384 sd->last_io_u = io_u;
385
386 return io_u->error;
387}
388
389static void fio_syncio_cleanup(struct thread_data *td)
390{
391 if (td->io_data) {
392 free(td->io_data);
393 td->io_data = NULL;
394 }
395}
396
397int fio_syncio_init(struct thread_data *td)
398{
399 struct syncio_data *sd = malloc(sizeof(*sd));
400
401 td->io_prep = fio_syncio_prep;
402 td->io_queue = fio_syncio_queue;
403 td->io_getevents = fio_syncio_getevents;
404 td->io_event = fio_syncio_event;
405 td->io_cancel = NULL;
406 td->io_cleanup = fio_syncio_cleanup;
407 td->io_sync = fio_io_sync;
408
409 sd->last_io_u = NULL;
410 td->io_data = sd;
411 return 0;
412}
413
414static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
415{
416 unsigned long long real_off = io_u->offset - td->file_offset;
417 struct syncio_data *sd = td->io_data;
418
419 if (io_u->ddir == DDIR_READ)
420 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
421 else
422 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
423
424 /*
425 * not really direct, but should drop the pages from the cache
426 */
427 if (td->odirect) {
428 if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
429 io_u->error = errno;
430 if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
431 io_u->error = errno;
432 }
433
434 if (!io_u->error)
435 sd->last_io_u = io_u;
436
437 return io_u->error;
438}
439
440static int fio_mmapio_sync(struct thread_data *td)
441{
442 return msync(td->mmap, td->file_size, MS_SYNC);
443}
444
445int fio_mmapio_init(struct thread_data *td)
446{
447 struct syncio_data *sd = malloc(sizeof(*sd));
448
449 td->io_prep = NULL;
450 td->io_queue = fio_mmapio_queue;
451 td->io_getevents = fio_syncio_getevents;
452 td->io_event = fio_syncio_event;
453 td->io_cancel = NULL;
454 td->io_cleanup = fio_syncio_cleanup;
455 td->io_sync = fio_mmapio_sync;
456
457 sd->last_io_u = NULL;
458 td->io_data = sd;
459 return 0;
460}
461
462#ifdef FIO_HAVE_SGIO
463
b1ff3403 464struct sgio_cmd {
5f2b7b5e 465 unsigned char cdb[10];
b1ff3403
JA
466 int nr;
467};
468
ebac4655 469struct sgio_data {
b1ff3403
JA
470 struct sgio_cmd *cmds;
471 struct io_u **events;
ebac4655
JA
472 unsigned int bs;
473};
474
b1ff3403
JA
475static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
476 struct io_u *io_u, int fs)
ebac4655 477{
b1ff3403
JA
478 struct sgio_cmd *sc = &sd->cmds[io_u->index];
479
ebac4655 480 memset(hdr, 0, sizeof(*hdr));
b1ff3403 481 memset(sc->cdb, 0, sizeof(sc->cdb));
ebac4655
JA
482
483 hdr->interface_id = 'S';
b1ff3403
JA
484 hdr->cmdp = sc->cdb;
485 hdr->cmd_len = sizeof(sc->cdb);
486 hdr->pack_id = io_u->index;
487 hdr->usr_ptr = io_u;
ebac4655 488
b1ff3403 489 if (fs) {
ebac4655
JA
490 hdr->dxferp = io_u->buf;
491 hdr->dxfer_len = io_u->buflen;
492 }
493}
494
b1ff3403
JA
495static int fio_sgio_getevents(struct thread_data *td, int min, int max,
496 struct timespec *t)
497{
498 struct sgio_data *sd = td->io_data;
499 struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
500 void *buf = malloc(max * sizeof(struct sg_io_hdr));
5f2b7b5e 501 int left = max, ret, events, i, r = 0, fl = 0;
b1ff3403
JA
502
503 /*
504 * don't block for !events
505 */
506 if (!min) {
507 fl = fcntl(td->fd, F_GETFL);
508 fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
509 }
510
511 while (left) {
512 do {
513 if (!min)
514 break;
515 poll(&pfd, 1, -1);
516 if (pfd.revents & POLLIN)
517 break;
518 } while (1);
519
520 ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
521 if (ret < 0) {
522 if (errno == EAGAIN)
523 break;
524 td_verror(td, errno);
525 r = -1;
526 break;
527 } else if (!ret)
528 break;
529
530 events = ret / sizeof(struct sg_io_hdr);
531 left -= events;
532 r += events;
533
534 for (i = 0; i < events; i++) {
535 struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
536
537 sd->events[i] = hdr->usr_ptr;
538 }
539 }
540
541 if (!min)
542 fcntl(td->fd, F_SETFL, fl);
543
544 free(buf);
545 return r;
546}
547
f3a4ef5e 548static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
0af7b542 549{
b1ff3403
JA
550 struct sgio_data *sd = td->io_data;
551 struct sg_io_hdr *hdr = &io_u->hdr;
0af7b542 552
f3a4ef5e
JA
553 sd->events[0] = io_u;
554
555 return ioctl(td->fd, SG_IO, hdr);
556}
557
558static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
559{
560 struct sg_io_hdr *hdr = &io_u->hdr;
561 int ret;
0af7b542
JA
562
563 ret = write(td->fd, hdr, sizeof(*hdr));
564 if (ret < 0)
565 return errno;
566
b1ff3403
JA
567 if (sync) {
568 ret = read(td->fd, hdr, sizeof(*hdr));
569 if (ret < 0)
570 return errno;
571 }
0af7b542
JA
572
573 return 0;
574}
575
f3a4ef5e
JA
576static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
577{
578 if (td->filetype == FIO_TYPE_BD)
579 return fio_sgio_ioctl_doio(td, io_u);
580
581 return fio_sgio_rw_doio(td, io_u, sync);
582}
583
ebac4655
JA
584static int fio_sgio_sync(struct thread_data *td)
585{
586 struct sgio_data *sd = td->io_data;
b1ff3403
JA
587 struct sg_io_hdr *hdr;
588 struct io_u *io_u;
589 int ret;
ebac4655 590
b1ff3403
JA
591 io_u = __get_io_u(td);
592 if (!io_u)
593 return ENOMEM;
ebac4655 594
b1ff3403
JA
595 hdr = &io_u->hdr;
596 sgio_hdr_init(sd, hdr, io_u, 0);
597 hdr->dxfer_direction = SG_DXFER_NONE;
ebac4655 598
b1ff3403
JA
599 hdr->cmdp[0] = 0x35;
600
601 ret = fio_sgio_doio(td, io_u, 1);
602 put_io_u(td, io_u);
603 return ret;
ebac4655
JA
604}
605
606static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
607{
608 struct sg_io_hdr *hdr = &io_u->hdr;
609 struct sgio_data *sd = td->io_data;
610 int nr_blocks, lba;
611
612 if (io_u->buflen & (sd->bs - 1)) {
3b70d7e5 613 log_err("read/write not sector aligned\n");
ebac4655
JA
614 return EINVAL;
615 }
616
b1ff3403 617 sgio_hdr_init(sd, hdr, io_u, 1);
ebac4655
JA
618
619 if (io_u->ddir == DDIR_READ) {
620 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
621 hdr->cmdp[0] = 0x28;
622 } else {
623 hdr->dxfer_direction = SG_DXFER_TO_DEV;
624 hdr->cmdp[0] = 0x2a;
625 }
626
627 nr_blocks = io_u->buflen / sd->bs;
628 lba = io_u->offset / sd->bs;
629 hdr->cmdp[2] = (lba >> 24) & 0xff;
630 hdr->cmdp[3] = (lba >> 16) & 0xff;
631 hdr->cmdp[4] = (lba >> 8) & 0xff;
632 hdr->cmdp[5] = lba & 0xff;
633 hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
634 hdr->cmdp[8] = nr_blocks & 0xff;
635 return 0;
636}
637
638static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
639{
640 struct sg_io_hdr *hdr = &io_u->hdr;
ebac4655
JA
641 int ret;
642
b1ff3403 643 ret = fio_sgio_doio(td, io_u, 0);
0af7b542 644
ebac4655
JA
645 if (ret < 0)
646 io_u->error = errno;
647 else if (hdr->status) {
648 io_u->resid = hdr->resid;
649 io_u->error = EIO;
650 }
651
ebac4655
JA
652 return io_u->error;
653}
654
655static struct io_u *fio_sgio_event(struct thread_data *td, int event)
656{
657 struct sgio_data *sd = td->io_data;
658
b1ff3403 659 return sd->events[event];
ebac4655
JA
660}
661
0af7b542
JA
662static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
663{
664 struct sgio_data *sd = td->io_data;
b1ff3403
JA
665 struct io_u *io_u;
666 struct sg_io_hdr *hdr;
0af7b542
JA
667 unsigned char buf[8];
668 int ret;
669
b1ff3403
JA
670 io_u = __get_io_u(td);
671 assert(io_u);
672
673 hdr = &io_u->hdr;
674 sgio_hdr_init(sd, hdr, io_u, 0);
0af7b542
JA
675 memset(buf, 0, sizeof(buf));
676
b1ff3403
JA
677 hdr->cmdp[0] = 0x25;
678 hdr->dxfer_direction = SG_DXFER_FROM_DEV;
679 hdr->dxferp = buf;
680 hdr->dxfer_len = sizeof(buf);
0af7b542 681
b1ff3403
JA
682 ret = fio_sgio_doio(td, io_u, 1);
683 if (ret) {
684 put_io_u(td, io_u);
0af7b542 685 return ret;
b1ff3403 686 }
0af7b542
JA
687
688 *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
b1ff3403 689 put_io_u(td, io_u);
0af7b542
JA
690 return 0;
691}
692
ebac4655
JA
693int fio_sgio_init(struct thread_data *td)
694{
695 struct sgio_data *sd;
0af7b542
JA
696 unsigned int bs;
697 int ret;
ebac4655 698
0af7b542 699 sd = malloc(sizeof(*sd));
b1ff3403
JA
700 sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
701 sd->events = malloc(td->iodepth * sizeof(struct io_u *));
0af7b542 702 td->io_data = sd;
ebac4655 703
0af7b542
JA
704 if (td->filetype == FIO_TYPE_BD) {
705 if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
706 td_verror(td, errno);
707 return 1;
708 }
709 } else if (td->filetype == FIO_TYPE_CHAR) {
710 int version;
711
712 if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
713 td_verror(td, errno);
714 return 1;
715 }
716
717 ret = fio_sgio_get_bs(td, &bs);
718 if (ret)
719 return ret;
720 } else {
3b70d7e5 721 log_err("ioengine sgio only works on block devices\n");
ebac4655
JA
722 return 1;
723 }
724
ebac4655
JA
725 sd->bs = bs;
726
727 td->io_prep = fio_sgio_prep;
728 td->io_queue = fio_sgio_queue;
b1ff3403
JA
729
730 if (td->filetype == FIO_TYPE_BD)
731 td->io_getevents = fio_syncio_getevents;
732 else
733 td->io_getevents = fio_sgio_getevents;
734
ebac4655
JA
735 td->io_event = fio_sgio_event;
736 td->io_cancel = NULL;
737 td->io_cleanup = fio_syncio_cleanup;
738 td->io_sync = fio_sgio_sync;
739
740 /*
741 * we want to do it, regardless of whether odirect is set or not
742 */
743 td->override_sync = 1;
ebac4655
JA
744 return 0;
745}
746
747#else /* FIO_HAVE_SGIO */
748
749int fio_sgio_init(struct thread_data *td)
750{
751 return EINVAL;
752}
753
754#endif /* FIO_HAVE_SGIO */
8756e4d4
JA
755
756#ifdef FIO_HAVE_SPLICE
757struct spliceio_data {
758 struct io_u *last_io_u;
759 int pipe[2];
760};
761
762static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
763{
51852280 764 struct spliceio_data *sd = td->io_data;
8756e4d4
JA
765
766 assert(event == 0);
767
768 return sd->last_io_u;
769}
770
771/*
772 * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
773 * So just splice the data from the file into the pipe, and use regular
774 * read to fill the buffer. Doesn't make a lot of sense, but...
775 */
776static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
777{
778 struct spliceio_data *sd = td->io_data;
779 int ret, ret2, buflen;
3feedc60 780 off_t offset;
8756e4d4
JA
781 void *p;
782
3feedc60 783 offset = io_u->offset;
8756e4d4
JA
784 buflen = io_u->buflen;
785 p = io_u->buf;
51852280 786 while (buflen) {
3feedc60
JA
787 int this_len = buflen;
788
789 if (this_len > SPLICE_DEF_SIZE)
790 this_len = SPLICE_DEF_SIZE;
791
792 ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
793 if (ret < 0) {
794 if (errno == ENODATA || errno == EAGAIN)
795 continue;
8756e4d4 796
8756e4d4 797 return errno;
3feedc60 798 }
8756e4d4
JA
799
800 buflen -= ret;
801
802 while (ret) {
803 ret2 = read(sd->pipe[0], p, ret);
804 if (ret2 < 0)
805 return errno;
806
807 ret -= ret2;
808 p += ret2;
809 }
51852280 810 }
8756e4d4
JA
811
812 return io_u->buflen;
813}
814
815/*
816 * For splice writing, we can vmsplice our data buffer directly into a
817 * pipe and then splice that to a file.
818 */
819static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
820{
821 struct spliceio_data *sd = td->io_data;
822 struct iovec iov[1] = {
823 {
824 .iov_base = io_u->buf,
825 .iov_len = io_u->buflen,
826 }
827 };
828 struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
3feedc60 829 off_t off = io_u->offset;
8756e4d4
JA
830 int ret, ret2;
831
832 while (iov[0].iov_len) {
833 if (poll(&pfd, 1, -1) < 0)
834 return errno;
835
836 ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
837 if (ret < 0)
838 return errno;
839
840 iov[0].iov_len -= ret;
841 iov[0].iov_base += ret;
842
843 while (ret) {
8756e4d4
JA
844 ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
845 if (ret2 < 0)
846 return errno;
847
848 ret -= ret2;
849 }
850 }
851
852 return io_u->buflen;
853}
854
855static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
856{
857 struct spliceio_data *sd = td->io_data;
858 int ret;
859
860 if (io_u->ddir == DDIR_READ)
861 ret = fio_splice_read(td, io_u);
862 else
863 ret = fio_splice_write(td, io_u);
864
865 if ((unsigned int) ret != io_u->buflen) {
866 if (ret > 0) {
867 io_u->resid = io_u->buflen - ret;
868 io_u->error = ENODATA;
869 } else
870 io_u->error = errno;
871 }
872
873 if (!io_u->error)
874 sd->last_io_u = io_u;
875
876 return io_u->error;
877}
878
879static void fio_spliceio_cleanup(struct thread_data *td)
880{
881 struct spliceio_data *sd = td->io_data;
882
883 if (sd) {
884 close(sd->pipe[0]);
885 close(sd->pipe[1]);
886 free(sd);
887 td->io_data = NULL;
888 }
889}
890
891int fio_spliceio_init(struct thread_data *td)
892{
893 struct spliceio_data *sd = malloc(sizeof(*sd));
894
895 td->io_queue = fio_spliceio_queue;
896 td->io_getevents = fio_syncio_getevents;
897 td->io_event = fio_spliceio_event;
898 td->io_cancel = NULL;
899 td->io_cleanup = fio_spliceio_cleanup;
900 td->io_sync = fio_io_sync;
901
902 sd->last_io_u = NULL;
903 if (pipe(sd->pipe) < 0) {
904 td_verror(td, errno);
905 free(sd);
906 return 1;
907 }
908
909 td->io_data = sd;
910 return 0;
911}
912
913#else /* FIO_HAVE_SPLICE */
914
915int fio_spliceio_init(struct thread_data *td)
916{
917 return EINVAL;
918}
919
920#endif /* FIO_HAVE_SPLICE */