/*
 * The io parts of the fio tool, includes workers for sync and mmap'ed
 * io, as well as both posix and linux libaio support.
 *
 * sync io is implemented with plain lseek/read/write, mmap'ed io with
 * memcpy against an established mapping.
 *
 * This is not really specific to fio; if the get_io_u/put_io_u helpers
 * and structures were pulled into this as well, it would be a perfectly
 * generic io engine that could be used for other projects.
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include "fio.h"
#include "os.h"

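/*
 * Each io engine below wires up the same set of per-thread hooks on
 * struct thread_data: ->io_prep, ->io_queue, ->io_getevents, ->io_event,
 * ->io_cancel, ->io_cleanup and ->io_sync. The main loop is expected to
 * prep an io_u, queue it, then reap completions via getevents/event.
 * A rough sketch of the intended calling sequence (not part of this
 * file, shown for illustration only):
 *
 *	td->io_prep(td, io_u);
 *	td->io_queue(td, io_u);
 *	n = td->io_getevents(td, 1, td->iodepth, NULL);
 *	for (i = 0; i < n; i++)
 *		io_u = td->io_event(td, i);
 */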
/*
 * These helpers are shared by several engines below, so they live
 * outside the FIO_HAVE_LIBAIO section.
 */
static int fio_io_sync(struct thread_data *td)
{
	return fsync(td->fd);
}

static int fill_timespec(struct timespec *ts)
{
#ifdef _POSIX_TIMERS
	if (!clock_gettime(CLOCK_MONOTONIC, ts))
		return 0;

	perror("clock_gettime");
#endif
	return 1;
}

static unsigned long long ts_utime_since_now(struct timespec *t)
{
	long long sec, nsec;
	struct timespec now;

	if (fill_timespec(&now))
		return 0;

	sec = now.tv_sec - t->tv_sec;
	nsec = now.tv_nsec - t->tv_nsec;
	if (sec > 0 && nsec < 0) {
		sec--;
		nsec += 1000000000;
	}

	sec *= 1000000;
	nsec /= 1000;
	return sec + nsec;
}

#ifdef FIO_HAVE_LIBAIO

#define ev_to_iou(ev)	(struct io_u *) ((unsigned long) (ev)->obj)

struct libaio_data {
	io_context_t aio_ctx;
	struct io_event *aio_events;
};

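/*
 * The libaio engine keeps one io_context_t per thread plus an event
 * array sized to td->iodepth, so a single io_getevents() call can
 * return every in-flight request at once.
 */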
static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
{
	if (io_u->ddir == DDIR_READ)
		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
	else
		io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);

	return 0;
}

static struct io_u *fio_libaio_event(struct thread_data *td, int event)
{
	struct libaio_data *ld = td->io_data;

	return ev_to_iou(ld->aio_events + event);
}

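/*
 * Note that the libaio system call wrappers return a negative errno
 * value directly instead of setting errno, hence the explicit
 * -EAGAIN/-EINTR checks in the loops below.
 */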
static int fio_libaio_getevents(struct thread_data *td, int min, int max,
				struct timespec *t)
{
	struct libaio_data *ld = td->io_data;
	int r;

	do {
		r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
		if (r == -EAGAIN) {
			usleep(100);
			continue;
		} else if (r == -EINTR)
			continue;
		else
			break;
	} while (1);

	return r;
}

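/*
 * Requests are submitted one iocb at a time; io_submit() returns the
 * number of iocbs accepted, so a return of 1 means success here.
 */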
static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *ld = td->io_data;
	struct iocb *iocb = &io_u->iocb;
	int ret;

	do {
		ret = io_submit(ld->aio_ctx, 1, &iocb);
		if (ret == 1)
			return 0;
		else if (ret == -EAGAIN)
			usleep(100);
		else if (ret == -EINTR)
			continue;
		else
			break;
	} while (1);

	return ret;
}

static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
{
	struct libaio_data *ld = td->io_data;

	return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
}

static void fio_libaio_cleanup(struct thread_data *td)
{
	struct libaio_data *ld = td->io_data;

	if (ld) {
		io_destroy(ld->aio_ctx);
		if (ld->aio_events)
			free(ld->aio_events);

		free(ld);
		td->io_data = NULL;
	}
}

int fio_libaio_init(struct thread_data *td)
{
	struct libaio_data *ld = malloc(sizeof(*ld));
	int ret;

	memset(ld, 0, sizeof(*ld));

	/*
	 * io_queue_init() returns a negative errno value on failure, it
	 * does not set errno itself; don't leak ld on the error path.
	 */
	ret = io_queue_init(td->iodepth, &ld->aio_ctx);
	if (ret) {
		td_verror(td, -ret);
		free(ld);
		return 1;
	}

	td->io_prep = fio_libaio_io_prep;
	td->io_queue = fio_libaio_queue;
	td->io_getevents = fio_libaio_getevents;
	td->io_event = fio_libaio_event;
	td->io_cancel = fio_libaio_cancel;
	td->io_cleanup = fio_libaio_cleanup;
	td->io_sync = fio_io_sync;

	ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
	td->io_data = ld;
	return 0;
}

#else /* FIO_HAVE_LIBAIO */

int fio_libaio_init(struct thread_data *td)
{
	return EINVAL;
}

#endif /* FIO_HAVE_LIBAIO */

#ifdef FIO_HAVE_POSIXAIO

struct posixaio_data {
	struct io_u **aio_events;
};

static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
{
	int r = aio_cancel(td->fd, &io_u->aiocb);

	if (r == 1 || r == AIO_CANCELED)
		return 0;

	return 1;
}

static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct aiocb *aiocb = &io_u->aiocb;

	aiocb->aio_fildes = td->fd;
	aiocb->aio_buf = io_u->buf;
	aiocb->aio_nbytes = io_u->buflen;
	aiocb->aio_offset = io_u->offset;

	io_u->seen = 0;
	return 0;
}

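/*
 * POSIX aio has no bulk completion interface comparable to
 * io_getevents(), so completion reaping is done by polling aio_error()
 * for every queued-but-unseen io_u on the busy list. The ->seen flag
 * (cleared in prep above) makes sure a completed request is only
 * reported once.
 */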
static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
				  struct timespec *t)
{
	struct posixaio_data *pd = td->io_data;
	struct list_head *entry;
	struct timespec start;
	int r, have_timeout = 0;

	if (t && !fill_timespec(&start))
		have_timeout = 1;

	r = 0;
restart:
	list_for_each(entry, &td->io_u_busylist) {
		struct io_u *io_u = list_entry(entry, struct io_u, list);
		int err;

		if (io_u->seen)
			continue;

		err = aio_error(&io_u->aiocb);
		switch (err) {
		default:
			io_u->error = err;
			/* fall through: completed, just with an error */
		case ECANCELED:
		case 0:
			pd->aio_events[r++] = io_u;
			io_u->seen = 1;
			break;
		case EINPROGRESS:
			break;
		}

		if (r >= max)
			break;
	}

	if (r >= min)
		return r;

	if (have_timeout) {
		unsigned long long usec;

		usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
		if (ts_utime_since_now(&start) > usec)
			return r;
	}

	/*
	 * hrmpf, we need to wait for more. we should use aio_suspend, for
	 * now just sleep a little and recheck status of busy-and-not-seen
	 */
	usleep(1000);
	goto restart;
}

static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
{
	struct posixaio_data *pd = td->io_data;

	return pd->aio_events[event];
}

static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct aiocb *aiocb = &io_u->aiocb;
	int ret;

	if (io_u->ddir == DDIR_READ)
		ret = aio_read(aiocb);
	else
		ret = aio_write(aiocb);

	if (ret)
		io_u->error = errno;

	return io_u->error;
}

static void fio_posixaio_cleanup(struct thread_data *td)
{
	struct posixaio_data *pd = td->io_data;

	if (pd) {
		free(pd->aio_events);
		free(pd);
		td->io_data = NULL;
	}
}

int fio_posixaio_init(struct thread_data *td)
{
	struct posixaio_data *pd = malloc(sizeof(*pd));

	pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));

	td->io_prep = fio_posixaio_prep;
	td->io_queue = fio_posixaio_queue;
	td->io_getevents = fio_posixaio_getevents;
	td->io_event = fio_posixaio_event;
	td->io_cancel = fio_posixaio_cancel;
	td->io_cleanup = fio_posixaio_cleanup;
	td->io_sync = fio_io_sync;

	td->io_data = pd;
	return 0;
}

#else /* FIO_HAVE_POSIXAIO */

int fio_posixaio_init(struct thread_data *td)
{
	return EINVAL;
}

#endif /* FIO_HAVE_POSIXAIO */

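/*
 * The remaining engines (sync, mmap and sg) are all synchronous: at
 * most one request is in flight at a time, so they share the trivial
 * getevents/event implementations below, which just hand back the last
 * completed io_u.
 */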
struct syncio_data {
	struct io_u *last_io_u;
};

static int fio_syncio_getevents(struct thread_data *td, int min, int max,
				struct timespec *t)
{
	assert(max <= 1);

	/*
	 * we can only have one finished io_u for sync io, since the depth
	 * is always 1
	 */
	if (list_empty(&td->io_u_busylist))
		return 0;

	return 1;
}

static struct io_u *fio_syncio_event(struct thread_data *td, int event)
{
	struct syncio_data *sd = td->io_data;

	assert(event == 0);

	return sd->last_io_u;
}

static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
{
	if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
		td_verror(td, errno);
		return 1;
	}

	return 0;
}

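/*
 * A short read or write is reported through io_u->resid rather than
 * treated as a hard failure; ENODATA is used as the error code so the
 * caller can tell a partial transfer from a plain -1/errno failure.
 */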
static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_data;
	int ret;

	if (io_u->ddir == DDIR_READ)
		ret = read(td->fd, io_u->buf, io_u->buflen);
	else
		ret = write(td->fd, io_u->buf, io_u->buflen);

	if ((unsigned int) ret != io_u->buflen) {
		if (ret > 0) {
			io_u->resid = io_u->buflen - ret;
			io_u->error = ENODATA;
		} else
			io_u->error = errno;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}

static void fio_syncio_cleanup(struct thread_data *td)
{
	if (td->io_data) {
		free(td->io_data);
		td->io_data = NULL;
	}
}

int fio_syncio_init(struct thread_data *td)
{
	struct syncio_data *sd = malloc(sizeof(*sd));

	td->io_prep = fio_syncio_prep;
	td->io_queue = fio_syncio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_syncio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_io_sync;

	sd->last_io_u = NULL;
	td->io_data = sd;
	return 0;
}

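/*
 * The mmap engine assumes the file has already been mapped at td->mmap,
 * covering the range starting at td->file_offset, so "io" here is just
 * a memcpy into or out of the mapping.
 */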
static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
{
	unsigned long long real_off = io_u->offset - td->file_offset;
	struct syncio_data *sd = td->io_data;

	if (io_u->ddir == DDIR_READ)
		memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
	else
		memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);

	/*
	 * not really direct, but should drop the pages from the cache
	 */
	if (td->odirect) {
		if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
			io_u->error = errno;
		if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
			io_u->error = errno;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}

static int fio_mmapio_sync(struct thread_data *td)
{
	return msync(td->mmap, td->file_size, MS_SYNC);
}

int fio_mmapio_init(struct thread_data *td)
{
	struct syncio_data *sd = malloc(sizeof(*sd));

	td->io_prep = NULL;
	td->io_queue = fio_mmapio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_syncio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_mmapio_sync;

	sd->last_io_u = NULL;
	td->io_data = sd;
	return 0;
}

#ifdef FIO_HAVE_SGIO

struct sgio_data {
	struct io_u *last_io_u;
	unsigned char cdb[10];
	unsigned int bs;
};

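/*
 * The sg engine bypasses the block layer and talks to the device with
 * raw SCSI commands through the SG_IO ioctl, so transfers must be in
 * whole sectors (sd->bs is the logical block size from BLKSSZGET).
 */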
static inline void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
				 struct io_u *io_u)
{
	memset(hdr, 0, sizeof(*hdr));
	memset(sd->cdb, 0, sizeof(sd->cdb));

	hdr->interface_id = 'S';
	hdr->cmdp = sd->cdb;
	hdr->cmd_len = sizeof(sd->cdb);

	if (io_u) {
		hdr->dxferp = io_u->buf;
		hdr->dxfer_len = io_u->buflen;
	}
}

static int fio_sgio_sync(struct thread_data *td)
{
	struct sgio_data *sd = td->io_data;
	struct sg_io_hdr hdr;

	sgio_hdr_init(sd, &hdr, NULL);
	hdr.dxfer_direction = SG_DXFER_NONE;

	hdr.cmdp[0] = 0x35;		/* SYNCHRONIZE CACHE(10) */

	return ioctl(td->fd, SG_IO, &hdr);
}

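/*
 * 10-byte READ/WRITE CDB layout, for reference (SBC READ(10)/WRITE(10)):
 *
 *	byte 0:    opcode (0x28 = READ(10), 0x2a = WRITE(10))
 *	bytes 2-5: logical block address, big-endian
 *	bytes 7-8: transfer length in blocks, big-endian
 *
 * which is what the byte shifting in fio_sgio_prep() below fills in.
 */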
static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sgio_data *sd = td->io_data;
	int nr_blocks, lba;

	if (io_u->buflen & (sd->bs - 1)) {
		fprintf(stderr, "read/write not sector aligned\n");
		return EINVAL;
	}

	sgio_hdr_init(sd, hdr, io_u);

	if (io_u->ddir == DDIR_READ) {
		hdr->dxfer_direction = SG_DXFER_FROM_DEV;
		hdr->cmdp[0] = 0x28;	/* READ(10) */
	} else {
		hdr->dxfer_direction = SG_DXFER_TO_DEV;
		hdr->cmdp[0] = 0x2a;	/* WRITE(10) */
	}

	/*
	 * LBA and block count are stored big-endian in the CDB
	 */
	nr_blocks = io_u->buflen / sd->bs;
	lba = io_u->offset / sd->bs;
	hdr->cmdp[2] = (lba >> 24) & 0xff;
	hdr->cmdp[3] = (lba >> 16) & 0xff;
	hdr->cmdp[4] = (lba >> 8) & 0xff;
	hdr->cmdp[5] = lba & 0xff;
	hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
	hdr->cmdp[8] = nr_blocks & 0xff;
	return 0;
}

static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct sg_io_hdr *hdr = &io_u->hdr;
	struct sgio_data *sd = td->io_data;
	int ret;

	ret = ioctl(td->fd, SG_IO, hdr);
	if (ret < 0)
		io_u->error = errno;
	else if (hdr->status) {
		io_u->resid = hdr->resid;
		io_u->error = EIO;
	}

	if (!io_u->error)
		sd->last_io_u = io_u;

	return io_u->error;
}

static struct io_u *fio_sgio_event(struct thread_data *td, int event)
{
	struct sgio_data *sd = td->io_data;

	assert(event == 0);

	return sd->last_io_u;
}

int fio_sgio_init(struct thread_data *td)
{
	struct sgio_data *sd;
	int bs;

	if (td->filetype != FIO_TYPE_BD) {
		fprintf(stderr, "ioengine sgio only works on block devices\n");
		return 1;
	}

	if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
		td_verror(td, errno);
		return 1;
	}

	sd = malloc(sizeof(*sd));
	sd->bs = bs;

	td->io_prep = fio_sgio_prep;
	td->io_queue = fio_sgio_queue;
	td->io_getevents = fio_syncio_getevents;
	td->io_event = fio_sgio_event;
	td->io_cancel = NULL;
	td->io_cleanup = fio_syncio_cleanup;
	td->io_sync = fio_sgio_sync;

	/*
	 * always issue the cache sync, regardless of whether odirect
	 * is set or not
	 */
	td->override_sync = 1;

	sd->last_io_u = NULL;
	td->io_data = sd;
	return 0;
}

#else /* FIO_HAVE_SGIO */

int fio_sgio_init(struct thread_data *td)
{
	return EINVAL;
}

#endif /* FIO_HAVE_SGIO */