Fio 1.17.2
[fio.git] / io_u.c
CommitLineData
10ba535a
JA
1#include <unistd.h>
2#include <fcntl.h>
3#include <string.h>
4#include <signal.h>
5#include <time.h>
0c6e7517 6#include <assert.h>
10ba535a
JA
7
8#include "fio.h"
10ba535a 9
5945b9b4
JA
10/*
11 * Change this define to play with the timeout handling
12 */
13#undef FIO_USE_TIMEOUT
14
97601024
JA
/*
 * State shared by the completion helpers for one completion pass.
 * init_icd() fills in ->nr and ->time and clears the rest; io_completed()
 * then accumulates the results.
 */
struct io_completion_data {
	int nr;				/* input: number of events to complete */

	int error;			/* output: error reported for the pass, 0 if none */
	unsigned long bytes_done[2];	/* output: bytes completed, indexed by io_u->ddir */
	struct timeval time;		/* output: time stamp taken by init_icd() */
};
22
10ba535a
JA
23/*
24 * The ->file_map[] contains a map of blocks we have or have not done io
25 * to yet. Used to make sure we cover the entire range in a fair fashion.
26 */
27static int random_map_free(struct thread_data *td, struct fio_file *f,
28 unsigned long long block)
29{
30 unsigned int idx = RAND_MAP_IDX(td, f, block);
31 unsigned int bit = RAND_MAP_BIT(td, f, block);
32
33 return (f->file_map[idx] & (1UL << bit)) == 0;
34}
35
df415585
JA
36/*
37 * Mark a given offset as used in the map.
38 */
9bf2061e 39static void mark_random_map(struct thread_data *td, struct io_u *io_u)
df415585 40{
2dc1bbeb 41 unsigned int min_bs = td->o.rw_min_bs;
9bf2061e 42 struct fio_file *f = io_u->file;
a00735e6
JA
43 unsigned long long block;
44 unsigned int blocks;
c685b5b2 45 unsigned int nr_blocks;
df415585 46
a00735e6
JA
47 block = io_u->offset / (unsigned long long) min_bs;
48 blocks = 0;
c685b5b2
JA
49 nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
50
51 while (blocks < nr_blocks) {
df415585
JA
52 unsigned int idx, bit;
53
1e3d53ac
JA
54 /*
55 * If we have a mixed random workload, we may
56 * encounter blocks we already did IO to.
57 */
b6a4c7d1 58 if (!td->o.ddir_nr == 1 && !random_map_free(td, f, block))
df415585
JA
59 break;
60
61 idx = RAND_MAP_IDX(td, f, block);
62 bit = RAND_MAP_BIT(td, f, block);
63
0032bf9f 64 fio_assert(td, idx < f->num_maps);
df415585
JA
65
66 f->file_map[idx] |= (1UL << bit);
67 block++;
68 blocks++;
69 }
70
a00735e6
JA
71 if ((blocks * min_bs) < io_u->buflen)
72 io_u->buflen = blocks * min_bs;
df415585
JA
73}
74
10ba535a
JA
75/*
76 * Return the next free block in the map.
77 */
78static int get_next_free_block(struct thread_data *td, struct fio_file *f,
79 unsigned long long *b)
80{
81 int i;
82
c685b5b2
JA
83 i = f->last_free_lookup;
84 *b = (i * BLOCKS_PER_MAP);
2dc1bbeb 85 while ((*b) * td->o.rw_min_bs < f->real_file_size) {
10ba535a 86 if (f->file_map[i] != -1UL) {
b12ebc65 87 *b += fio_ffz(f->file_map[i]);
c685b5b2 88 f->last_free_lookup = i;
10ba535a
JA
89 return 0;
90 }
91
92 *b += BLOCKS_PER_MAP;
93 i++;
94 }
95
96 return 1;
97}
98
ec4015da
JA
99static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
100 int ddir, unsigned long long *b)
101{
7bb48f84 102 unsigned long long max_blocks = f->io_size / td->o.min_bs[ddir];
ec4015da
JA
103 unsigned long long r, rb;
104 int loops = 5;
105
106 do {
107 r = os_random_long(&td->random_state);
108 if (!max_blocks)
109 *b = 0;
110 else
111 *b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
43c63a78
JA
112 /*
113 * if we are not maintaining a random map, we are done.
114 */
ec4015da 115 if (td->o.norandommap)
43c63a78
JA
116 return 0;
117
118 /*
119 * calculate map offset and chec if it's free
120 */
ec4015da 121 rb = *b + (f->file_offset / td->o.min_bs[ddir]);
43c63a78
JA
122 if (random_map_free(td, f, rb))
123 return 0;
124
125 } while (--loops);
ec4015da
JA
126
127 /*
43c63a78
JA
128 * we get here, if we didn't suceed in looking up a block. generate
129 * a random start offset into the filemap, and find the first free
130 * block from there.
ec4015da 131 */
43c63a78
JA
132 loops = 10;
133 do {
134 f->last_free_lookup = (f->num_maps - 1) * (r / (RAND_MAX+1.0));
135 if (!get_next_free_block(td, f, b))
136 return 0;
ec4015da 137
43c63a78
JA
138 r = os_random_long(&td->random_state);
139 } while (--loops);
140
141 /*
142 * that didn't work either, try exhaustive search from the start
143 */
144 f->last_free_lookup = 0;
145 return get_next_free_block(td, f, b);
ec4015da
JA
146}
147
10ba535a
JA
148/*
149 * For random io, generate a random new block and see if it's used. Repeat
150 * until we find a free one. For sequential io, just return the end of
151 * the last io issued.
152 */
9bf2061e 153static int get_next_offset(struct thread_data *td, struct io_u *io_u)
10ba535a 154{
9bf2061e 155 struct fio_file *f = io_u->file;
c685b5b2 156 const int ddir = io_u->ddir;
ec4015da 157 unsigned long long b;
10ba535a 158
ec4015da
JA
159 if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
160 td->ddir_nr = td->o.ddir_nr;
211097b2 161
ec4015da 162 if (get_next_rand_offset(td, f, ddir, &b))
bca4ed4d 163 return 1;
43063a1c 164 } else {
0c3d768a
YHJT
165 if (f->last_pos >= f->real_file_size) {
166 if (!td_random(td) || get_next_rand_offset(td, f, ddir, &b))
167 return 1;
bcdedd0a
YHJT
168 } else
169 b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
43063a1c 170 }
10ba535a 171
2dc1bbeb 172 io_u->offset = (b * td->o.min_bs[ddir]) + f->file_offset;
bca4ed4d 173 if (io_u->offset >= f->real_file_size)
10ba535a
JA
174 return 1;
175
176 return 0;
177}
178
9bf2061e 179static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
10ba535a 180{
bca4ed4d 181 const int ddir = io_u->ddir;
10ba535a
JA
182 unsigned int buflen;
183 long r;
184
2dc1bbeb
JA
185 if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
186 buflen = td->o.min_bs[ddir];
10ba535a
JA
187 else {
188 r = os_random_long(&td->bsrange_state);
2dc1bbeb
JA
189 buflen = (unsigned int) (1 + (double) (td->o.max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
190 if (!td->o.bs_unaligned)
191 buflen = (buflen + td->o.min_bs[ddir] - 1) & ~(td->o.min_bs[ddir] - 1);
10ba535a
JA
192 }
193
6a5e6884
JA
194 if (io_u->offset + buflen > io_u->file->real_file_size)
195 buflen = td->o.min_bs[ddir];
196
10ba535a
JA
197 return buflen;
198}
199
afe24a5a
JA
200static void set_rwmix_bytes(struct thread_data *td)
201{
202 unsigned long long rbytes;
203 unsigned int diff;
204
205 /*
206 * we do time or byte based switch. this is needed because
207 * buffered writes may issue a lot quicker than they complete,
208 * whereas reads do not.
209 */
210 rbytes = td->io_bytes[td->rwmix_ddir] - td->rwmix_bytes;
e47f799f 211 diff = td->o.rwmix[td->rwmix_ddir ^ 1];
afe24a5a 212
e47f799f
JA
213 td->rwmix_bytes = td->io_bytes[td->rwmix_ddir] + (rbytes * ((100 - diff)) / diff);
214}
215
216static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
217{
218 unsigned int v;
219 long r;
220
221 r = os_random_long(&td->rwmix_state);
222 v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
223 if (v < td->o.rwmix[DDIR_READ])
224 return DDIR_READ;
225
226 return DDIR_WRITE;
afe24a5a
JA
227}
228
10ba535a
JA
229/*
230 * Return the data direction for the next io_u. If the job is a
231 * mixed read/write workload, check the rwmix cycle and switch if
232 * necessary.
233 */
1e97cce9 234static enum fio_ddir get_rw_ddir(struct thread_data *td)
10ba535a
JA
235{
236 if (td_rw(td)) {
237 struct timeval now;
238 unsigned long elapsed;
afe24a5a 239 unsigned int cycle;
10ba535a 240
02bcaa8c 241 fio_gettime(&now, NULL);
10ba535a
JA
242 elapsed = mtime_since_now(&td->rwmix_switch);
243
e47f799f
JA
244 /*
245 * if this is the first cycle, make it shorter
246 */
afe24a5a
JA
247 cycle = td->o.rwmixcycle;
248 if (!td->rwmix_bytes)
249 cycle /= 10;
250
10ba535a
JA
251 /*
252 * Check if it's time to seed a new data direction.
253 */
e47f799f 254 if (elapsed >= cycle ||
afe24a5a 255 td->io_bytes[td->rwmix_ddir] >= td->rwmix_bytes) {
e47f799f
JA
256 unsigned long long max_bytes;
257 enum fio_ddir ddir;
258
259 /*
260 * Put a top limit on how many bytes we do for
261 * one data direction, to avoid overflowing the
262 * ranges too much
263 */
264 ddir = get_rand_ddir(td);
265 max_bytes = td->this_io_bytes[ddir];
7bb48f84 266 if (max_bytes >= (td->o.size * td->o.rwmix[ddir] / 100)) {
38d77cae
JA
267 if (!td->rw_end_set[ddir]) {
268 td->rw_end_set[ddir] = 1;
269 memcpy(&td->rw_end[ddir], &now, sizeof(now));
270 }
e47f799f 271 ddir ^= 1;
38d77cae 272 }
e47f799f
JA
273
274 if (ddir != td->rwmix_ddir)
275 set_rwmix_bytes(td);
276
277 td->rwmix_ddir = ddir;
10ba535a
JA
278 memcpy(&td->rwmix_switch, &now, sizeof(now));
279 }
280 return td->rwmix_ddir;
281 } else if (td_read(td))
282 return DDIR_READ;
283 else
284 return DDIR_WRITE;
285}
286
10ba535a
JA
287void put_io_u(struct thread_data *td, struct io_u *io_u)
288{
0c6e7517
JA
289 assert((io_u->flags & IO_U_F_FREE) == 0);
290 io_u->flags |= IO_U_F_FREE;
291
2dbdab7e
JA
292 if (io_u->file)
293 put_file(td, io_u->file);
294
10ba535a
JA
295 io_u->file = NULL;
296 list_del(&io_u->list);
297 list_add(&io_u->list, &td->io_u_freelist);
298 td->cur_depth--;
299}
300
755200a3
JA
301void requeue_io_u(struct thread_data *td, struct io_u **io_u)
302{
303 struct io_u *__io_u = *io_u;
304
4d2e0f49
JA
305 __io_u->flags |= IO_U_F_FREE;
306 __io_u->flags &= ~IO_U_F_FLIGHT;
307
755200a3
JA
308 list_del(&__io_u->list);
309 list_add_tail(&__io_u->list, &td->io_u_requeues);
310 td->cur_depth--;
311 *io_u = NULL;
312}
313
9bf2061e 314static int fill_io_u(struct thread_data *td, struct io_u *io_u)
10ba535a 315{
b4c5e1ac
JA
316 if (td->io_ops->flags & FIO_NOIO)
317 goto out;
318
87dc1ab1
JA
319 /*
320 * see if it's time to sync
321 */
2dc1bbeb
JA
322 if (td->o.fsync_blocks &&
323 !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
324 td->io_issues[DDIR_WRITE] && should_fsync(td)) {
87dc1ab1 325 io_u->ddir = DDIR_SYNC;
c38e9468 326 goto out;
87dc1ab1
JA
327 }
328
a00735e6
JA
329 io_u->ddir = get_rw_ddir(td);
330
48f5abd3
JA
331 /*
332 * See if it's time to switch to a new zone
333 */
334 if (td->zone_bytes >= td->o.zone_size) {
335 td->zone_bytes = 0;
336 io_u->file->last_pos += td->o.zone_skip;
337 td->io_skip_bytes += td->o.zone_skip;
338 }
339
10ba535a 340 /*
c685b5b2
JA
341 * No log, let the seq/rand engine retrieve the next buflen and
342 * position.
10ba535a 343 */
9bf2061e 344 if (get_next_offset(td, io_u))
bca4ed4d 345 return 1;
10ba535a 346
9bf2061e 347 io_u->buflen = get_next_buflen(td, io_u);
bca4ed4d
JA
348 if (!io_u->buflen)
349 return 1;
350
6a5e6884
JA
351 if (io_u->offset + io_u->buflen > io_u->file->real_file_size)
352 return 1;
353
bca4ed4d
JA
354 /*
355 * mark entry before potentially trimming io_u
356 */
8fd15a9a 357 if (td_random(td) && !td->o.norandommap)
9bf2061e 358 mark_random_map(td, io_u);
bca4ed4d
JA
359
360 /*
361 * If using a write iolog, store this entry.
362 */
c38e9468 363out:
f29b25a3 364 log_io_u(td, io_u);
bca4ed4d 365 return 0;
10ba535a
JA
366}
367
b3605062 368void io_u_mark_depth(struct thread_data *td, struct io_u *io_u)
71619dc2
JA
369{
370 int index = 0;
371
b3605062
JA
372 if (io_u->ddir == DDIR_SYNC)
373 return;
374
71619dc2
JA
375 switch (td->cur_depth) {
376 default:
a783e61a
JA
377 index = 6;
378 break;
71619dc2 379 case 32 ... 63:
a783e61a
JA
380 index = 5;
381 break;
71619dc2 382 case 16 ... 31:
a783e61a
JA
383 index = 4;
384 break;
71619dc2 385 case 8 ... 15:
a783e61a
JA
386 index = 3;
387 break;
71619dc2 388 case 4 ... 7:
a783e61a
JA
389 index = 2;
390 break;
71619dc2 391 case 2 ... 3:
a783e61a 392 index = 1;
71619dc2
JA
393 case 1:
394 break;
395 }
396
756867bd 397 td->ts.io_u_map[index]++;
b3605062 398 td->ts.total_io_u[io_u->ddir]++;
71619dc2
JA
399}
400
04a0feae
JA
401static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
402{
403 int index = 0;
404
405 assert(usec < 1000);
406
407 switch (usec) {
408 case 750 ... 999:
409 index = 9;
410 break;
411 case 500 ... 749:
412 index = 8;
413 break;
414 case 250 ... 499:
415 index = 7;
416 break;
417 case 100 ... 249:
418 index = 6;
419 break;
420 case 50 ... 99:
421 index = 5;
422 break;
423 case 20 ... 49:
424 index = 4;
425 break;
426 case 10 ... 19:
427 index = 3;
428 break;
429 case 4 ... 9:
430 index = 2;
431 break;
432 case 2 ... 3:
433 index = 1;
434 case 0 ... 1:
435 break;
436 }
437
438 assert(index < FIO_IO_U_LAT_U_NR);
439 td->ts.io_u_lat_u[index]++;
440}
441
442static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
ec118304
JA
443{
444 int index = 0;
445
446 switch (msec) {
447 default:
04a0feae
JA
448 index = 11;
449 break;
8abdce66 450 case 1000 ... 1999:
04a0feae
JA
451 index = 10;
452 break;
8abdce66 453 case 750 ... 999:
04a0feae
JA
454 index = 9;
455 break;
8abdce66 456 case 500 ... 749:
04a0feae
JA
457 index = 8;
458 break;
8abdce66 459 case 250 ... 499:
04a0feae
JA
460 index = 7;
461 break;
8abdce66 462 case 100 ... 249:
04a0feae
JA
463 index = 6;
464 break;
8abdce66 465 case 50 ... 99:
04a0feae
JA
466 index = 5;
467 break;
8abdce66 468 case 20 ... 49:
04a0feae
JA
469 index = 4;
470 break;
8abdce66 471 case 10 ... 19:
04a0feae
JA
472 index = 3;
473 break;
8abdce66 474 case 4 ... 9:
04a0feae
JA
475 index = 2;
476 break;
ec118304 477 case 2 ... 3:
04a0feae 478 index = 1;
ec118304
JA
479 case 0 ... 1:
480 break;
481 }
482
04a0feae
JA
483 assert(index < FIO_IO_U_LAT_M_NR);
484 td->ts.io_u_lat_m[index]++;
485}
486
/*
 * Account a completion latency given in microseconds: below one
 * millisecond it goes to the usec histogram, otherwise it is converted to
 * whole milliseconds and goes to the msec histogram.
 */
static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec >= 1000) {
		io_u_mark_lat_msec(td, usec / 1000);
		return;
	}

	io_u_mark_lat_usec(td, usec);
}
494
0aabe160
JA
495/*
496 * Get next file to service by choosing one at random
497 */
1c178180
JA
498static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
499 int badf)
0aabe160 500{
0aabe160 501 struct fio_file *f;
1c178180 502 int fno;
0aabe160
JA
503
504 do {
7c83c089
JA
505 long r = os_random_long(&td->next_file_state);
506
2dc1bbeb 507 fno = (unsigned int) ((double) td->o.nr_files * (r / (RAND_MAX + 1.0)));
1c178180 508 f = &td->files[fno];
059e63c0
JA
509 if (f->flags & FIO_FILE_DONE)
510 continue;
1c178180
JA
511
512 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
0aabe160
JA
513 return f;
514 } while (1);
515}
516
517/*
518 * Get next file to service by doing round robin between all available ones
519 */
1c178180
JA
520static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
521 int badf)
3d7c391d
JA
522{
523 unsigned int old_next_file = td->next_file;
524 struct fio_file *f;
525
526 do {
527 f = &td->files[td->next_file];
528
529 td->next_file++;
2dc1bbeb 530 if (td->next_file >= td->o.nr_files)
3d7c391d
JA
531 td->next_file = 0;
532
d5ed68ea
JA
533 if (f->flags & FIO_FILE_DONE) {
534 f = NULL;
059e63c0 535 continue;
d5ed68ea 536 }
059e63c0 537
1c178180 538 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
3d7c391d
JA
539 break;
540
541 f = NULL;
542 } while (td->next_file != old_next_file);
543
544 return f;
545}
546
bdb4e2e9
JA
547static struct fio_file *get_next_file(struct thread_data *td)
548{
1907dbc6
JA
549 struct fio_file *f;
550
2dc1bbeb 551 assert(td->o.nr_files <= td->files_index);
1c178180 552
1020a139 553 if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
bdb4e2e9
JA
554 return NULL;
555
1907dbc6 556 f = td->file_service_file;
f11bd94d 557 if (f && (f->flags & FIO_FILE_OPEN) && td->file_service_left--)
1907dbc6
JA
558 return f;
559
2dc1bbeb 560 if (td->o.file_service_type == FIO_FSERVICE_RR)
1c178180 561 f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
bdb4e2e9 562 else
1c178180 563 f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
1907dbc6
JA
564
565 td->file_service_file = f;
566 td->file_service_left = td->file_service_nr - 1;
567 return f;
bdb4e2e9
JA
568}
569
1c178180
JA
570static struct fio_file *find_next_new_file(struct thread_data *td)
571{
572 struct fio_file *f;
573
1020a139
JA
574 if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
575 return NULL;
576
2dc1bbeb 577 if (td->o.file_service_type == FIO_FSERVICE_RR)
1c178180
JA
578 f = get_next_file_rr(td, 0, FIO_FILE_OPEN);
579 else
580 f = get_next_file_rand(td, 0, FIO_FILE_OPEN);
581
582 return f;
583}
584
429f6675
JA
585static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
586{
587 struct fio_file *f;
588
589 do {
590 f = get_next_file(td);
591 if (!f)
592 return 1;
593
594set_file:
595 io_u->file = f;
596 get_file(f);
597
598 if (!fill_io_u(td, io_u))
599 break;
600
601 /*
602 * td_io_close() does a put_file() as well, so no need to
603 * do that here.
604 */
605 io_u->file = NULL;
606 td_io_close_file(td, f);
607 f->flags |= FIO_FILE_DONE;
608 td->nr_done_files++;
609
610 /*
611 * probably not the right place to do this, but see
612 * if we need to open a new file
613 */
614 if (td->nr_open_files < td->o.open_files &&
615 td->o.open_files != td->o.nr_files) {
616 f = find_next_new_file(td);
617
618 if (!f || td_io_open_file(td, f))
619 return 1;
620
621 goto set_file;
622 }
623 } while (1);
624
625 return 0;
626}
627
628
10ba535a
JA
629struct io_u *__get_io_u(struct thread_data *td)
630{
631 struct io_u *io_u = NULL;
632
755200a3
JA
633 if (!list_empty(&td->io_u_requeues))
634 io_u = list_entry(td->io_u_requeues.next, struct io_u, list);
635 else if (!queue_full(td)) {
10ba535a
JA
636 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
637
6040dabc 638 io_u->buflen = 0;
10ba535a 639 io_u->resid = 0;
755200a3 640 io_u->file = NULL;
d7762cf8 641 io_u->end_io = NULL;
755200a3
JA
642 }
643
644 if (io_u) {
0c6e7517
JA
645 assert(io_u->flags & IO_U_F_FREE);
646 io_u->flags &= ~IO_U_F_FREE;
647
755200a3 648 io_u->error = 0;
10ba535a
JA
649 list_del(&io_u->list);
650 list_add(&io_u->list, &td->io_u_busylist);
651 td->cur_depth++;
652 }
653
654 return io_u;
655}
656
657/*
658 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
659 * etc. The returned io_u is fully ready to be prepped and submitted.
660 */
3d7c391d 661struct io_u *get_io_u(struct thread_data *td)
10ba535a 662{
3d7c391d 663 struct fio_file *f;
10ba535a
JA
664 struct io_u *io_u;
665
666 io_u = __get_io_u(td);
667 if (!io_u)
668 return NULL;
669
755200a3
JA
670 /*
671 * from a requeue, io_u already setup
672 */
673 if (io_u->file)
77f392bf 674 goto out;
755200a3 675
429f6675
JA
676 /*
677 * If using an iolog, grab next piece if any available.
678 */
679 if (td->o.read_iolog_file) {
680 if (read_iolog_get(td, io_u))
681 goto err_put;
682 } else if (set_io_u_file(td, io_u))
683 goto err_put;
684
685 f = io_u->file;
686 assert(f->flags & FIO_FILE_OPEN);
97af62ce 687
87dc1ab1 688 if (io_u->ddir != DDIR_SYNC) {
429f6675
JA
689 if (!io_u->buflen)
690 goto err_put;
10ba535a 691
36167d82 692 f->last_pos = io_u->offset + io_u->buflen;
10ba535a 693
2dc1bbeb 694 if (td->o.verify != VERIFY_NONE)
87dc1ab1
JA
695 populate_verify_io_u(td, io_u);
696 }
10ba535a 697
165faf16
JA
698 /*
699 * Set io data pointers.
700 */
d460eb31 701 io_u->endpos = io_u->offset + io_u->buflen;
77f392bf 702out:
cec6b55d
JA
703 io_u->xfer_buf = io_u->buf;
704 io_u->xfer_buflen = io_u->buflen;
165faf16 705
429f6675
JA
706 if (!td_io_prep(td, io_u)) {
707 fio_gettime(&io_u->start_time, NULL);
708 return io_u;
36167d82 709 }
429f6675
JA
710err_put:
711 put_io_u(td, io_u);
712 return NULL;
10ba535a
JA
713}
714
5451792e
JA
715void io_u_log_error(struct thread_data *td, struct io_u *io_u)
716{
717 const char *msg[] = { "read", "write", "sync" };
718
719 log_err("fio: io_u error");
720
721 if (io_u->file)
722 log_err(" on file %s", io_u->file->file_name);
723
724 log_err(": %s\n", strerror(io_u->error));
725
726 log_err(" %s offset=%llu, buflen=%lu\n", msg[io_u->ddir], io_u->offset, io_u->xfer_buflen);
727
728 if (!td->error)
729 td_verror(td, io_u->error, "io_u error");
730}
731
97601024
JA
732static void io_completed(struct thread_data *td, struct io_u *io_u,
733 struct io_completion_data *icd)
10ba535a 734{
d85f5118 735 unsigned long usec;
10ba535a 736
0c6e7517
JA
737 assert(io_u->flags & IO_U_F_FLIGHT);
738 io_u->flags &= ~IO_U_F_FLIGHT;
739
87dc1ab1
JA
740 if (io_u->ddir == DDIR_SYNC) {
741 td->last_was_sync = 1;
742 return;
743 }
744
745 td->last_was_sync = 0;
746
10ba535a
JA
747 if (!io_u->error) {
748 unsigned int bytes = io_u->buflen - io_u->resid;
1e97cce9 749 const enum fio_ddir idx = io_u->ddir;
3af6ef39 750 int ret;
10ba535a
JA
751
752 td->io_blocks[idx]++;
753 td->io_bytes[idx] += bytes;
754 td->zone_bytes += bytes;
755 td->this_io_bytes[idx] += bytes;
756
d460eb31 757 io_u->file->last_completed_pos = io_u->endpos;
02bcaa8c 758
d85f5118 759 usec = utime_since(&io_u->issue_time, &icd->time);
10ba535a 760
d85f5118 761 add_clat_sample(td, idx, usec);
02bcaa8c 762 add_bw_sample(td, idx, &icd->time);
04a0feae 763 io_u_mark_latency(td, usec);
10ba535a 764
660a1cb5 765 if (td_write(td) && idx == DDIR_WRITE &&
41128405 766 td->o.verify != VERIFY_NONE)
10ba535a
JA
767 log_io_piece(td, io_u);
768
769 icd->bytes_done[idx] += bytes;
3af6ef39 770
d7762cf8 771 if (io_u->end_io) {
36690c9b 772 ret = io_u->end_io(td, io_u);
3af6ef39
JA
773 if (ret && !icd->error)
774 icd->error = ret;
775 }
5451792e 776 } else {
10ba535a 777 icd->error = io_u->error;
5451792e
JA
778 io_u_log_error(td, io_u);
779 }
10ba535a
JA
780}
781
d7762cf8 782static void init_icd(struct io_completion_data *icd, int nr)
10ba535a 783{
02bcaa8c
JA
784 fio_gettime(&icd->time, NULL);
785
3af6ef39
JA
786 icd->nr = nr;
787
10ba535a
JA
788 icd->error = 0;
789 icd->bytes_done[0] = icd->bytes_done[1] = 0;
36167d82
JA
790}
791
97601024
JA
792static void ios_completed(struct thread_data *td,
793 struct io_completion_data *icd)
36167d82
JA
794{
795 struct io_u *io_u;
796 int i;
797
10ba535a
JA
798 for (i = 0; i < icd->nr; i++) {
799 io_u = td->io_ops->event(td, i);
800
801 io_completed(td, io_u, icd);
802 put_io_u(td, io_u);
803 }
804}
97601024 805
e7e6cfb4
JA
806/*
807 * Complete a single io_u for the sync engines.
808 */
d7762cf8 809long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
97601024
JA
810{
811 struct io_completion_data icd;
812
d7762cf8 813 init_icd(&icd, 1);
97601024
JA
814 io_completed(td, io_u, &icd);
815 put_io_u(td, io_u);
816
817 if (!icd.error)
818 return icd.bytes_done[0] + icd.bytes_done[1];
819
37e974a5 820 td_verror(td, icd.error, "io_u_sync_complete");
97601024
JA
821 return -1;
822}
823
e7e6cfb4
JA
824/*
825 * Called to complete min_events number of io for the async engines.
826 */
d7762cf8 827long io_u_queued_complete(struct thread_data *td, int min_events)
97601024 828{
97601024 829 struct io_completion_data icd;
00de55ef 830 struct timespec *tvp = NULL;
97601024 831 int ret;
4d06a338 832 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
97601024 833
4d06a338 834 if (!min_events)
00de55ef 835 tvp = &ts;
97601024 836
00de55ef 837 ret = td_io_getevents(td, min_events, td->cur_depth, tvp);
97601024 838 if (ret < 0) {
e1161c32 839 td_verror(td, -ret, "td_io_getevents");
97601024
JA
840 return ret;
841 } else if (!ret)
842 return ret;
843
d7762cf8 844 init_icd(&icd, ret);
97601024
JA
845 ios_completed(td, &icd);
846 if (!icd.error)
847 return icd.bytes_done[0] + icd.bytes_done[1];
848
37e974a5 849 td_verror(td, icd.error, "io_u_queued_complete");
97601024
JA
850 return -1;
851}
7e77dd02
JA
852
853/*
854 * Call when io_u is really queued, to update the submission latency.
855 */
856void io_u_queued(struct thread_data *td, struct io_u *io_u)
857{
858 unsigned long slat_time;
859
d85f5118 860 slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
7e77dd02
JA
861 add_slat_sample(td, io_u->ddir, slat_time);
862}
433afcb4 863
55bc9728 864#ifdef FIO_USE_TIMEOUT
433afcb4
JA
865void io_u_set_timeout(struct thread_data *td)
866{
867 assert(td->cur_depth);
868
869 td->timer.it_interval.tv_sec = 0;
870 td->timer.it_interval.tv_usec = 0;
871 td->timer.it_value.tv_sec = IO_U_TIMEOUT + IO_U_TIMEOUT_INC;
872 td->timer.it_value.tv_usec = 0;
873 setitimer(ITIMER_REAL, &td->timer, NULL);
874 fio_gettime(&td->timeout_end, NULL);
875}
5945b9b4
JA
876
877static void io_u_dump(struct io_u *io_u)
878{
879 unsigned long t_start = mtime_since_now(&io_u->start_time);
880 unsigned long t_issue = mtime_since_now(&io_u->issue_time);
881
882 log_err("io_u=%p, t_start=%lu, t_issue=%lu\n", io_u, t_start, t_issue);
883 log_err(" buf=%p/%p, len=%lu/%lu, offset=%llu\n", io_u->buf, io_u->xfer_buf, io_u->buflen, io_u->xfer_buflen, io_u->offset);
884 log_err(" ddir=%d, fname=%s\n", io_u->ddir, io_u->file->file_name);
885}
55bc9728
JA
886#else
887void io_u_set_timeout(struct thread_data fio_unused *td)
888{
889}
890#endif
433afcb4 891
#ifdef FIO_USE_TIMEOUT
/*
 * SIGALRM handler for the io_u timeout. Looks up the job belonging to the
 * current pid, dumps every io_u on its busy list, records ETIMEDOUT on
 * the job and exits. Returns without exiting only if the job has no io
 * pending.
 */
static void io_u_timeout_handler(int fio_unused sig)
{
	struct thread_data *td = NULL;
	struct thread_data *cur;
	struct list_head *pos;
	const pid_t self = getpid();
	int i;

	log_err("fio: io_u timeout\n");

	/*
	 * TLS would be nice...
	 */
	for_each_td(cur, i) {
		if (cur->pid == self) {
			td = cur;
			break;
		}
	}

	if (!td) {
		log_err("fio: io_u timeout, can't find job\n");
		exit(1);
	}

	if (!td->cur_depth) {
		log_err("fio: timeout without pending work?\n");
		return;
	}

	log_err("fio: io_u timeout: job=%s, pid=%d\n", td->o.name, td->pid);

	list_for_each(pos, &td->io_u_busylist)
		io_u_dump(list_entry(pos, struct io_u, list));

	td_verror(td, ETIMEDOUT, "io_u timeout");
	exit(1);
}
#endif
433afcb4
JA
936
/*
 * Install the io_u timeout handler for SIGALRM. Does nothing unless
 * FIO_USE_TIMEOUT is defined.
 */
void io_u_init_timeout(void)
{
#ifdef FIO_USE_TIMEOUT
	signal(SIGALRM, io_u_timeout_handler);
#endif
}