Add missing types to io_u_log_error()
[fio.git] / io_u.c
1#include <unistd.h>
2#include <fcntl.h>
3#include <string.h>
4#include <signal.h>
5#include <time.h>
6#include <assert.h>
7
8#include "fio.h"
9#include "hash.h"
10#include "verify.h"
11#include "lib/rand.h"
12
13struct io_completion_data {
14 int nr; /* input */
15
16 int error; /* output */
17 unsigned long bytes_done[2]; /* output */
18 struct timeval time; /* output */
19};
20
21/*
22 * The ->file_map[] contains a map of blocks we have or have not done io
23 * to yet. Used to make sure we cover the entire range in a fair fashion.
24 */
25static int random_map_free(struct fio_file *f, const unsigned long long block)
26{
27 unsigned int idx = RAND_MAP_IDX(f, block);
28 unsigned int bit = RAND_MAP_BIT(f, block);
29
30 dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);
31
32 return (f->file_map[idx] & (1 << bit)) == 0;
33}
34
35/*
36 * Mark a given offset as used in the map.
37 */
38static void mark_random_map(struct thread_data *td, struct io_u *io_u)
39{
40 unsigned int min_bs = td->o.rw_min_bs;
41 struct fio_file *f = io_u->file;
42 unsigned long long block;
43 unsigned int blocks, nr_blocks;
44 int busy_check;
45
46 block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
47 nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
48 blocks = 0;
49 busy_check = !(io_u->flags & IO_U_F_BUSY_OK);
50
51 while (nr_blocks) {
52 unsigned int this_blocks, mask;
53 unsigned int idx, bit;
54
55 /*
56 * If we have a mixed random workload, we may
57 * encounter blocks we already did IO to.
58 */
59 if (!busy_check) {
60 blocks = nr_blocks;
61 break;
62 }
63 if ((td->o.ddir_seq_nr == 1) && !random_map_free(f, block))
64 break;
65
66 idx = RAND_MAP_IDX(f, block);
67 bit = RAND_MAP_BIT(f, block);
68
69 fio_assert(td, idx < f->num_maps);
70
71 this_blocks = nr_blocks;
72 if (this_blocks + bit > BLOCKS_PER_MAP)
73 this_blocks = BLOCKS_PER_MAP - bit;
74
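		/*
		 * Build a bit mask covering this_blocks map bits starting at
		 * 'bit', then shrink it until it only covers free blocks.
		 * E.g. bit=2, this_blocks=3 gives a candidate mask of 0b11100.
		 */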
75 do {
76 if (this_blocks == BLOCKS_PER_MAP)
77 mask = -1U;
78 else
79 mask = ((1U << this_blocks) - 1) << bit;
80
81 if (!(f->file_map[idx] & mask))
82 break;
83
84 this_blocks--;
85 } while (this_blocks);
86
87 if (!this_blocks)
88 break;
89
90 f->file_map[idx] |= mask;
91 nr_blocks -= this_blocks;
92 blocks += this_blocks;
93 block += this_blocks;
94 }
95
96 if ((blocks * min_bs) < io_u->buflen)
97 io_u->buflen = blocks * min_bs;
98}
99
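/*
 * Number of addressable blocks for this data direction: io_size clamped to
 * the real file size, divided by the configured block alignment (->ba).
 */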
100static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
101 enum fio_ddir ddir)
102{
103 unsigned long long max_blocks;
104 unsigned long long max_size;
105
106 /*
107 * Hmm, should we make sure that ->io_size <= ->real_file_size?
108 */
109 max_size = f->io_size;
110 if (max_size > f->real_file_size)
111 max_size = f->real_file_size;
112
113 max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
114 if (!max_blocks)
115 return 0;
116
117 return max_blocks;
118}
119
120/*
121 * Return the next free block in the map.
122 */
123static int get_next_free_block(struct thread_data *td, struct fio_file *f,
124 enum fio_ddir ddir, unsigned long long *b)
125{
126 unsigned long long min_bs = td->o.rw_min_bs;
127 int i;
128
129 i = f->last_free_lookup;
130 *b = (i * BLOCKS_PER_MAP);
131 while ((*b) * min_bs < f->real_file_size &&
132 (*b) * min_bs < f->io_size) {
133 if (f->file_map[i] != (unsigned int) -1) {
134 *b += ffz(f->file_map[i]);
135 if (*b > last_block(td, f, ddir))
136 break;
137 f->last_free_lookup = i;
138 return 0;
139 }
140
141 *b += BLOCKS_PER_MAP;
142 i++;
143 }
144
145 dprint(FD_IO, "failed finding a free block\n");
146 return 1;
147}
148
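/*
 * Random block selection: scale a random long into the block range, retry a
 * few times if the chosen block is already in the random map, then fall back
 * to a free-block scan from a random map index and finally from index 0.
 */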
149static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
150 enum fio_ddir ddir, unsigned long long *b)
151{
152 unsigned long long r;
153 int loops = 5;
154
155 do {
156 r = os_random_long(&td->random_state);
157 dprint(FD_RANDOM, "off rand %llu\n", r);
158 *b = (last_block(td, f, ddir) - 1)
159 * (r / ((unsigned long long) OS_RAND_MAX + 1.0));
160
161 /*
162 * if we are not maintaining a random map, we are done.
163 */
164 if (!file_randommap(td, f))
165 return 0;
166
167 /*
168 * calculate map offset and check if it's free
169 */
170 if (random_map_free(f, *b))
171 return 0;
172
173 dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
174 *b);
175 } while (--loops);
176
177 /*
178 * We get here if we didn't succeed in looking up a free block. Generate
179 * a random start offset into the file map, and find the first free
180 * block from there.
181 */
182 loops = 10;
183 do {
184 f->last_free_lookup = (f->num_maps - 1) *
185 (r / (OS_RAND_MAX + 1.0));
186 if (!get_next_free_block(td, f, ddir, b))
187 return 0;
188
189 r = os_random_long(&td->random_state);
190 } while (--loops);
191
192 /*
193 * that didn't work either, try exhaustive search from the start
194 */
195 f->last_free_lookup = 0;
196 return get_next_free_block(td, f, ddir, b);
197}
198
199static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
200 enum fio_ddir ddir, unsigned long long *b)
201{
202 if (get_next_rand_offset(td, f, ddir, b)) {
203 dprint(FD_IO, "%s: rand offset failed, last=%llu, size=%llu\n",
204 f->file_name, f->last_pos, f->real_file_size);
205 return 1;
206 }
207
208 return 0;
209}
210
211static int get_next_seq_block(struct thread_data *td, struct fio_file *f,
212 enum fio_ddir ddir, unsigned long long *b)
213{
214 if (f->last_pos < f->real_file_size) {
215 *b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
216 return 0;
217 }
218
219 return 1;
220}
221
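/*
 * Pick the next block number. When the ddir_seq_nr interval expires (rw_seq
 * set), a fresh random or sequential block is chosen. In between, the rw_seq
 * policy applies: RW_SEQ_SEQ continues sequentially (falling back to random
 * at EOF), RW_SEQ_IDENT reuses the last start offset. IO_U_F_BUSY_OK is set
 * so the random map accepts already-used blocks.
 */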
222static int get_next_block(struct thread_data *td, struct io_u *io_u,
223 enum fio_ddir ddir, int rw_seq, unsigned long long *b)
224{
225 struct fio_file *f = io_u->file;
226 int ret;
227
228 if (rw_seq) {
229 if (td_random(td))
230 ret = get_next_rand_block(td, f, ddir, b);
231 else
232 ret = get_next_seq_block(td, f, ddir, b);
233 } else {
234 io_u->flags |= IO_U_F_BUSY_OK;
235
236 if (td->o.rw_seq == RW_SEQ_SEQ) {
237 ret = get_next_seq_block(td, f, ddir, b);
238 if (ret)
239 ret = get_next_rand_block(td, f, ddir, b);
240 } else if (td->o.rw_seq == RW_SEQ_IDENT) {
241 if (f->last_start != -1ULL)
242 *b = (f->last_start - f->file_offset) / td->o.min_bs[ddir];
243 else
244 *b = 0;
245 ret = 0;
246 } else {
247 log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq);
248 ret = 1;
249 }
250 }
251
252 return ret;
253}
254
255/*
256 * For random io, generate a random new block and see if it's used. Repeat
257 * until we find a free one. For sequential io, just return the end of
258 * the last io issued.
259 */
260static int __get_next_offset(struct thread_data *td, struct io_u *io_u)
261{
262 struct fio_file *f = io_u->file;
263 unsigned long long b;
264 enum fio_ddir ddir = io_u->ddir;
265 int rw_seq_hit = 0;
266
267 if (td->o.ddir_seq_nr && !--td->ddir_seq_nr) {
268 rw_seq_hit = 1;
269 td->ddir_seq_nr = td->o.ddir_seq_nr;
270 }
271
272 if (get_next_block(td, io_u, ddir, rw_seq_hit, &b)) {
273 log_err("fio: failed getting next block\n");
274 return 1;
275 }
276
277 io_u->offset = b * td->o.ba[ddir];
278 if (io_u->offset >= f->io_size) {
279 dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
280 io_u->offset, f->io_size);
281 return 1;
282 }
283
284 io_u->offset += f->file_offset;
285 if (io_u->offset >= f->real_file_size) {
286 dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
287 io_u->offset, f->real_file_size);
288 return 1;
289 }
290
291 return 0;
292}
293
294static int get_next_offset(struct thread_data *td, struct io_u *io_u)
295{
296 struct prof_io_ops *ops = &td->prof_io_ops;
297
298 if (ops->fill_io_u_off)
299 return ops->fill_io_u_off(td, io_u);
300
301 return __get_next_offset(td, io_u);
302}
303
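/*
 * Pick a buffer length between min_bs and max_bs for this data direction,
 * either uniformly at random or weighted by the bssplit table. If
 * bs_unaligned is not set and min_bs is a power of two, the result is
 * rounded up to a multiple of min_bs.
 */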
304static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u)
305{
306 const int ddir = io_u->ddir;
307 unsigned int uninitialized_var(buflen);
308 unsigned int minbs, maxbs;
309 long r;
310
311 minbs = td->o.min_bs[ddir];
312 maxbs = td->o.max_bs[ddir];
313
314 if (minbs == maxbs)
315 buflen = minbs;
316 else {
317 r = os_random_long(&td->bsrange_state);
318 if (!td->o.bssplit_nr[ddir]) {
319 buflen = 1 + (unsigned int) ((double) maxbs *
320 (r / (OS_RAND_MAX + 1.0)));
321 if (buflen < minbs)
322 buflen = minbs;
323 } else {
324 long perc = 0;
325 unsigned int i;
326
327 for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
328 struct bssplit *bsp = &td->o.bssplit[ddir][i];
329
330 buflen = bsp->bs;
331 perc += bsp->perc;
332 if (r <= ((OS_RAND_MAX / 100L) * perc))
333 break;
334 }
335 }
336 if (!td->o.bs_unaligned && is_power_of_2(minbs))
337 buflen = (buflen + minbs - 1) & ~(minbs - 1);
338 }
339
340 if (io_u->offset + buflen > io_u->file->real_file_size) {
341 dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
342 minbs, ddir);
343 buflen = minbs;
344 }
345
346 return buflen;
347}
348
349static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
350{
351 struct prof_io_ops *ops = &td->prof_io_ops;
352
353 if (ops->fill_io_u_size)
354 return ops->fill_io_u_size(td, io_u);
355
356 return __get_next_buflen(td, io_u);
357}
358
359static void set_rwmix_bytes(struct thread_data *td)
360{
361 unsigned int diff;
362
363 /*
364 * We do a time- or byte-based switch. This is needed because
365 * buffered writes may issue a lot quicker than they complete,
366 * whereas reads do not.
367 */
368 diff = td->o.rwmix[td->rwmix_ddir ^ 1];
369 td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
370}
371
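/*
 * Roll a value in 1..100 from the rwmix random state; rolls up to
 * rwmix[DDIR_READ] become reads, the rest writes. E.g. with rwmixread=70,
 * rolls 1-70 map to DDIR_READ and 71-100 to DDIR_WRITE.
 */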
372static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
373{
374 unsigned int v;
375 long r;
376
377 r = os_random_long(&td->rwmix_state);
378 v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
379 if (v <= td->o.rwmix[DDIR_READ])
380 return DDIR_READ;
381
382 return DDIR_WRITE;
383}
384
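/*
 * Enforce the configured rate: if this direction has accumulated sleep debt,
 * either switch to the other direction (when it is not also backed up) or
 * sleep off the smaller debt and deduct the elapsed time from the rate
 * accounting of the affected direction(s).
 */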
385static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
386{
387 enum fio_ddir odir = ddir ^ 1;
388 struct timeval t;
389 long usec;
390
391 if (td->rate_pending_usleep[ddir] <= 0)
392 return ddir;
393
394 /*
395 * We have too much pending sleep in this direction. See if we
396 * should switch.
397 */
398 if (td_rw(td)) {
399 /*
400 * Other direction does not have too much pending, switch
401 */
402 if (td->rate_pending_usleep[odir] < 100000)
403 return odir;
404
405 /*
406 * Both directions have pending sleep. Sleep the minimum time
407 * and deduct from both.
408 */
409 if (td->rate_pending_usleep[ddir] <=
410 td->rate_pending_usleep[odir]) {
411 usec = td->rate_pending_usleep[ddir];
412 } else {
413 usec = td->rate_pending_usleep[odir];
414 ddir = odir;
415 }
416 } else
417 usec = td->rate_pending_usleep[ddir];
418
419 fio_gettime(&t, NULL);
420 usec_sleep(td, usec);
421 usec = utime_since_now(&t);
422
423 td->rate_pending_usleep[ddir] -= usec;
424
425 odir = ddir ^ 1;
426 if (td_rw(td) && __should_check_rate(td, odir))
427 td->rate_pending_usleep[odir] -= usec;
428
429 return ddir;
430}
431
432/*
433 * Return the data direction for the next io_u. If the job is a
434 * mixed read/write workload, check the rwmix cycle and switch if
435 * necessary.
436 */
437static enum fio_ddir get_rw_ddir(struct thread_data *td)
438{
439 enum fio_ddir ddir;
440
441 /*
442 * see if it's time to fsync
443 */
444 if (td->o.fsync_blocks &&
445 !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
446 td->io_issues[DDIR_WRITE] && should_fsync(td))
447 return DDIR_SYNC;
448
449 /*
450 * see if it's time to fdatasync
451 */
452 if (td->o.fdatasync_blocks &&
453 !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
454 td->io_issues[DDIR_WRITE] && should_fsync(td))
455 return DDIR_DATASYNC;
456
457 /*
458 * see if it's time to sync_file_range
459 */
460 if (td->sync_file_range_nr &&
461 !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) &&
462 td->io_issues[DDIR_WRITE] && should_fsync(td))
463 return DDIR_SYNC_FILE_RANGE;
464
465 if (td_rw(td)) {
466 /*
467 * Check if it's time to seed a new data direction.
468 */
469 if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
470 /*
471 * Put a top limit on how many bytes we do for
472 * one data direction, to avoid overflowing the
473 * ranges too much
474 */
475 ddir = get_rand_ddir(td);
476
477 if (ddir != td->rwmix_ddir)
478 set_rwmix_bytes(td);
479
480 td->rwmix_ddir = ddir;
481 }
482 ddir = td->rwmix_ddir;
483 } else if (td_read(td))
484 ddir = DDIR_READ;
485 else
486 ddir = DDIR_WRITE;
487
488 td->rwmix_ddir = rate_ddir(td, ddir);
489 return td->rwmix_ddir;
490}
491
492void put_file_log(struct thread_data *td, struct fio_file *f)
493{
494 int ret = put_file(td, f);
495
496 if (ret)
497 td_verror(td, ret, "file close");
498}
499
500void put_io_u(struct thread_data *td, struct io_u *io_u)
501{
502 td_io_u_lock(td);
503
504 io_u->flags |= IO_U_F_FREE;
505 io_u->flags &= ~IO_U_F_FREE_DEF;
506
507 if (io_u->file)
508 put_file_log(td, io_u->file);
509
510 io_u->file = NULL;
511 if (io_u->flags & IO_U_F_IN_CUR_DEPTH)
512 td->cur_depth--;
513 flist_del_init(&io_u->list);
514 flist_add(&io_u->list, &td->io_u_freelist);
515 td_io_u_unlock(td);
516 td_io_u_free_notify(td);
517}
518
519void clear_io_u(struct thread_data *td, struct io_u *io_u)
520{
521 io_u->flags &= ~IO_U_F_FLIGHT;
522 put_io_u(td, io_u);
523}
524
525void requeue_io_u(struct thread_data *td, struct io_u **io_u)
526{
527 struct io_u *__io_u = *io_u;
528
529 dprint(FD_IO, "requeue %p\n", __io_u);
530
531 td_io_u_lock(td);
532
533 __io_u->flags |= IO_U_F_FREE;
534 if ((__io_u->flags & IO_U_F_FLIGHT) && !ddir_sync(__io_u->ddir))
535 td->io_issues[__io_u->ddir]--;
536
537 __io_u->flags &= ~IO_U_F_FLIGHT;
538 if (__io_u->flags & IO_U_F_IN_CUR_DEPTH)
539 td->cur_depth--;
540 flist_del(&__io_u->list);
541 flist_add_tail(&__io_u->list, &td->io_u_requeues);
542 td_io_u_unlock(td);
543 *io_u = NULL;
544}
545
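/*
 * Fill in data direction, offset and length for a fresh io_u, handling zone
 * skipping, sync "directions" and the random map. Returns 0 on success.
 */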
546static int fill_io_u(struct thread_data *td, struct io_u *io_u)
547{
548 if (td->io_ops->flags & FIO_NOIO)
549 goto out;
550
551 io_u->ddir = get_rw_ddir(td);
552
553 /*
554 * fsync() or fdatasync(), we are done
555 */
556 if (ddir_sync(io_u->ddir))
557 goto out;
558
559 /*
560 * See if it's time to switch to a new zone
561 */
562 if (td->zone_bytes >= td->o.zone_size) {
563 td->zone_bytes = 0;
564 io_u->file->last_pos += td->o.zone_skip;
565 td->io_skip_bytes += td->o.zone_skip;
566 }
567
568 /*
569 * No log, let the seq/rand engine retrieve the next buflen and
570 * position.
571 */
572 if (get_next_offset(td, io_u)) {
573 dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
574 return 1;
575 }
576
577 io_u->buflen = get_next_buflen(td, io_u);
578 if (!io_u->buflen) {
579 dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
580 return 1;
581 }
582
583 if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
584 dprint(FD_IO, "io_u %p, offset too large\n", io_u);
585 dprint(FD_IO, " off=%llu/%lu > %llu\n", io_u->offset,
586 io_u->buflen, io_u->file->real_file_size);
587 return 1;
588 }
589
590 /*
591 * mark entry before potentially trimming io_u
592 */
593 if (td_random(td) && file_randommap(td, io_u->file))
594 mark_random_map(td, io_u);
595
596 /*
597 * If using a write iolog, store this entry.
598 */
599out:
600 dprint_io_u(io_u, "fill_io_u");
601 td->zone_bytes += io_u->buflen;
602 log_io_u(td, io_u);
603 return 0;
604}
605
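/*
 * Bucket a submit/complete batch size into the per-thread histogram:
 * slot 0 for 0, then 1-4, 5-8, 9-16, 17-32, 33-64 and above 64.
 */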
606static void __io_u_mark_map(unsigned int *map, unsigned int nr)
607{
608 int index = 0;
609
610 switch (nr) {
611 default:
612 index = 6;
613 break;
614 case 33 ... 64:
615 index = 5;
616 break;
617 case 17 ... 32:
618 index = 4;
619 break;
620 case 9 ... 16:
621 index = 3;
622 break;
623 case 5 ... 8:
624 index = 2;
625 break;
626 case 1 ... 4:
627 index = 1;
628 case 0:
629 break;
630 }
631
632 map[index]++;
633}
634
635void io_u_mark_submit(struct thread_data *td, unsigned int nr)
636{
637 __io_u_mark_map(td->ts.io_u_submit, nr);
638 td->ts.total_submit++;
639}
640
641void io_u_mark_complete(struct thread_data *td, unsigned int nr)
642{
643 __io_u_mark_map(td->ts.io_u_complete, nr);
644 td->ts.total_complete++;
645}
646
647void io_u_mark_depth(struct thread_data *td, unsigned int nr)
648{
649 int index = 0;
650
651 switch (td->cur_depth) {
652 default:
653 index = 6;
654 break;
655 case 32 ... 63:
656 index = 5;
657 break;
658 case 16 ... 31:
659 index = 4;
660 break;
661 case 8 ... 15:
662 index = 3;
663 break;
664 case 4 ... 7:
665 index = 2;
666 break;
667 case 2 ... 3:
668 index = 1;
669 case 1:
670 break;
671 }
672
673 td->ts.io_u_map[index] += nr;
674}
675
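/*
 * Completion latency histograms: sub-millisecond completions are bucketed
 * in the microsecond ranges below, larger ones in io_u_mark_lat_msec().
 */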
676static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
677{
678 int index = 0;
679
680 assert(usec < 1000);
681
682 switch (usec) {
683 case 750 ... 999:
684 index = 9;
685 break;
686 case 500 ... 749:
687 index = 8;
688 break;
689 case 250 ... 499:
690 index = 7;
691 break;
692 case 100 ... 249:
693 index = 6;
694 break;
695 case 50 ... 99:
696 index = 5;
697 break;
698 case 20 ... 49:
699 index = 4;
700 break;
701 case 10 ... 19:
702 index = 3;
703 break;
704 case 4 ... 9:
705 index = 2;
706 break;
707 case 2 ... 3:
708 index = 1;
709 case 0 ... 1:
710 break;
711 }
712
713 assert(index < FIO_IO_U_LAT_U_NR);
714 td->ts.io_u_lat_u[index]++;
715}
716
717static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
718{
719 int index = 0;
720
721 switch (msec) {
722 default:
723 index = 11;
724 break;
725 case 1000 ... 1999:
726 index = 10;
727 break;
728 case 750 ... 999:
729 index = 9;
730 break;
731 case 500 ... 749:
732 index = 8;
733 break;
734 case 250 ... 499:
735 index = 7;
736 break;
737 case 100 ... 249:
738 index = 6;
739 break;
740 case 50 ... 99:
741 index = 5;
742 break;
743 case 20 ... 49:
744 index = 4;
745 break;
746 case 10 ... 19:
747 index = 3;
748 break;
749 case 4 ... 9:
750 index = 2;
751 break;
752 case 2 ... 3:
753 index = 1;
754 case 0 ... 1:
755 break;
756 }
757
758 assert(index < FIO_IO_U_LAT_M_NR);
759 td->ts.io_u_lat_m[index]++;
760}
761
762static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
763{
764 if (usec < 1000)
765 io_u_mark_lat_usec(td, usec);
766 else
767 io_u_mark_lat_msec(td, usec / 1000);
768}
769
770/*
771 * Get next file to service by choosing one at random
772 */
773static struct fio_file *get_next_file_rand(struct thread_data *td,
774 enum fio_file_flags goodf,
775 enum fio_file_flags badf)
776{
777 struct fio_file *f;
778 int fno;
779
780 do {
781 long r = os_random_long(&td->next_file_state);
782 int opened = 0;
783
784 fno = (unsigned int) ((double) td->o.nr_files
785 * (r / (OS_RAND_MAX + 1.0)));
786 f = td->files[fno];
787 if (fio_file_done(f))
788 continue;
789
790 if (!fio_file_open(f)) {
791 int err;
792
793 err = td_io_open_file(td, f);
794 if (err)
795 continue;
796 opened = 1;
797 }
798
799 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
800 dprint(FD_FILE, "get_next_file_rand: %p\n", f);
801 return f;
802 }
803 if (opened)
804 td_io_close_file(td, f);
805 } while (1);
806}
807
808/*
809 * Get next file to service by doing round robin between all available ones
810 */
811static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
812 int badf)
813{
814 unsigned int old_next_file = td->next_file;
815 struct fio_file *f;
816
817 do {
818 int opened = 0;
819
820 f = td->files[td->next_file];
821
822 td->next_file++;
823 if (td->next_file >= td->o.nr_files)
824 td->next_file = 0;
825
826 dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
827 if (fio_file_done(f)) {
828 f = NULL;
829 continue;
830 }
831
832 if (!fio_file_open(f)) {
833 int err;
834
835 err = td_io_open_file(td, f);
836 if (err) {
837 dprint(FD_FILE, "error %d on open of %s\n",
838 err, f->file_name);
839 f = NULL;
840 continue;
841 }
842 opened = 1;
843 }
844
845 dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
846 f->flags);
847 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
848 break;
849
850 if (opened)
851 td_io_close_file(td, f);
852
853 f = NULL;
854 } while (td->next_file != old_next_file);
855
856 dprint(FD_FILE, "get_next_file_rr: %p\n", f);
857 return f;
858}
859
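/*
 * Select the next file to service. Stick with the current service file for
 * FIO_FSERVICE_SEQ (or while file_service_left allows), otherwise pick a new
 * one round-robin or at random depending on file_service_type.
 */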
860static struct fio_file *__get_next_file(struct thread_data *td)
861{
862 struct fio_file *f;
863
864 assert(td->o.nr_files <= td->files_index);
865
866 if (td->nr_done_files >= td->o.nr_files) {
867 dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
868 " nr_files=%d\n", td->nr_open_files,
869 td->nr_done_files,
870 td->o.nr_files);
871 return NULL;
872 }
873
874 f = td->file_service_file;
875 if (f && fio_file_open(f) && !fio_file_closing(f)) {
876 if (td->o.file_service_type == FIO_FSERVICE_SEQ)
877 goto out;
878 if (td->file_service_left--)
879 goto out;
880 }
881
882 if (td->o.file_service_type == FIO_FSERVICE_RR ||
883 td->o.file_service_type == FIO_FSERVICE_SEQ)
884 f = get_next_file_rr(td, FIO_FILE_open, FIO_FILE_closing);
885 else
886 f = get_next_file_rand(td, FIO_FILE_open, FIO_FILE_closing);
887
888 td->file_service_file = f;
889 td->file_service_left = td->file_service_nr - 1;
890out:
891 dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
892 return f;
893}
894
895static struct fio_file *get_next_file(struct thread_data *td)
896{
897 struct prof_io_ops *ops = &td->prof_io_ops;
898
899 if (ops->get_next_file)
900 return ops->get_next_file(td);
901
902 return __get_next_file(td);
903}
904
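/*
 * Attach a file to the io_u and fill in its offset/length. Files for which
 * fill_io_u() fails are marked done and closed, and the next file is tried.
 */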
905static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
906{
907 struct fio_file *f;
908
909 do {
910 f = get_next_file(td);
911 if (!f)
912 return 1;
913
914 io_u->file = f;
915 get_file(f);
916
917 if (!fill_io_u(td, io_u))
918 break;
919
920 put_file_log(td, f);
921 td_io_close_file(td, f);
922 io_u->file = NULL;
923 fio_file_set_done(f);
924 td->nr_done_files++;
925 dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
926 td->nr_done_files, td->o.nr_files);
927 } while (1);
928
929 return 0;
930}
931
932
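/*
 * Grab a free io_u, preferring requeued ones. With verify_async, this may
 * block until an async verify thread releases one.
 */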
933struct io_u *__get_io_u(struct thread_data *td)
934{
935 struct io_u *io_u = NULL;
936
937 td_io_u_lock(td);
938
939again:
940 if (!flist_empty(&td->io_u_requeues))
941 io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
942 else if (!queue_full(td)) {
943 io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);
944
945 io_u->buflen = 0;
946 io_u->resid = 0;
947 io_u->file = NULL;
948 io_u->end_io = NULL;
949 }
950
951 if (io_u) {
952 assert(io_u->flags & IO_U_F_FREE);
953 io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF);
954
955 io_u->error = 0;
956 flist_del(&io_u->list);
957 flist_add(&io_u->list, &td->io_u_busylist);
958 td->cur_depth++;
959 io_u->flags |= IO_U_F_IN_CUR_DEPTH;
960 } else if (td->o.verify_async) {
961 /*
962 * We ran out, wait for async verify threads to finish and
963 * return one
964 */
965 pthread_cond_wait(&td->free_cond, &td->io_u_lock);
966 goto again;
967 }
968
969 td_io_u_unlock(td);
970 return io_u;
971}
972
973/*
974 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
975 * etc. The returned io_u is fully ready to be prepped and submitted.
976 */
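/*
 * A caller loop looks roughly like the following (a simplified sketch of how
 * the backend in fio.c drives this file; error/resid handling omitted,
 * keep_running and bytes_done are illustrative names):
 *
 *	while (keep_running) {
 *		struct io_u *io_u = get_io_u(td);
 *
 *		if (!io_u)
 *			break;
 *		switch (td_io_queue(td, io_u)) {
 *		case FIO_Q_COMPLETED:
 *			io_u_sync_complete(td, io_u, bytes_done);
 *			break;
 *		case FIO_Q_BUSY:
 *			requeue_io_u(td, &io_u);
 *			break;
 *		case FIO_Q_QUEUED:
 *			break;
 *		}
 *	}
 */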
977struct io_u *get_io_u(struct thread_data *td)
978{
979 struct fio_file *f;
980 struct io_u *io_u;
981
982 io_u = __get_io_u(td);
983 if (!io_u) {
984 dprint(FD_IO, "__get_io_u failed\n");
985 return NULL;
986 }
987
988 if (td->o.verify_backlog && td->io_hist_len) {
989 int get_verify = 0;
990
991 if (td->verify_batch) {
992 td->verify_batch--;
993 get_verify = 1;
994 } else if (!(td->io_hist_len % td->o.verify_backlog) &&
995 td->last_ddir != DDIR_READ) {
996 td->verify_batch = td->o.verify_batch;
997 if (!td->verify_batch)
998 td->verify_batch = td->o.verify_backlog;
999 get_verify = 1;
1000 }
1001
1002 if (get_verify && !get_next_verify(td, io_u))
1003 goto out;
1004 }
1005
1006 /*
1007 * If coming from a requeue, the io_u is already set up.
1008 */
1009 if (io_u->file)
1010 goto out;
1011
1012 /*
1013 * If using an iolog, grab next piece if any available.
1014 */
1015 if (td->o.read_iolog_file) {
1016 if (read_iolog_get(td, io_u))
1017 goto err_put;
1018 } else if (set_io_u_file(td, io_u)) {
1019 dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
1020 goto err_put;
1021 }
1022
1023 f = io_u->file;
1024 assert(fio_file_open(f));
1025
1026 if (!ddir_sync(io_u->ddir)) {
1027 if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
1028 dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
1029 goto err_put;
1030 }
1031
1032 f->last_start = io_u->offset;
1033 f->last_pos = io_u->offset + io_u->buflen;
1034
1035 if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE)
1036 populate_verify_io_u(td, io_u);
1037 else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
1038 io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
1039 else if (io_u->ddir == DDIR_READ) {
1040 /*
1041 * Reset the buf_filled parameters so that next time the
1042 * buffer is used for writes, it is refilled.
1043 */
1044 io_u->buf_filled_len = 0;
1045 }
1046 }
1047
1048 /*
1049 * Set io data pointers.
1050 */
1051 io_u->xfer_buf = io_u->buf;
1052 io_u->xfer_buflen = io_u->buflen;
1053
1054out:
1055 if (!td_io_prep(td, io_u)) {
1056 if (!td->o.disable_slat)
1057 fio_gettime(&io_u->start_time, NULL);
1058 return io_u;
1059 }
1060err_put:
1061 dprint(FD_IO, "get_io_u failed\n");
1062 put_io_u(td, io_u);
1063 return NULL;
1064}
1065
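/*
 * msg[] below is indexed by io_u->ddir and must stay in sync with
 * enum fio_ddir; per the commit message, the previously missing "wait" and
 * "trim" entries are what this change adds.
 */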
1066void io_u_log_error(struct thread_data *td, struct io_u *io_u)
1067{
1068 const char *msg[] = { "read", "write", "sync", "datasync",
1069 "sync_file_range", "wait", "trim" };
1070
1071
1072
1073 log_err("fio: io_u error");
1074
1075 if (io_u->file)
1076 log_err(" on file %s", io_u->file->file_name);
1077
1078 log_err(": %s\n", strerror(io_u->error));
1079
1080 log_err(" %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
1081 io_u->offset, io_u->xfer_buflen);
1082
1083 if (!td->error)
1084 td_verror(td, io_u->error, "io_u error");
1085}
1086
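/*
 * Per-io completion bookkeeping: account bytes per direction, record
 * lat/clat/bw samples once the ramp time is over, update the rate sleep
 * debt, and log written pieces for later verification.
 */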
1087static void io_completed(struct thread_data *td, struct io_u *io_u,
1088 struct io_completion_data *icd)
1089{
1090 /*
1091 * Older gcc versions are too dumb to realize that usec is always
1092 * initialized before use; silence that warning.
1093 */
1094 unsigned long uninitialized_var(usec);
1095 struct fio_file *f;
1096
1097 dprint_io_u(io_u, "io complete");
1098
1099 td_io_u_lock(td);
1100 assert(io_u->flags & IO_U_F_FLIGHT);
1101 io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
1102 td_io_u_unlock(td);
1103
1104 if (ddir_sync(io_u->ddir)) {
1105 td->last_was_sync = 1;
1106 f = io_u->file;
1107 if (f) {
1108 f->first_write = -1ULL;
1109 f->last_write = -1ULL;
1110 }
1111 return;
1112 }
1113
1114 td->last_was_sync = 0;
1115 td->last_ddir = io_u->ddir;
1116
1117 if (!io_u->error) {
1118 unsigned int bytes = io_u->buflen - io_u->resid;
1119 const enum fio_ddir idx = io_u->ddir;
1120 const enum fio_ddir odx = io_u->ddir ^ 1;
1121 int ret;
1122
1123 td->io_blocks[idx]++;
1124 td->io_bytes[idx] += bytes;
1125 td->this_io_bytes[idx] += bytes;
1126
1127 if (idx == DDIR_WRITE) {
1128 f = io_u->file;
1129 if (f) {
1130 if (f->first_write == -1ULL ||
1131 io_u->offset < f->first_write)
1132 f->first_write = io_u->offset;
1133 if (f->last_write == -1ULL ||
1134 ((io_u->offset + bytes) > f->last_write))
1135 f->last_write = io_u->offset + bytes;
1136 }
1137 }
1138
1139 if (ramp_time_over(td)) {
1140 unsigned long uninitialized_var(lusec);
1141
1142 if (!td->o.disable_clat || !td->o.disable_bw)
1143 lusec = utime_since(&io_u->issue_time,
1144 &icd->time);
1145 if (!td->o.disable_lat) {
1146 unsigned long tusec;
1147
1148 tusec = utime_since(&io_u->start_time,
1149 &icd->time);
1150 add_lat_sample(td, idx, tusec, bytes);
1151 }
1152 if (!td->o.disable_clat) {
1153 add_clat_sample(td, idx, lusec, bytes);
1154 io_u_mark_latency(td, lusec);
1155 }
1156 if (!td->o.disable_bw)
1157 add_bw_sample(td, idx, bytes, &icd->time);
1158 if (__should_check_rate(td, idx)) {
1159 td->rate_pending_usleep[idx] =
1160 ((td->this_io_bytes[idx] *
1161 td->rate_nsec_cycle[idx]) / 1000 -
1162 utime_since_now(&td->start));
1163 }
1164 if (__should_check_rate(td, idx ^ 1))
1165 td->rate_pending_usleep[odx] =
1166 ((td->this_io_bytes[odx] *
1167 td->rate_nsec_cycle[odx]) / 1000 -
1168 utime_since_now(&td->start));
1169 }
1170
1171 if (td_write(td) && idx == DDIR_WRITE &&
1172 td->o.do_verify &&
1173 td->o.verify != VERIFY_NONE)
1174 log_io_piece(td, io_u);
1175
1176 icd->bytes_done[idx] += bytes;
1177
1178 if (io_u->end_io) {
1179 ret = io_u->end_io(td, io_u);
1180 if (ret && !icd->error)
1181 icd->error = ret;
1182 }
1183 } else {
1184 icd->error = io_u->error;
1185 io_u_log_error(td, io_u);
1186 }
1187 if (td->o.continue_on_error && icd->error &&
1188 td_non_fatal_error(icd->error)) {
1189 /*
1190 * If there is a non_fatal error, then add to the error count
1191 * and clear all the errors.
1192 */
1193 update_error_count(td, icd->error);
1194 td_clear_error(td);
1195 icd->error = 0;
1196 io_u->error = 0;
1197 }
1198}
1199
1200static void init_icd(struct thread_data *td, struct io_completion_data *icd,
1201 int nr)
1202{
1203 if (!td->o.disable_clat || !td->o.disable_bw)
1204 fio_gettime(&icd->time, NULL);
1205
1206 icd->nr = nr;
1207
1208 icd->error = 0;
1209 icd->bytes_done[0] = icd->bytes_done[1] = 0;
1210}
1211
1212static void ios_completed(struct thread_data *td,
1213 struct io_completion_data *icd)
1214{
1215 struct io_u *io_u;
1216 int i;
1217
1218 for (i = 0; i < icd->nr; i++) {
1219 io_u = td->io_ops->event(td, i);
1220
1221 io_completed(td, io_u, icd);
1222
1223 if (!(io_u->flags & IO_U_F_FREE_DEF))
1224 put_io_u(td, io_u);
1225 }
1226}
1227
1228/*
1229 * Complete a single io_u for the sync engines.
1230 */
1231int io_u_sync_complete(struct thread_data *td, struct io_u *io_u,
1232 unsigned long *bytes)
1233{
1234 struct io_completion_data icd;
1235
1236 init_icd(td, &icd, 1);
1237 io_completed(td, io_u, &icd);
1238
1239 if (!(io_u->flags & IO_U_F_FREE_DEF))
1240 put_io_u(td, io_u);
1241
1242 if (icd.error) {
1243 td_verror(td, icd.error, "io_u_sync_complete");
1244 return -1;
1245 }
1246
1247 if (bytes) {
1248 bytes[0] += icd.bytes_done[0];
1249 bytes[1] += icd.bytes_done[1];
1250 }
1251
1252 return 0;
1253}
1254
1255/*
1256 * Called to complete min_events number of io for the async engines.
1257 */
1258int io_u_queued_complete(struct thread_data *td, int min_evts,
1259 unsigned long *bytes)
1260{
1261 struct io_completion_data icd;
1262 struct timespec *tvp = NULL;
1263 int ret;
1264 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
1265
1266 dprint(FD_IO, "io_u_queued_complete: min=%d\n", min_evts);
1267
1268 if (!min_evts)
1269 tvp = &ts;
1270
1271 ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
1272 if (ret < 0) {
1273 td_verror(td, -ret, "td_io_getevents");
1274 return ret;
1275 } else if (!ret)
1276 return ret;
1277
1278 init_icd(td, &icd, ret);
1279 ios_completed(td, &icd);
1280 if (icd.error) {
1281 td_verror(td, icd.error, "io_u_queued_complete");
1282 return -1;
1283 }
1284
1285 if (bytes) {
1286 bytes[0] += icd.bytes_done[0];
1287 bytes[1] += icd.bytes_done[1];
1288 }
1289
1290 return 0;
1291}
1292
1293/*
1294 * Call when io_u is really queued, to update the submission latency.
1295 */
1296void io_u_queued(struct thread_data *td, struct io_u *io_u)
1297{
1298 if (!td->o.disable_slat) {
1299 unsigned long slat_time;
1300
1301 slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
1302 add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen);
1303 }
1304}
1305
1306/*
1307 * "randomly" fill the buffer contents
1308 */
1309void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
1310 unsigned int max_bs)
1311{
1312 if (!td->o.zero_buffers)
1313 fill_random_buf(io_u->buf, max_bs);
1314 else
1315 memset(io_u->buf, 0, max_bs);
1316}