Commit | Line | Data |
---|---|---|
ebac4655 JA |
1 | #ifndef FIO_H |
2 | #define FIO_H | |
3 | ||
4 | #include <sched.h> | |
5 | #include <limits.h> | |
6 | #include <pthread.h> | |
7 | #include <sys/time.h> | |
8 | #include <sys/resource.h> | |
3c39a379 JA |
9 | #include <errno.h> |
10 | #include <stdlib.h> | |
11 | #include <stdio.h> | |
6d6f031f | 12 | #include <unistd.h> |
34cfcdaf | 13 | #include <string.h> |
cd14cc10 | 14 | #include <inttypes.h> |
7101d9c2 | 15 | #include <assert.h> |
ebac4655 | 16 | |
ecc314ba BC |
17 | struct thread_data; |
18 | ||
317b95d0 | 19 | #include "compiler/compiler.h" |
01743ee1 | 20 | #include "flist.h" |
e2887563 | 21 | #include "fifo.h" |
4b87898e | 22 | #include "rbtree.h" |
317b95d0 JA |
23 | #include "arch/arch.h" |
24 | #include "os/os.h" | |
07739b57 | 25 | #include "mutex.h" |
a3d741fa JA |
26 | #include "log.h" |
27 | #include "debug.h" | |
d6aed795 JA |
28 | #include "file.h" |
29 | #include "io_ddir.h" | |
dcefb588 | 30 | #include "ioengine.h" |
5995a6a4 | 31 | #include "iolog.h" |
c5c8bd5c | 32 | #include "helpers.h" |
07b3232d | 33 | #include "options.h" |
7eb36574 | 34 | #include "profile.h" |
c223da83 | 35 | #include "time.h" |
bf2e821a | 36 | #include "lib/getopt.h" |
2615cc4b | 37 | #include "lib/rand.h" |
37db14fe | 38 | #include "server.h" |
ebac4655 | 39 | |
609342ff DL |
40 | #ifdef FIO_HAVE_GUASI |
41 | #include <guasi.h> | |
42 | #endif | |
43 | ||
417f0068 JA |
44 | #ifdef FIO_HAVE_SOLARISAIO |
45 | #include <sys/asynch.h> | |
46 | #endif | |
47 | ||
ebac4655 | 48 | struct group_run_stats { /* aggregate figures for one reporting group; NOTE(review): the [2] arrays are presumably indexed by data direction (read/write) - confirm */ |
9104f874 JA |
49 | unsigned long long max_run[2], min_run[2]; |
50 | unsigned long long max_bw[2], min_bw[2]; | |
e9b2a3fa | 51 | unsigned long long io_kb[2]; |
9104f874 | 52 | unsigned long long agg[2]; |
90fef2d1 | 53 | unsigned int kb_base; /* same-named field exists in thread_stat and thread_options */ |
ebac4655 JA |
54 | }; |
55 | ||
e9c047a0 JA |
56 | /* |
57 | * What type of allocation to use for io buffers | |
58 | */ | |
59 | enum fio_memtype { | |
60 | MEM_MALLOC = 0, /* ordinary malloc */ | |
61 | MEM_SHM, /* use shared memory segments */ | |
74b025b0 | 62 | MEM_SHMHUGE, /* use shared memory segments with huge pages */ |
e9c047a0 | 63 | MEM_MMAP, /* use anonymous mmap */ |
d0bdaf49 | 64 | MEM_MMAPHUGE, /* memory mapped huge file */ |
e9c047a0 JA |
65 | }; |
66 | ||
38dad62d JA |
67 | /* |
68 | * offset generator types | |
69 | */ | |
70 | enum { /* NOTE(review): presumably the values stored in thread_options.rw_seq - confirm */ | |
71 | RW_SEQ_SEQ = 0, | |
72 | RW_SEQ_IDENT, /* = 1 */ | |
73 | }; | |
74 | ||
b2560f3c JA |
75 | /* |
76 | * How many depth levels to log | |
77 | */ | |
bb067558 | 78 | #define FIO_IO_U_MAP_NR 7 /* length of thread_stat.io_u_map[], io_u_submit[] and io_u_complete[] */ |
a9a18a3f JA |
79 | #define FIO_IO_U_LAT_U_NR 10 /* length of thread_stat.io_u_lat_u[] */ |
80 | #define FIO_IO_U_LAT_M_NR 12 /* length of thread_stat.io_u_lat_m[] */ | |
b2560f3c | 81 | |
83349190 YH |
82 | /* |
83 | * Aggregate clat samples to report percentile(s) of them. | |
84 | * | |
85 | * EXECUTIVE SUMMARY | |
86 | * | |
87 | * FIO_IO_U_PLAT_BITS determines the maximum statistical error on the | |
88 | * value of resulting percentiles. The error will be approximately | |
89 | * 1/2^(FIO_IO_U_PLAT_BITS+1) of the value. | |
90 | * | |
91 | * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the maximum | |
92 | * range being tracked for latency samples. The maximum value tracked | |
93 | * accurately will be 2^(GROUP_NR + PLAT_BITS -1) microseconds. | |
94 | * | |
95 | * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory | |
96 | * requirement of storing those aggregate counts. The memory used will | |
97 | * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(int) | |
98 | * bytes. | |
99 | * | |
100 | * FIO_IO_U_PLAT_NR is the total number of buckets. | |
101 | * | |
102 | * DETAILS | |
103 | * | |
104 | * Suppose the clat varies from 0 to 999 (usec), the straightforward | |
105 | * method is to keep an array of (999 + 1) buckets, in which a counter | |
106 | * keeps the count of samples which fall in the bucket, e.g., | |
107 | * {[0],[1],...,[999]}. However this consumes a huge amount of space, | |
108 | * and can be avoided if an approximation is acceptable. | |
109 | * | |
110 | * One such method is to let the range of the bucket to be greater | |
111 | * than one. This method has low accuracy when the value is small. For | |
112 | * example, let the buckets be {[0,99],[100,199],...,[900,999]}, and | |
113 | * the represented value of each bucket be the mean of the range. Then | |
114 | * a value 0 has a round-off error of 49.5. To improve on this, we | |
115 | * use buckets with non-uniform ranges, while bounding the error of | |
116 | * each bucket within a ratio of the sample value. A simple example | |
117 | * would be when error_bound = 0.005, buckets are { | |
118 | * {[0],[1],...,[99]}, {[100,101],[102,103],...,[198,199]},.., | |
119 | * {[900,909],[910,919]...} }. The total range is partitioned into | |
120 | * groups with different ranges, then buckets with uniform ranges. An | |
121 | * upper bound of the error is (range_of_bucket/2)/value_of_bucket | |
122 | * | |
123 | * For better efficiency, we implement this using base two. We group | |
124 | * samples by their Most Significant Bit (MSB), extract the next M bit | |
125 | * of them as an index within the group, and discard the rest of the | |
126 | * bits. | |
127 | * | |
128 | * E.g., assume a sample 'x' whose MSB is bit n (starting from bit 0), | |
129 | * and use M bit for indexing | |
130 | * | |
131 | * | n | M bits | bit (n-M-1) ... bit 0 | | |
132 | * | |
133 | * Because x is at least 2^n, and bit 0 to bit (n-M-1) is at most | |
134 | * (2^(n-M) - 1), discarding bit 0 to (n-M-1) makes the round-off | |
135 | * error | |
136 | * | |
137 | * 2^(n-M)-1 2^(n-M) 1 | |
138 | * e <= --------- <= ------- = --- | |
139 | * 2^n 2^n 2^M | |
140 | * | |
141 | * Furthermore, we use "mean" of the range to represent the bucket, | |
142 | * the error e can be lowered by half to 1 / 2^(M+1). By using M bits | |
143 | * as the index, each group must contain 2^M buckets. | |
144 | * | |
145 | * E.g. Let M (FIO_IO_U_PLAT_BITS) be 6 | |
146 | * Error bound is 1/2^(6+1) = 0.0078125 (< 1%) | |
147 | * | |
148 | * Group MSB #discarded range of #buckets | |
149 | * error_bits value | |
150 | * ---------------------------------------------------------------- | |
151 | * 0* 0~5 0 [0,63] 64 | |
152 | * 1* 6 0 [64,127] 64 | |
153 | * 2 7 1 [128,255] 64 | |
154 | * 3 8 2 [256,511] 64 | |
155 | * 4 9 3 [512,1023] 64 | |
156 | * ... ... ... [...,...] ... | |
157 | * 18 23 17 [8388608,+inf]** 64 | |
158 | * | |
159 | * * Special cases: when n < M or when n == M, in both cases, | |
160 | * the value cannot be rounded off. Use all bits of the sample as | |
161 | * index. | |
162 | * | |
163 | * ** If a sample's MSB is greater than 23, it will be counted as 23. | |
164 | */ | |
165 | ||
166 | #define FIO_IO_U_PLAT_BITS 6 /* "M" in the notes above: index bits kept per sample */ | |
167 | #define FIO_IO_U_PLAT_VAL (1 << FIO_IO_U_PLAT_BITS) /* 64 buckets per group */ | |
168 | #define FIO_IO_U_PLAT_GROUP_NR 19 /* groups 0..18 in the table above */ | |
169 | #define FIO_IO_U_PLAT_NR (FIO_IO_U_PLAT_GROUP_NR * FIO_IO_U_PLAT_VAL) /* 19 * 64 = 1216 buckets in total */ | |
170 | #define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified | |
171 | list of percentiles */ | |
172 | ||
0e92f873 RR |
173 | #define MAX_PATTERN_SIZE 512 /* size in bytes of thread_options.verify_pattern[] */ |
174 | ||
079ad09b | 175 | struct thread_stat { /* statistics record; embedded in struct thread_data as 'ts' */ |
756867bd JA |
176 | char *name; |
177 | char *verror; | |
178 | int error; | |
179 | int groupid; | |
180 | pid_t pid; | |
181 | char *description; | |
6586ee89 | 182 | int members; |
756867bd | 183 | |
079ad09b JA |
184 | struct io_log *slat_log; |
185 | struct io_log *clat_log; | |
02af0988 | 186 | struct io_log *lat_log; |
079ad09b JA |
187 | struct io_log *bw_log; |
188 | ||
189 | /* | |
190 | * bandwidth and latency stats | |
191 | */ | |
192 | struct io_stat clat_stat[2]; /* completion latency */ | |
193 | struct io_stat slat_stat[2]; /* submission latency */ | |
02af0988 | 194 | struct io_stat lat_stat[2]; /* total latency */ |
079ad09b JA |
195 | struct io_stat bw_stat[2]; /* bandwidth stats */ |
196 | ||
197 | unsigned long long stat_io_bytes[2]; | |
198 | struct timeval stat_sample_time[2]; | |
199 | ||
200 | /* | |
201 | * fio system usage accounting | |
202 | */ | |
203 | struct rusage ru_start; | |
204 | struct rusage ru_end; | |
205 | unsigned long usr_time; | |
206 | unsigned long sys_time; | |
207 | unsigned long ctx; | |
81887d5d | 208 | unsigned long minf, majf; |
079ad09b | 209 | |
b2560f3c JA |
210 | /* |
211 | * IO depth and latency stats | |
212 | */ | |
83349190 YH |
213 | unsigned int clat_percentiles; |
214 | double* percentile_list; /* pointer here; thread_options has a same-named array of FIO_IO_U_LIST_MAX_LEN doubles */ | |
215 | ||
b2560f3c | 216 | unsigned int io_u_map[FIO_IO_U_MAP_NR]; |
838bc709 JA |
217 | unsigned int io_u_submit[FIO_IO_U_MAP_NR]; |
218 | unsigned int io_u_complete[FIO_IO_U_MAP_NR]; | |
04a0feae JA |
219 | unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR]; |
220 | unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR]; | |
0a7d7f9e | 221 | unsigned int io_u_plat[2][FIO_IO_U_PLAT_NR]; /* two rows of FIO_IO_U_PLAT_NR percentile buckets */ |
0d29de83 JA |
222 | unsigned long total_io_u[3]; /* NOTE(review): three slots where most arrays here have two - presumably read/write/trim, confirm */ |
223 | unsigned long short_io_u[3]; | |
838bc709 JA |
224 | unsigned long total_submit; |
225 | unsigned long total_complete; | |
756867bd JA |
226 | |
227 | unsigned long long io_bytes[2]; | |
cda99fa0 | 228 | unsigned long long runtime[2]; |
756867bd | 229 | unsigned long total_run_time; |
f2bba182 RR |
230 | |
231 | /* | |
232 | * IO Error related stats | |
233 | */ | |
234 | unsigned continue_on_error; | |
235 | unsigned long total_err_count; /* unsigned long here; thread_data.total_err_count is unsigned int */ | |
236 | int first_error; | |
90fef2d1 JA |
237 | |
238 | unsigned int kb_base; | |
b2560f3c | 239 | }; |
71619dc2 | 240 | |
564ca972 JA |
241 | struct bssplit { /* one entry of the thread_options.bssplit[] arrays */ |
242 | unsigned int bs; /* NOTE(review): presumably a block size - confirm */ | |
243 | unsigned char perc; /* NOTE(review): presumably a percentage weight for 'bs' - confirm */ | |
244 | }; | |
245 | ||
2dc1bbeb | 246 | struct thread_options { /* option values; embedded in struct thread_data as 'o' */ |
b1ec1da6 | 247 | int pad; |
61697c37 | 248 | char *description; |
b4692828 | 249 | char *name; |
ef899b63 | 250 | char *directory; |
13f8e2d2 | 251 | char *filename; |
2dc1bbeb | 252 | char *opendir; |
09629a90 | 253 | char *ioengine; |
413dd459 | 254 | enum td_ddir td_ddir; |
38dad62d | 255 | unsigned int rw_seq; |
90fef2d1 | 256 | unsigned int kb_base; |
5736c10d | 257 | unsigned int ddir_seq_nr; |
a66da7a2 | 258 | long ddir_seq_add; |
2dc1bbeb JA |
259 | unsigned int iodepth; |
260 | unsigned int iodepth_low; | |
261 | unsigned int iodepth_batch; | |
4950421a | 262 | unsigned int iodepth_batch_complete; |
2dc1bbeb JA |
263 | |
264 | unsigned long long size; | |
7bb59102 | 265 | unsigned int size_percent; |
aa31f1f1 | 266 | unsigned int fill_device; |
2dc1bbeb JA |
267 | unsigned long long file_size_low; |
268 | unsigned long long file_size_high; | |
269 | unsigned long long start_offset; | |
270 | ||
271 | unsigned int bs[2]; /* NOTE(review): the [2] arrays in this struct are presumably indexed by data direction - confirm */ | |
2b7a01d0 | 272 | unsigned int ba[2]; |
2dc1bbeb JA |
273 | unsigned int min_bs[2]; |
274 | unsigned int max_bs[2]; | |
720e84ad JA |
275 | struct bssplit *bssplit[2]; |
276 | unsigned int bssplit_nr[2]; /* NOTE(review): presumably the entry counts of bssplit[] - confirm */ | |
2dc1bbeb JA |
277 | |
278 | unsigned int nr_files; /* upper loop bound used by for_each_file() */ | |
279 | unsigned int open_files; | |
4d4e80f2 | 280 | enum file_lock_mode file_lock_mode; |
29c1349f | 281 | unsigned int lockfile_batch; |
e9c047a0 | 282 | |
9158d2f7 JA |
283 | unsigned int odirect; /* non-zero makes should_fsync() return 0 */ |
284 | unsigned int invalidate_cache; | |
285 | unsigned int create_serialize; | |
286 | unsigned int create_fsync; | |
814452bd | 287 | unsigned int create_on_open; |
9158d2f7 | 288 | unsigned int end_fsync; |
afad68f7 | 289 | unsigned int pre_read; |
9158d2f7 JA |
290 | unsigned int sync_io; |
291 | unsigned int verify; | |
e84c73a8 | 292 | unsigned int do_verify; |
160b966d | 293 | unsigned int verifysort; |
a59e170d JA |
294 | unsigned int verify_interval; |
295 | unsigned int verify_offset; | |
0e92f873 | 296 | char verify_pattern[MAX_PATTERN_SIZE]; /* MAX_PATTERN_SIZE (512) bytes of storage */ |
90059d65 | 297 | unsigned int verify_pattern_bytes; |
a12a3b4d | 298 | unsigned int verify_fatal; |
b463e936 | 299 | unsigned int verify_dump; |
e8462bd8 | 300 | unsigned int verify_async; /* non-zero turns on the locking in td_io_u_lock()/td_io_u_unlock()/td_io_u_free_notify() */ |
9e144189 JA |
301 | unsigned long long verify_backlog; |
302 | unsigned int verify_batch; | |
9158d2f7 JA |
303 | unsigned int use_thread; |
304 | unsigned int unlink; | |
305 | unsigned int do_disk_util; | |
306 | unsigned int override_sync; /* non-zero lets should_fsync() return 1 even for non-writing jobs */ | |
307 | unsigned int rand_repeatable; | |
2615cc4b | 308 | unsigned int use_os_rand; |
9158d2f7 JA |
309 | unsigned int write_lat_log; |
310 | unsigned int write_bw_log; | |
bb8895e0 | 311 | unsigned int norandommap; |
2b386d25 | 312 | unsigned int softrandommap; |
690adba3 | 313 | unsigned int bs_unaligned; |
ebb1415f | 314 | unsigned int fsync_on_close; |
e9c047a0 | 315 | |
56bb17f2 | 316 | unsigned int hugepage_size; |
a00735e6 | 317 | unsigned int rw_min_bs; |
ebac4655 | 318 | unsigned int thinktime; |
48097d5c | 319 | unsigned int thinktime_spin; |
9c1f7434 | 320 | unsigned int thinktime_blocks; |
ebac4655 | 321 | unsigned int fsync_blocks; |
5f9099ea | 322 | unsigned int fdatasync_blocks; |
1ef2b6be | 323 | unsigned int barrier_blocks; |
dce8b847 | 324 | unsigned long long start_delay; |
0db26797 | 325 | unsigned long long timeout; |
721938ae | 326 | unsigned long long ramp_time; |
ebac4655 | 327 | unsigned int overwrite; |
ebac4655 | 328 | unsigned int bw_avg_time; |
ebac4655 | 329 | unsigned int loops; |
20dc95c4 JA |
330 | unsigned long long zone_size; |
331 | unsigned long long zone_skip; | |
e9c047a0 | 332 | enum fio_memtype mem_type; |
d529ee19 | 333 | unsigned int mem_align; |
2dc1bbeb | 334 | |
ebac4655 | 335 | unsigned int stonewall; |
b3d62a75 | 336 | unsigned int new_group; |
ebac4655 | 337 | unsigned int numjobs; |
ebac4655 | 338 | os_cpu_mask_t cpumask; |
375b2695 | 339 | unsigned int cpumask_set; |
e8462bd8 JA |
340 | os_cpu_mask_t verify_cpumask; |
341 | unsigned int verify_cpumask_set; | |
aea47d44 | 342 | unsigned int iolog; |
a6ccc7be | 343 | unsigned int rwmixcycle; |
e47f799f | 344 | unsigned int rwmix[2]; |
b6f4d880 | 345 | unsigned int nice; |
0aabe160 | 346 | unsigned int file_service_type; |
b2560f3c | 347 | unsigned int group_reporting; |
d2f3ac35 | 348 | unsigned int fadvise_hint; |
a596f047 | 349 | enum fio_fallocate_mode fallocate_mode; |
e9459e5a | 350 | unsigned int zero_buffers; |
5973cafb | 351 | unsigned int refill_buffers; |
fd68418e | 352 | unsigned int scramble_buffers; |
cf4464ca | 353 | unsigned int time_based; |
02af0988 | 354 | unsigned int disable_lat; |
9520ebb9 JA |
355 | unsigned int disable_clat; /* fio_fill_issue_time() returns 0 only when disable_clat, disable_slat and disable_bw are all set and read_iolog_file is NULL */ |
356 | unsigned int disable_slat; | |
357 | unsigned int disable_bw; | |
993bf48b | 358 | unsigned int gtod_reduce; |
be4ecfdf JA |
359 | unsigned int gtod_cpu; |
360 | unsigned int gtod_offload; | |
c223da83 | 361 | enum fio_cs clocksource; |
64bbb865 | 362 | unsigned int no_stall; |
0d29de83 JA |
363 | unsigned int trim_percentage; |
364 | unsigned int trim_batch; | |
365 | unsigned int trim_zero; | |
366 | unsigned long long trim_backlog; | |
83349190 YH |
367 | unsigned int clat_percentiles; |
368 | unsigned int overwrite_plist; | |
369 | double percentile_list[FIO_IO_U_LIST_MAX_LEN]; /* FIO_IO_U_LIST_MAX_LEN (20) entries */ | |
aea47d44 | 370 | |
076efc7c JA |
371 | char *read_iolog_file; /* non-NULL makes fio_fill_issue_time() return 1 */ |
372 | char *write_iolog_file; | |
e3cedca7 JA |
373 | char *bw_log_file; |
374 | char *lat_log_file; | |
d1c46c04 | 375 | char *replay_redirect; |
2dc1bbeb JA |
376 | |
377 | /* | |
378 | * Pre-run and post-run shell | |
379 | */ | |
380 | char *exec_prerun; | |
381 | char *exec_postrun; | |
382 | ||
581e7141 JA |
383 | unsigned int rate[2]; /* a non-zero rate/ratemin/rate_iops/rate_iops_min entry makes __should_check_rate() return 1 for that index */ |
384 | unsigned int ratemin[2]; | |
2dc1bbeb | 385 | unsigned int ratecycle; |
581e7141 JA |
386 | unsigned int rate_iops[2]; |
387 | unsigned int rate_iops_min[2]; | |
2dc1bbeb JA |
388 | |
389 | char *ioscheduler; | |
390 | ||
391 | /* | |
392 | * CPU "io" cycle burner | |
393 | */ | |
394 | unsigned int cpuload; | |
395 | unsigned int cpucycle; | |
f2bba182 RR |
396 | |
397 | /* | |
398 | * I/O Error handling | |
399 | */ | |
400 | unsigned int continue_on_error; | |
9ac8a797 JA |
401 | |
402 | /* | |
403 | * Benchmark profile type | |
404 | */ | |
79d16311 | 405 | char *profile; |
a696fa2a JA |
406 | |
407 | /* | |
408 | * blkio cgroup support | |
409 | */ | |
a696fa2a JA |
410 | char *cgroup; |
411 | unsigned int cgroup_weight; | |
7de87099 | 412 | unsigned int cgroup_nodelete; |
e0b0d892 JA |
413 | |
414 | unsigned int uid; | |
415 | unsigned int gid; | |
44f29692 JA |
416 | |
417 | unsigned int sync_file_range; | |
675012f0 DE |
418 | |
419 | unsigned int userspace_libaio_reap; | |
2dc1bbeb JA |
420 | }; |
421 | ||
e4e33258 JA |
422 | #define FIO_VERROR_SIZE 128 /* size in bytes of thread_data.verror[] */ |
423 | ||
2dc1bbeb JA |
424 | /* |
425 | * This describes a single thread/process executing a fio job. | |
426 | */ | |
427 | struct thread_data { | |
428 | struct thread_options o; /* option values for this job */ | |
e4e33258 | 429 | char verror[FIO_VERROR_SIZE]; /* error text, formatted by __td_verror() */ |
2dc1bbeb JA |
430 | pthread_t thread; |
431 | int thread_number; | |
432 | int groupid; | |
433 | struct thread_stat ts; | |
126d65c6 | 434 | struct fio_file **files; /* array walked by for_each_file() */ |
dd87b2c9 | 435 | unsigned int files_size; |
2dc1bbeb JA |
436 | unsigned int files_index; /* for_each_file() iterates only when this is non-zero */ |
437 | unsigned int nr_open_files; | |
1020a139 | 438 | unsigned int nr_done_files; |
2dc1bbeb JA |
439 | unsigned int nr_normal_files; |
440 | union { /* anonymous union: the members below share storage */ | |
441 | unsigned int next_file; | |
442 | os_random_state_t next_file_state; | |
4c07ad86 | 443 | struct frand_state __next_file_state; |
2dc1bbeb JA |
444 | }; |
445 | int error; /* set by __td_verror() (first error only), reset by td_clear_error() */ | |
20e354ef | 446 | int done; |
2dc1bbeb JA |
447 | pid_t pid; |
448 | char *orig_buffer; | |
449 | size_t orig_buffer_size; | |
450 | volatile int terminate; | |
451 | volatile int runstate; /* NOTE(review): presumably one of the TD_* runstate values, changed through td_set_runstate() - confirm */ | |
452 | unsigned int ioprio; | |
ac684785 | 453 | unsigned int ioprio_set; |
2dc1bbeb | 454 | unsigned int last_was_sync; /* non-zero makes should_fsync() return 0 */ |
9e144189 | 455 | enum fio_ddir last_ddir; |
2dc1bbeb JA |
456 | |
457 | char *mmapfile; | |
458 | int mmapfd; | |
459 | ||
843a7413 JA |
460 | void *iolog_buf; |
461 | FILE *iolog_f; | |
ebac4655 | 462 | |
da86774e | 463 | char *sysfs_root; |
da86774e | 464 | |
3545a109 | 465 | unsigned long rand_seeds[8]; |
5bfc35d7 | 466 | |
4c07ad86 JA |
467 | union { /* each of these unions overlays an os_random_state_t with a struct frand_state */ |
468 | os_random_state_t bsrange_state; | |
469 | struct frand_state __bsrange_state; | |
470 | }; | |
471 | union { | |
472 | os_random_state_t verify_state; | |
473 | struct frand_state __verify_state; | |
474 | }; | |
475 | union { | |
476 | os_random_state_t trim_state; | |
477 | struct frand_state __trim_state; | |
478 | }; | |
ebac4655 | 479 | |
3545a109 JA |
480 | struct frand_state buf_state; |
481 | ||
9e144189 | 482 | unsigned int verify_batch; |
0d29de83 | 483 | unsigned int trim_batch; |
9e144189 | 484 | |
ebac4655 JA |
485 | int shm_id; |
486 | ||
e9c047a0 JA |
487 | /* |
488 | * IO engine hooks, contains everything needed to submit an io_u | |
489 | * to any of the available IO engines. | |
490 | */ | |
2866c82d | 491 | struct ioengine_ops *io_ops; |
ebac4655 | 492 | |
e9c047a0 JA |
493 | /* |
494 | * Current IO depth and list of free and busy io_u's. | |
495 | */ | |
ebac4655 | 496 | unsigned int cur_depth; |
d8005759 | 497 | unsigned int io_u_queued; |
01743ee1 JA |
498 | struct flist_head io_u_freelist; |
499 | struct flist_head io_u_busylist; | |
500 | struct flist_head io_u_requeues; | |
e8462bd8 JA |
501 | pthread_mutex_t io_u_lock; /* taken/released by td_io_u_lock()/td_io_u_unlock() when o.verify_async is set */ |
502 | pthread_cond_t free_cond; /* signalled by td_io_u_free_notify() when o.verify_async is set */ | |
503 | ||
504 | /* | |
505 | * async verify offload | |
506 | */ | |
507 | struct flist_head verify_list; | |
508 | pthread_t *verify_threads; | |
509 | unsigned int nr_verify_threads; | |
510 | pthread_cond_t verify_cond; | |
511 | int verify_thread_exit; | |
ebac4655 | 512 | |
e9c047a0 JA |
513 | /* |
514 | * Rate state | |
515 | */ | |
ba3e4e0c | 516 | unsigned long rate_nsec_cycle[2]; |
581e7141 JA |
517 | long rate_pending_usleep[2]; |
518 | unsigned long rate_bytes[2]; | |
519 | unsigned long rate_blocks[2]; | |
520 | struct timeval lastrate[2]; | |
ebac4655 | 521 | |
ebac4655 | 522 | unsigned long long total_io_size; |
2e3bd4c2 | 523 | unsigned long long fill_device_size; |
ebac4655 | 524 | |
755200a3 | 525 | unsigned long io_issues[2]; |
9104f874 JA |
526 | unsigned long long io_blocks[2]; |
527 | unsigned long long io_bytes[2]; | |
48f5abd3 | 528 | unsigned long long io_skip_bytes; |
9104f874 | 529 | unsigned long long this_io_bytes[2]; |
079ad09b | 530 | unsigned long long zone_bytes; |
cdd18ad8 | 531 | struct fio_mutex *mutex; |
ebac4655 | 532 | |
e9c047a0 JA |
533 | /* |
534 | * State for random io, a bitmap of blocks done vs not done | |
535 | */ | |
4c07ad86 JA |
536 | union { |
537 | os_random_state_t random_state; | |
538 | struct frand_state __random_state; | |
539 | }; | |
ebac4655 | 540 | |
ebac4655 JA |
541 | struct timeval start; /* start of this loop */ |
542 | struct timeval epoch; /* time job was started */ | |
a61eddec | 543 | struct timeval last_issue; |
993bf48b JA |
544 | struct timeval tv_cache; |
545 | unsigned int tv_cache_nr; | |
546 | unsigned int tv_cache_mask; | |
721938ae | 547 | unsigned int ramp_time_over; |
ebac4655 | 548 | |
e9c047a0 JA |
549 | /* |
550 | * read/write mixed workload state | |
551 | */ | |
4c07ad86 JA |
552 | union { |
553 | os_random_state_t rwmix_state; | |
554 | struct frand_state __rwmix_state; | |
555 | }; | |
e4928662 | 556 | unsigned long rwmix_issues; |
e9c047a0 | 557 | enum fio_ddir rwmix_ddir; |
5736c10d | 558 | unsigned int ddir_seq_nr; |
a6ccc7be | 559 | |
e9c047a0 | 560 | /* |
8de8f047 JA |
561 | * IO history logs for verification. We use a tree for sorting, |
562 | * if we are overwriting. Otherwise just use a fifo. | |
e9c047a0 | 563 | */ |
4b87898e | 564 | struct rb_root io_hist_tree; |
01743ee1 | 565 | struct flist_head io_hist_list; |
9e144189 | 566 | unsigned long io_hist_len; |
8de8f047 JA |
567 | |
568 | /* | |
569 | * For IO replaying | |
570 | */ | |
01743ee1 | 571 | struct flist_head io_log_list; |
433afcb4 | 572 | |
0d29de83 JA |
573 | /* |
574 | * For tracking/handling discards | |
575 | */ | |
576 | struct flist_head trim_list; | |
577 | unsigned long trim_entries; | |
578 | ||
1907dbc6 JA |
579 | /* |
580 | * for fileservice, how often to switch to a new file | |
581 | */ | |
582 | unsigned int file_service_nr; | |
583 | unsigned int file_service_left; | |
584 | struct fio_file *file_service_file; | |
9c60ce64 | 585 | |
44f29692 JA |
586 | unsigned int sync_file_range_nr; |
587 | ||
9c60ce64 JA |
588 | /* |
589 | * For generating file sizes | |
590 | */ | |
4c07ad86 JA |
591 | union { |
592 | os_random_state_t file_size_state; | |
593 | struct frand_state __file_size_state; | |
594 | }; | |
f2bba182 RR |
595 | |
596 | /* | |
597 | * Error counts | |
598 | */ | |
599 | unsigned int total_err_count; /* incremented by update_error_count() */ | |
600 | int first_error; /* set by update_error_count() on the first counted error; while non-zero, __td_verror() leaves verror[] untouched */ | |
15dc1934 JA |
601 | |
602 | /* | |
603 | * Can be overloaded by profiles | |
604 | */ | |
7eb36574 | 605 | struct prof_io_ops prof_io_ops; |
58c55ba0 | 606 | void *prof_data; |
ebac4655 JA |
607 | }; |
608 | ||
e592a06b AC |
609 | /* |
610 | * when should interactive ETA output be generated | |
611 | */ | |
612 | enum { /* NOTE(review): presumably the values held by eta_print - confirm */ | |
613 | FIO_ETA_AUTO, /* = 0 */ | |
614 | FIO_ETA_ALWAYS, /* = 1 */ | |
615 | FIO_ETA_NEVER, /* = 2 */ | |
616 | }; | |
617 | ||
/*
 * Error-reporting helpers. 'td' must point at an object with 'error',
 * 'first_error' and 'verror[]' members (struct thread_data).
 *
 * __td_verror(td, err, msg, func)
 *	Does nothing if td->error is already non-zero, so only the first
 *	error is kept. Otherwise stores 'err' in td->error and, unless
 *	td->first_error is set, formats file/line/func/msg into td->verror.
 *	'err' is evaluated at most once; 'td' is evaluated several times, so
 *	it must not have side effects.
 *
 *	The scratch variable has a long, macro-specific name so that a caller
 *	passing a local called 'e' (or similar) as 'err' is not shadowed by it.
 */
#define __td_verror(td, err, msg, func)					\
	do {								\
		if ((td)->error)					\
			break;						\
		int __td_verror_val = (err);				\
		(td)->error = __td_verror_val;				\
		if (!(td)->first_error)					\
			snprintf((td)->verror, sizeof((td)->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg));	\
	} while (0)

/*
 * Reset td->error. Expands to a plain expression (no trailing ';') so it
 * can be used as the body of an if/else like a function call.
 */
#define td_clear_error(td)		\
	((td)->error = 0)

/*
 * td_verror(): report 'err' using strerror(err) as the message. 'err' is
 * captured once so expressions with side effects are not evaluated twice.
 */
#define td_verror(td, err, func)					\
	do {								\
		int __td_verror_errnum = (err);				\
		__td_verror((td), __td_verror_errnum,			\
			    strerror(__td_verror_errnum), (func));	\
	} while (0)

/* td_vmsg(): report 'err' with a caller-supplied message string. */
#define td_vmsg(td, err, msg, func)	\
	__td_verror((td), (err), (msg), (func))
b990b5c0 | 635 | |
ebac4655 JA |
636 | extern int exitall_on_terminate; |
637 | extern int thread_number; | |
9cedf167 | 638 | extern int nr_process, nr_thread; |
ebac4655 JA |
639 | extern int shm_id; |
640 | extern int groupid; | |
c6ae0a5b | 641 | extern int terse_output; |
53cdc686 | 642 | extern int temp_stall_ts; |
1e97cce9 | 643 | extern unsigned long long mlock_size; |
cfc99db7 | 644 | extern unsigned long page_mask, page_size; |
4241ea8f | 645 | extern int read_only; |
e592a06b | 646 | extern int eta_print; |
d3eeeabc | 647 | extern unsigned long done_secs; |
01f06b63 | 648 | extern char *job_section; |
be4ecfdf JA |
649 | extern int fio_gtod_offload; |
650 | extern int fio_gtod_cpu; | |
c223da83 | 651 | extern enum fio_cs fio_clock_source; |
a9523c6f | 652 | extern int warnings_fatal; |
f57a9c59 | 653 | extern int terse_version; |
5c341e9a | 654 | extern int is_backend; |
a37f69b7 | 655 | extern int nr_clients; |
ebac4655 JA |
656 | |
657 | extern struct thread_data *threads; | |
658 | ||
7101d9c2 JA |
659 | static inline void fio_ro_check(struct thread_data *td, struct io_u *io_u) |
660 | { | |
661 | assert(!(io_u->ddir == DDIR_WRITE && !td_write(td))); | |
662 | } | |
663 | ||
0ce8b119 | 664 | #define BLOCKS_PER_MAP (8 * sizeof(unsigned long)) /* bits in one unsigned long, assuming 8-bit bytes */ |
aec2de20 JA |
665 | #define TO_MAP_BLOCK(f, b) (b) /* identity on b; f is unused */ |
666 | #define RAND_MAP_IDX(f, b) (TO_MAP_BLOCK(f, b) / BLOCKS_PER_MAP) /* block number divided by BLOCKS_PER_MAP */ | |
667 | #define RAND_MAP_BIT(f, b) (TO_MAP_BLOCK(f, b) & (BLOCKS_PER_MAP - 1)) /* block number masked to 0..BLOCKS_PER_MAP-1; relies on BLOCKS_PER_MAP being a power of two */ | |
ebac4655 | 668 | |
fca70358 | 669 | #define REAL_MAX_JOBS 2048 /* NOTE(review): presumably the hard upper limit on the number of jobs - confirm against users */ |
ebac4655 | 670 | |
1ec99eea | 671 | #define td_non_fatal_error(e) ((e) == EIO || (e) == EILSEQ) /* true only for EIO and EILSEQ; 'e' is evaluated up to twice */ |
f2bba182 RR |
672 | |
673 | static inline void update_error_count(struct thread_data *td, int err) | |
674 | { | |
675 | td->total_err_count++; | |
676 | if (td->total_err_count == 1) | |
677 | td->first_error = err; | |
678 | } | |
679 | ||
87dc1ab1 JA |
680 | static inline int should_fsync(struct thread_data *td) |
681 | { | |
682 | if (td->last_was_sync) | |
683 | return 0; | |
2dc1bbeb | 684 | if (td->o.odirect) |
87dc1ab1 | 685 | return 0; |
2dc1bbeb | 686 | if (td_write(td) || td_rw(td) || td->o.override_sync) |
87dc1ab1 JA |
687 | return 1; |
688 | ||
689 | return 0; | |
690 | } | |
691 | ||
8914a9d8 | 692 | /* |
214e1eca | 693 | * Init/option functions |
8914a9d8 | 694 | */ |
72cb971b | 695 | extern int __must_check parse_options(int, char **); |
50d16976 JA |
696 | extern int parse_jobs_ini(char *, int, int); |
697 | extern int exec_run(void); | |
698 | extern void reset_fio_state(void); | |
3b8b7135 | 699 | extern int fio_options_parse(struct thread_data *, char **, int); |
74929ac2 | 700 | extern void fio_keywords_init(void); |
214e1eca JA |
701 | extern int fio_cmd_option_parse(struct thread_data *, const char *, char *); |
702 | extern void fio_fill_default_options(struct thread_data *); | |
703 | extern int fio_show_option_help(const char *); | |
704 | extern void fio_options_dup_and_init(struct option *); | |
d23bb327 JA |
705 | extern void options_mem_dupe(struct thread_data *); |
706 | extern void options_mem_free(struct thread_data *); | |
5bfc35d7 | 707 | extern void td_fill_rand_seeds(struct thread_data *); |
79d16311 | 708 | extern void add_job_opts(const char **); |
1ec3d69b JA |
709 | extern char *num2str(unsigned long, int, int, int); |
710 | ||
214e1eca | 711 | #define FIO_GETOPT_JOB 0x89988998 |
07b3232d | 712 | #define FIO_NR_OPTIONS (FIO_MAX_OPTS + 128) /* FIO_MAX_OPTS is not defined in this header; NOTE(review): presumably comes from options.h - confirm */ |
8914a9d8 | 713 | |
263e529f JA |
714 | /* |
715 | * ETA/status stuff | |
716 | */ | |
717 | extern void print_thread_status(void); | |
718 | extern void print_status_init(int); | |
719 | ||
720 | /* | |
721 | * Thread life cycle. Once a thread has a runstate beyond TD_INITIALIZED, it | |
722 | * will never go back again. It may cycle between running/verifying/fsyncing. | |
723 | * Once the thread reaches TD_EXITED, it is just waiting for the core to | |
724 | * reap it. | |
725 | */ | |
726 | enum { /* values are consecutive, starting at 0 */ | |
727 | TD_NOT_CREATED = 0, | |
728 | TD_CREATED, | |
729 | TD_INITIALIZED, | |
b29ee5b3 | 730 | TD_RAMP, |
263e529f | 731 | TD_RUNNING, |
b0f65863 | 732 | TD_PRE_READING, |
263e529f JA |
733 | TD_VERIFYING, |
734 | TD_FSYNCING, | |
735 | TD_EXITED, /* also the state fio_assert() sets on failure */ | |
736 | TD_REAPED, | |
737 | }; | |
738 | ||
b29ee5b3 JA |
739 | extern void td_set_runstate(struct thread_data *, int); |
740 | ||
2f9ade3c JA |
741 | /* |
742 | * Memory helpers | |
743 | */ | |
b2fdda43 | 744 | extern int __must_check fio_pin_memory(void); |
2f9ade3c | 745 | extern void fio_unpin_memory(void); |
b2fdda43 | 746 | extern int __must_check allocate_io_mem(struct thread_data *); |
2f9ade3c JA |
747 | extern void free_io_mem(struct thread_data *); |
748 | ||
b29ee5b3 JA |
749 | /* |
750 | * Reset stats after ramp time completes | |
751 | */ | |
752 | extern void reset_all_stats(struct thread_data *); | |
753 | ||
fb7b71a3 JA |
754 | /* |
755 | * blktrace support | |
756 | */ | |
5e62c22a | 757 | #ifdef FIO_HAVE_BLKTRACE |
fb7b71a3 JA |
758 | extern int is_blktrace(const char *); |
759 | extern int load_blktrace(struct thread_data *, const char *); | |
5e62c22a | 760 | #endif |
fb7b71a3 | 761 | |
2866c82d JA |
762 | /* |
763 | * Mark unused variables passed to ops functions as unused, to silence gcc | |
764 | */ | |
/*
 * Attribute shorthands. All three use the documented __attribute__ keyword;
 * fio_unused previously used the alternate '__attribute' spelling.
 *
 * fio_unused - variable/parameter may be unused, suppress the warning
 * fio_init   - function is a constructor
 * fio_exit   - function is a destructor
 */
#define fio_unused	__attribute__((__unused__))
#define fio_init	__attribute__((constructor))
#define fio_exit	__attribute__((destructor))
2866c82d | 768 | |
34572e28 JA |
769 | #define for_each_td(td, i) /* loop header: i runs 0..thread_number-1 while td points at threads[i] */ \ |
770 | for ((i) = 0, (td) = &threads[0]; (i) < (int) thread_number; (i)++, (td)++) | |
/*
 * Loop header that visits the files of a job: 'i' runs from 0 while it is
 * below (td)->o.nr_files and (td)->files[i] is non-NULL; 'f' is set to
 * (td)->files[i] for each iteration. Nothing is iterated (and files[] is
 * not touched) when (td)->files_index is zero.
 *
 * Written as "if (!cond) {} else for (...)" so the macro's own 'if' already
 * has an 'else': a caller's following 'else' can no longer attach to it.
 */
#define for_each_file(td, f, i)	\
	if (!(td)->files_index) { } else				\
		for ((i) = 0, (f) = (td)->files[0];			\
		     (i) < (td)->o.nr_files && ((f) = (td)->files[(i)]) != NULL; \
		     (i)++)
53cdc686 | 776 | |
0032bf9f JA |
777 | #define fio_assert(td, cond) do { /* on !cond: report, mark td exited with EFAULT, then store through NULL */ \ |
778 | if (!(cond)) { \ | |
340fd243 | 779 | int *__foo = NULL; /* null pointer, written through below */ \ |
0032bf9f | 780 | fprintf(stderr, "file:%s:%d, assert %s failed\n", __FILE__, __LINE__, #cond); \ |
ac18ea38 | 781 | td_set_runstate((td), TD_EXITED); \ |
437c9b71 | 782 | (td)->error = EFAULT; \ |
340fd243 | 783 | *__foo = 0; /* NOTE(review): NULL store, presumably an intentional crash at the failure site - confirm */ \ |
0032bf9f JA |
784 | } \ |
785 | } while (0) | |
786 | ||
12d9d841 JA |
787 | static inline int fio_fill_issue_time(struct thread_data *td) |
788 | { | |
789 | if (td->o.read_iolog_file || | |
790 | !td->o.disable_clat || !td->o.disable_slat || !td->o.disable_bw) | |
791 | return 1; | |
792 | ||
793 | return 0; | |
794 | } | |
795 | ||
581e7141 JA |
796 | static inline int __should_check_rate(struct thread_data *td, |
797 | enum fio_ddir ddir) | |
798 | { | |
799 | struct thread_options *o = &td->o; | |
800 | ||
801 | /* | |
802 | * If some rate setting was given, we need to check it | |
803 | */ | |
804 | if (o->rate[ddir] || o->ratemin[ddir] || o->rate_iops[ddir] || | |
805 | o->rate_iops_min[ddir]) | |
806 | return 1; | |
807 | ||
808 | return 0; | |
809 | } | |
810 | ||
/*
 * Returns non-zero if a rate check is needed: for each of the two slots of
 * 'bytes_done' that is non-zero, the matching rate settings are consulted
 * through __should_check_rate() and the results are OR-ed together.
 */
static inline int should_check_rate(struct thread_data *td,
				    unsigned long *bytes_done)
{
	int need_check = 0;
	int ddir;

	for (ddir = 0; ddir < 2; ddir++) {
		if (bytes_done[ddir])
			need_check |= __should_check_rate(td, ddir);
	}

	return need_check;
}
823 | ||
d529ee19 JA |
824 | static inline int is_power_of_2(unsigned int val) |
825 | { | |
826 | return (val != 0 && ((val & (val - 1)) == 0)); | |
827 | } | |
828 | ||
e8462bd8 JA |
829 | /* |
830 | * We currently only need to do locking if we have verifier threads | |
831 | * accessing our internal structures too | |
832 | */ | |
833 | static inline void td_io_u_lock(struct thread_data *td) | |
834 | { | |
835 | if (td->o.verify_async) | |
836 | pthread_mutex_lock(&td->io_u_lock); | |
837 | } | |
838 | ||
839 | static inline void td_io_u_unlock(struct thread_data *td) | |
840 | { | |
841 | if (td->o.verify_async) | |
842 | pthread_mutex_unlock(&td->io_u_lock); | |
843 | } | |
844 | ||
845 | static inline void td_io_u_free_notify(struct thread_data *td) | |
846 | { | |
847 | if (td->o.verify_async) | |
848 | pthread_cond_signal(&td->free_cond); | |
849 | } | |
850 | ||
ebac4655 | 851 | #endif |