X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=fio.h;h=c74116257571b2cc6815b8261beede44f8cf4a29;hp=e8c025d6880e2d3613abf43578acf3efbbd8dab5;hb=0a0b49007cbce8d16c9214e7ed47a2b7cc0cc7ed;hpb=92a3d86ed1b2040c618b296e4d2ce503352792bc diff --git a/fio.h b/fio.h index e8c025d6..c7411625 100644 --- a/fio.h +++ b/fio.h @@ -14,6 +14,8 @@ #include #include +struct thread_data; + #include "compiler/compiler.h" #include "flist.h" #include "fifo.h" @@ -32,6 +34,7 @@ #include "profile.h" #include "time.h" #include "lib/getopt.h" +#include "lib/rand.h" #ifdef FIO_HAVE_GUASI #include @@ -71,10 +74,101 @@ enum { /* * How many depth levels to log */ -#define FIO_IO_U_MAP_NR 8 +#define FIO_IO_U_MAP_NR 7 #define FIO_IO_U_LAT_U_NR 10 #define FIO_IO_U_LAT_M_NR 12 +/* + * Aggregate clat samples to report percentile(s) of them. + * + * EXECUTIVE SUMMARY + * + * FIO_IO_U_PLAT_BITS determines the maximum statistical error on the + * value of resulting percentiles. The error will be approximately + * 1/2^(FIO_IO_U_PLAT_BITS+1) of the value. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the maximum + * range being tracked for latency samples. The maximum value tracked + * accurately will be 2^(GROUP_NR + PLAT_BITS -1) microseconds. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory + * requirement of storing those aggregate counts. The memory used will + * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(int) + * bytes. + * + * FIO_IO_U_PLAT_NR is the total number of buckets. + * + * DETAILS + * + * Suppose the clat varies from 0 to 999 (usec), the straightforward + * method is to keep an array of (999 + 1) buckets, in which a counter + * keeps the count of samples which fall in the bucket, e.g., + * {[0],[1],...,[999]}. However this consumes a huge amount of space, + * and can be avoided if an approximation is acceptable. + * + * One such method is to let the range of the bucket to be greater + * than one. This method has low accuracy when the value is small. For + * example, let the buckets be {[0,99],[100,199],...,[900,999]}, and + * the represented value of each bucket be the mean of the range. Then + * a value 0 has an round-off error of 49.5. To improve on this, we + * use buckets with non-uniform ranges, while bounding the error of + * each bucket within a ratio of the sample value. A simple example + * would be when error_bound = 0.005, buckets are { + * {[0],[1],...,[99]}, {[100,101],[102,103],...,[198,199]},.., + * {[900,909],[910,919]...} }. The total range is partitioned into + * groups with different ranges, then buckets with uniform ranges. An + * upper bound of the error is (range_of_bucket/2)/value_of_bucket + * + * For better efficiency, we implement this using base two. We group + * samples by their Most Significant Bit (MSB), extract the next M bit + * of them as an index within the group, and discard the rest of the + * bits. + * + * E.g., assume a sample 'x' whose MSB is bit n (starting from bit 0), + * and use M bit for indexing + * + * | n | M bits | bit (n-M-1) ... bit 0 | + * + * Because x is at least 2^n, and bit 0 to bit (n-M-1) is at most + * (2^(n-M) - 1), discarding bit 0 to (n-M-1) makes the round-off + * error + * + * 2^(n-M)-1 2^(n-M) 1 + * e <= --------- <= ------- = --- + * 2^n 2^n 2^M + * + * Furthermore, we use "mean" of the range to represent the bucket, + * the error e can be lowered by half to 1 / 2^(M+1). By using M bits + * as the index, each group must contains 2^M buckets. + * + * E.g. Let M (FIO_IO_U_PLAT_BITS) be 6 + * Error bound is 1/2^(6+1) = 0.0078125 (< 1%) + * + * Group MSB #discarded range of #buckets + * error_bits value + * ---------------------------------------------------------------- + * 0* 0~5 0 [0,63] 64 + * 1* 6 0 [64,127] 64 + * 2 7 1 [128,255] 64 + * 3 8 2 [256,511] 64 + * 4 9 3 [512,1023] 64 + * ... ... ... [...,...] ... + * 18 23 17 [8838608,+inf]** 64 + * + * * Special cases: when n < (M-1) or when n == (M-1), in both cases, + * the value cannot be rounded off. Use all bits of the sample as + * index. + * + * ** If a sample's MSB is greater than 23, it will be counted as 23. + */ + +#define FIO_IO_U_PLAT_BITS 6 +#define FIO_IO_U_PLAT_VAL (1 << FIO_IO_U_PLAT_BITS) +#define FIO_IO_U_PLAT_GROUP_NR 19 +#define FIO_IO_U_PLAT_NR (FIO_IO_U_PLAT_GROUP_NR * FIO_IO_U_PLAT_VAL) +#define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified + list of percentiles */ + #define MAX_PATTERN_SIZE 512 struct thread_stat { @@ -115,18 +209,22 @@ struct thread_stat { /* * IO depth and latency stats */ + unsigned int clat_percentiles; + double* percentile_list; + unsigned int io_u_map[FIO_IO_U_MAP_NR]; unsigned int io_u_submit[FIO_IO_U_MAP_NR]; unsigned int io_u_complete[FIO_IO_U_MAP_NR]; unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR]; unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR]; + unsigned int io_u_plat[3][FIO_IO_U_PLAT_NR]; unsigned long total_io_u[3]; unsigned long short_io_u[3]; unsigned long total_submit; unsigned long total_complete; unsigned long long io_bytes[2]; - unsigned long runtime[2]; + unsigned long long runtime[2]; unsigned long total_run_time; /* @@ -162,6 +260,7 @@ struct thread_options { unsigned int iodepth_batch_complete; unsigned long long size; + unsigned int size_percent; unsigned int fill_device; unsigned long long file_size_low; unsigned long long file_size_high; @@ -195,6 +294,7 @@ struct thread_options { char verify_pattern[MAX_PATTERN_SIZE]; unsigned int verify_pattern_bytes; unsigned int verify_fatal; + unsigned int verify_dump; unsigned int verify_async; unsigned long long verify_backlog; unsigned int verify_batch; @@ -203,6 +303,7 @@ struct thread_options { unsigned int do_disk_util; unsigned int override_sync; unsigned int rand_repeatable; + unsigned int use_os_rand; unsigned int write_lat_log; unsigned int write_bw_log; unsigned int norandommap; @@ -217,7 +318,8 @@ struct thread_options { unsigned int thinktime_blocks; unsigned int fsync_blocks; unsigned int fdatasync_blocks; - unsigned long start_delay; + unsigned int barrier_blocks; + unsigned long long start_delay; unsigned long long timeout; unsigned long long ramp_time; unsigned int overwrite; @@ -242,7 +344,7 @@ struct thread_options { unsigned int file_service_type; unsigned int group_reporting; unsigned int fadvise_hint; - unsigned int fallocate; + enum fio_fallocate_mode fallocate_mode; unsigned int zero_buffers; unsigned int refill_buffers; unsigned int time_based; @@ -259,6 +361,9 @@ struct thread_options { unsigned int trim_batch; unsigned int trim_zero; unsigned long long trim_backlog; + unsigned int clat_percentiles; + unsigned int overwrite_plist; + double percentile_list[FIO_IO_U_LIST_MAX_LEN]; char *read_iolog_file; char *write_iolog_file; @@ -330,6 +435,7 @@ struct thread_data { union { unsigned int next_file; os_random_state_t next_file_state; + struct frand_state __next_file_state; }; int error; int done; @@ -353,9 +459,18 @@ struct thread_data { unsigned long rand_seeds[7]; - os_random_state_t bsrange_state; - os_random_state_t verify_state; - os_random_state_t trim_state; + union { + os_random_state_t bsrange_state; + struct frand_state __bsrange_state; + }; + union { + os_random_state_t verify_state; + struct frand_state __verify_state; + }; + union { + os_random_state_t trim_state; + struct frand_state __trim_state; + }; unsigned int verify_batch; unsigned int trim_batch; @@ -411,7 +526,10 @@ struct thread_data { /* * State for random io, a bitmap of blocks done vs not done */ - os_random_state_t random_state; + union { + os_random_state_t random_state; + struct frand_state __random_state; + }; struct timeval start; /* start of this loop */ struct timeval epoch; /* time job was started */ @@ -424,7 +542,10 @@ struct thread_data { /* * read/write mixed workload state */ - os_random_state_t rwmix_state; + union { + os_random_state_t rwmix_state; + struct frand_state __rwmix_state; + }; unsigned long rwmix_issues; enum fio_ddir rwmix_ddir; unsigned int ddir_seq_nr; @@ -460,7 +581,10 @@ struct thread_data { /* * For generating file sizes */ - os_random_state_t file_size_state; + union { + os_random_state_t file_size_state; + struct frand_state __file_size_state; + }; /* * Error counts @@ -518,6 +642,7 @@ extern char *job_section; extern int fio_gtod_offload; extern int fio_gtod_cpu; extern enum fio_cs fio_clock_source; +extern int warnings_fatal; extern struct thread_data *threads; @@ -526,12 +651,12 @@ static inline void fio_ro_check(struct thread_data *td, struct io_u *io_u) assert(!(io_u->ddir == DDIR_WRITE && !td_write(td))); } -#define BLOCKS_PER_MAP (8 * sizeof(int)) +#define BLOCKS_PER_MAP (8 * sizeof(unsigned long)) #define TO_MAP_BLOCK(f, b) (b) #define RAND_MAP_IDX(f, b) (TO_MAP_BLOCK(f, b) / BLOCKS_PER_MAP) #define RAND_MAP_BIT(f, b) (TO_MAP_BLOCK(f, b) & (BLOCKS_PER_MAP - 1)) -#define MAX_JOBS (1024) +#define REAL_MAX_JOBS 2048 #define td_non_fatal_error(e) ((e) == EIO || (e) == EILSEQ) @@ -568,6 +693,8 @@ extern void options_mem_dupe(struct thread_data *); extern void options_mem_free(struct thread_data *); extern void td_fill_rand_seeds(struct thread_data *); extern void add_job_opts(const char **); +extern char *num2str(unsigned long, int, int, int); + #define FIO_GETOPT_JOB 0x89988998 #define FIO_NR_OPTIONS (FIO_MAX_OPTS + 128) @@ -638,7 +765,7 @@ extern int load_blktrace(struct thread_data *, const char *); if (!(cond)) { \ int *__foo = NULL; \ fprintf(stderr, "file:%s:%d, assert %s failed\n", __FILE__, __LINE__, #cond); \ - (td)->runstate = TD_EXITED; \ + td_set_runstate((td), TD_EXITED); \ (td)->error = EFAULT; \ *__foo = 0; \ } \ @@ -653,49 +780,6 @@ static inline int fio_fill_issue_time(struct thread_data *td) return 0; } -/* - * Cheesy number->string conversion, complete with carry rounding error. - */ -static inline char *num2str(unsigned long num, int maxlen, int base, int pow2) -{ - char postfix[] = { ' ', 'K', 'M', 'G', 'P', 'E' }; - unsigned int thousand; - char *buf; - int i; - - if (pow2) - thousand = 1024; - else - thousand = 1000; - - buf = malloc(128); - - for (i = 0; base > 1; i++) - base /= thousand; - - do { - int len, carry = 0; - - len = sprintf(buf, "%'lu", num); - if (len <= maxlen) { - if (i >= 1) { - buf[len] = postfix[i]; - buf[len + 1] = '\0'; - } - return buf; - } - - if ((num % thousand) >= (thousand / 2)) - carry = 1; - - num /= thousand; - num += carry; - i++; - } while (i <= 5); - - return buf; -} - static inline int __should_check_rate(struct thread_data *td, enum fio_ddir ddir) {