X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=fio.h;h=db0a417fcfb4910b193546b7c10bd81e30a2904f;hp=05911c00d1934f6368b5d7e28942a9deaae278a4;hb=05895c12004fb843eded34aac2332ece2f410b27;hpb=44f29692cfba246981bb3c1b894333a6d2209f51 diff --git a/fio.h b/fio.h index 05911c00..db0a417f 100644 --- a/fio.h +++ b/fio.h @@ -11,10 +11,11 @@ #include #include #include -#include #include #include +struct thread_data; + #include "compiler/compiler.h" #include "flist.h" #include "fifo.h" @@ -31,6 +32,10 @@ #include "helpers.h" #include "options.h" #include "profile.h" +#include "time.h" +#include "lib/getopt.h" +#include "lib/rand.h" +#include "server.h" #ifdef FIO_HAVE_GUASI #include @@ -59,13 +64,112 @@ enum fio_memtype { MEM_MMAPHUGE, /* memory mapped huge file */ }; +/* + * offset generator types + */ +enum { + RW_SEQ_SEQ = 0, + RW_SEQ_IDENT, +}; + /* * How many depth levels to log */ -#define FIO_IO_U_MAP_NR 8 +#define FIO_IO_U_MAP_NR 7 #define FIO_IO_U_LAT_U_NR 10 #define FIO_IO_U_LAT_M_NR 12 +/* + * Aggregate clat samples to report percentile(s) of them. + * + * EXECUTIVE SUMMARY + * + * FIO_IO_U_PLAT_BITS determines the maximum statistical error on the + * value of resulting percentiles. The error will be approximately + * 1/2^(FIO_IO_U_PLAT_BITS+1) of the value. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the maximum + * range being tracked for latency samples. The maximum value tracked + * accurately will be 2^(GROUP_NR + PLAT_BITS -1) microseconds. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory + * requirement of storing those aggregate counts. The memory used will + * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(int) + * bytes. + * + * FIO_IO_U_PLAT_NR is the total number of buckets. 
+ * + * DETAILS + * + * Suppose the clat varies from 0 to 999 (usec), the straightforward + * method is to keep an array of (999 + 1) buckets, in which a counter + * keeps the count of samples which fall in the bucket, e.g., + * {[0],[1],...,[999]}. However this consumes a huge amount of space, + * and can be avoided if an approximation is acceptable. + * + * One such method is to let the range of the bucket be greater + * than one. This method has low accuracy when the value is small. For + * example, let the buckets be {[0,99],[100,199],...,[900,999]}, and + * the represented value of each bucket be the mean of the range. Then + * a value 0 has a round-off error of 49.5. To improve on this, we + * use buckets with non-uniform ranges, while bounding the error of + * each bucket within a ratio of the sample value. A simple example + * would be when error_bound = 0.005, buckets are { + * {[0],[1],...,[99]}, {[100,101],[102,103],...,[198,199]},.., + * {[900,909],[910,919]...} }. The total range is partitioned into + * groups with different ranges, then buckets with uniform ranges. An + * upper bound of the error is (range_of_bucket/2)/value_of_bucket + * + * For better efficiency, we implement this using base two. We group + * samples by their Most Significant Bit (MSB), extract the next M bits + * of them as an index within the group, and discard the rest of the + * bits. + * + * E.g., assume a sample 'x' whose MSB is bit n (starting from bit 0), + * and use M bits for indexing + * + * | n | M bits | bit (n-M-1) ... bit 0 | + * + * Because x is at least 2^n, and bit 0 to bit (n-M-1) is at most + * (2^(n-M) - 1), discarding bit 0 to (n-M-1) makes the round-off + * error + * + * 2^(n-M)-1 2^(n-M) 1 + * e <= --------- <= ------- = --- + * 2^n 2^n 2^M + * + * Furthermore, we use "mean" of the range to represent the bucket, + * the error e can be lowered by half to 1 / 2^(M+1). By using M bits + * as the index, each group must contain 2^M buckets. + * + * E.g. 
Let M (FIO_IO_U_PLAT_BITS) be 6 + * Error bound is 1/2^(6+1) = 0.0078125 (< 1%) + * + * Group MSB #discarded range of #buckets + * error_bits value + * ---------------------------------------------------------------- + * 0* 0~5 0 [0,63] 64 + * 1* 6 0 [64,127] 64 + * 2 7 1 [128,255] 64 + * 3 8 2 [256,511] 64 + * 4 9 3 [512,1023] 64 + * ... ... ... [...,...] ... + * 18 23 17 [8388608,+inf]** 64 + * + * * Special cases: when n < (M-1) or when n == (M-1), in both cases, + * the value cannot be rounded off. Use all bits of the sample as + * index. + * + * ** If a sample's MSB is greater than 23, it will be counted as 23. + */ + +#define FIO_IO_U_PLAT_BITS 6 +#define FIO_IO_U_PLAT_VAL (1 << FIO_IO_U_PLAT_BITS) +#define FIO_IO_U_PLAT_GROUP_NR 19 +#define FIO_IO_U_PLAT_NR (FIO_IO_U_PLAT_GROUP_NR * FIO_IO_U_PLAT_VAL) +#define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified + list of percentiles */ + #define MAX_PATTERN_SIZE 512 struct thread_stat { @@ -79,6 +183,7 @@ struct thread_stat { struct io_log *slat_log; struct io_log *clat_log; + struct io_log *lat_log; struct io_log *bw_log; /* @@ -86,6 +191,7 @@ struct thread_stat { */ struct io_stat clat_stat[2]; /* completion latency */ struct io_stat slat_stat[2]; /* submission latency */ + struct io_stat lat_stat[2]; /* total latency */ struct io_stat bw_stat[2]; /* bandwidth stats */ unsigned long long stat_io_bytes[2]; @@ -104,18 +210,22 @@ struct thread_stat { /* * IO depth and latency stats */ + unsigned int clat_percentiles; + double *percentile_list; + unsigned int io_u_map[FIO_IO_U_MAP_NR]; unsigned int io_u_submit[FIO_IO_U_MAP_NR]; unsigned int io_u_complete[FIO_IO_U_MAP_NR]; unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR]; unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR]; - unsigned long total_io_u[2]; - unsigned long short_io_u[2]; + unsigned int io_u_plat[2][FIO_IO_U_PLAT_NR]; + unsigned long total_io_u[3]; + unsigned long short_io_u[3]; unsigned long total_submit; unsigned long total_complete; unsigned 
long long io_bytes[2]; - unsigned long runtime[2]; + unsigned long long runtime[2]; unsigned long total_run_time; /* @@ -142,14 +252,17 @@ struct thread_options { char *opendir; char *ioengine; enum td_ddir td_ddir; + unsigned int rw_seq; unsigned int kb_base; - unsigned int ddir_nr; + unsigned int ddir_seq_nr; + long ddir_seq_add; unsigned int iodepth; unsigned int iodepth_low; unsigned int iodepth_batch; unsigned int iodepth_batch_complete; unsigned long long size; + unsigned int size_percent; unsigned int fill_device; unsigned long long file_size_low; unsigned long long file_size_high; @@ -183,12 +296,16 @@ struct thread_options { char verify_pattern[MAX_PATTERN_SIZE]; unsigned int verify_pattern_bytes; unsigned int verify_fatal; + unsigned int verify_dump; unsigned int verify_async; + unsigned long long verify_backlog; + unsigned int verify_batch; unsigned int use_thread; unsigned int unlink; unsigned int do_disk_util; unsigned int override_sync; unsigned int rand_repeatable; + unsigned int use_os_rand; unsigned int write_lat_log; unsigned int write_bw_log; unsigned int norandommap; @@ -203,7 +320,8 @@ struct thread_options { unsigned int thinktime_blocks; unsigned int fsync_blocks; unsigned int fdatasync_blocks; - unsigned int start_delay; + unsigned int barrier_blocks; + unsigned long long start_delay; unsigned long long timeout; unsigned long long ramp_time; unsigned int overwrite; @@ -228,21 +346,33 @@ struct thread_options { unsigned int file_service_type; unsigned int group_reporting; unsigned int fadvise_hint; - unsigned int fallocate; + enum fio_fallocate_mode fallocate_mode; unsigned int zero_buffers; unsigned int refill_buffers; + unsigned int scramble_buffers; unsigned int time_based; + unsigned int disable_lat; unsigned int disable_clat; unsigned int disable_slat; unsigned int disable_bw; unsigned int gtod_reduce; unsigned int gtod_cpu; unsigned int gtod_offload; + enum fio_cs clocksource; + unsigned int no_stall; + unsigned int trim_percentage; + 
unsigned int trim_batch; + unsigned int trim_zero; + unsigned long long trim_backlog; + unsigned int clat_percentiles; + unsigned int overwrite_plist; + double percentile_list[FIO_IO_U_LIST_MAX_LEN]; char *read_iolog_file; char *write_iolog_file; char *bw_log_file; char *lat_log_file; + char *replay_redirect; /* * Pre-run and post-run shell @@ -279,11 +409,14 @@ struct thread_options { */ char *cgroup; unsigned int cgroup_weight; + unsigned int cgroup_nodelete; unsigned int uid; unsigned int gid; unsigned int sync_file_range; + + unsigned int userspace_libaio_reap; }; #define FIO_VERROR_SIZE 128 @@ -307,6 +440,7 @@ struct thread_data { union { unsigned int next_file; os_random_state_t next_file_state; + struct frand_state __next_file_state; }; int error; int done; @@ -318,6 +452,7 @@ struct thread_data { unsigned int ioprio; unsigned int ioprio_set; unsigned int last_was_sync; + enum fio_ddir last_ddir; char *mmapfile; int mmapfd; @@ -327,10 +462,25 @@ struct thread_data { char *sysfs_root; - unsigned long rand_seeds[6]; + unsigned long rand_seeds[8]; + + union { + os_random_state_t bsrange_state; + struct frand_state __bsrange_state; + }; + union { + os_random_state_t verify_state; + struct frand_state __verify_state; + }; + union { + os_random_state_t trim_state; + struct frand_state __trim_state; + }; + + struct frand_state buf_state; - os_random_state_t bsrange_state; - os_random_state_t verify_state; + unsigned int verify_batch; + unsigned int trim_batch; int shm_id; @@ -370,6 +520,7 @@ struct thread_data { struct timeval lastrate[2]; unsigned long long total_io_size; + unsigned long long fill_device_size; unsigned long io_issues[2]; unsigned long long io_blocks[2]; @@ -382,7 +533,10 @@ struct thread_data { /* * State for random io, a bitmap of blocks done vs not done */ - os_random_state_t random_state; + union { + os_random_state_t random_state; + struct frand_state __random_state; + }; struct timeval start; /* start of this loop */ struct timeval epoch; /* 
time job was started */ @@ -395,10 +549,13 @@ struct thread_data { /* * read/write mixed workload state */ - os_random_state_t rwmix_state; + union { + os_random_state_t rwmix_state; + struct frand_state __rwmix_state; + }; unsigned long rwmix_issues; enum fio_ddir rwmix_ddir; - unsigned int ddir_nr; + unsigned int ddir_seq_nr; /* * IO history logs for verification. We use a tree for sorting, @@ -406,12 +563,19 @@ struct thread_data { */ struct rb_root io_hist_tree; struct flist_head io_hist_list; + unsigned long io_hist_len; /* * For IO replaying */ struct flist_head io_log_list; + /* + * For tracking/handling discards + */ + struct flist_head trim_list; + unsigned long trim_entries; + /* * for fileservice, how often to switch to a new file */ @@ -424,7 +588,10 @@ struct thread_data { /* * For generating file sizes */ - os_random_state_t file_size_state; + union { + os_random_state_t file_size_state; + struct frand_state __file_size_state; + }; /* * Error counts @@ -481,6 +648,11 @@ extern unsigned long done_secs; extern char *job_section; extern int fio_gtod_offload; extern int fio_gtod_cpu; +extern enum fio_cs fio_clock_source; +extern int warnings_fatal; +extern int terse_version; +extern int is_backend; +extern int nr_clients; extern struct thread_data *threads; @@ -489,12 +661,12 @@ static inline void fio_ro_check(struct thread_data *td, struct io_u *io_u) assert(!(io_u->ddir == DDIR_WRITE && !td_write(td))); } -#define BLOCKS_PER_MAP (8 * sizeof(int)) +#define BLOCKS_PER_MAP (8 * sizeof(unsigned long)) #define TO_MAP_BLOCK(f, b) (b) #define RAND_MAP_IDX(f, b) (TO_MAP_BLOCK(f, b) / BLOCKS_PER_MAP) #define RAND_MAP_BIT(f, b) (TO_MAP_BLOCK(f, b) & (BLOCKS_PER_MAP - 1)) -#define MAX_JOBS (1024) +#define REAL_MAX_JOBS 2048 #define td_non_fatal_error(e) ((e) == EIO || (e) == EILSEQ) @@ -517,29 +689,13 @@ static inline int should_fsync(struct thread_data *td) return 0; } -/* - * Time functions - */ -extern unsigned long long utime_since(struct timeval *, struct 
timeval *); -extern unsigned long long utime_since_now(struct timeval *); -extern unsigned long mtime_since(struct timeval *, struct timeval *); -extern unsigned long mtime_since_now(struct timeval *); -extern unsigned long time_since_now(struct timeval *); -extern unsigned long mtime_since_genesis(void); -extern void usec_spin(unsigned int); -extern void usec_sleep(struct thread_data *, unsigned long); -extern void fill_start_time(struct timeval *); -extern void fio_gettime(struct timeval *, void *); -extern void fio_gtod_init(void); -extern void fio_gtod_update(void); -extern void set_genesis_time(void); -extern int ramp_time_over(struct thread_data *); -extern int in_ramp_time(struct thread_data *); - /* * Init/option functions */ extern int __must_check parse_options(int, char **); +extern int parse_jobs_ini(char *, int, int); +extern int exec_run(void); +extern void reset_fio_state(void); extern int fio_options_parse(struct thread_data *, char **, int); extern void fio_keywords_init(void); extern int fio_cmd_option_parse(struct thread_data *, const char *, char *); @@ -550,6 +706,8 @@ extern void options_mem_dupe(struct thread_data *); extern void options_mem_free(struct thread_data *); extern void td_fill_rand_seeds(struct thread_data *); extern void add_job_opts(const char **); +extern char *num2str(unsigned long, int, int, int); + #define FIO_GETOPT_JOB 0x89988998 #define FIO_NR_OPTIONS (FIO_MAX_OPTS + 128) @@ -620,7 +778,7 @@ extern int load_blktrace(struct thread_data *, const char *); if (!(cond)) { \ int *__foo = NULL; \ fprintf(stderr, "file:%s:%d, assert %s failed\n", __FILE__, __LINE__, #cond); \ - (td)->runstate = TD_EXITED; \ + td_set_runstate((td), TD_EXITED); \ (td)->error = EFAULT; \ *__foo = 0; \ } \ @@ -635,49 +793,6 @@ static inline int fio_fill_issue_time(struct thread_data *td) return 0; } -/* - * Cheesy number->string conversion, complete with carry rounding error. 
- */ -static inline char *num2str(unsigned long num, int maxlen, int base, int pow2) -{ - char postfix[] = { ' ', 'K', 'M', 'G', 'P', 'E' }; - unsigned int thousand; - char *buf; - int i; - - if (pow2) - thousand = 1024; - else - thousand = 1000; - - buf = malloc(128); - - for (i = 0; base > 1; i++) - base /= thousand; - - do { - int len, carry = 0; - - len = sprintf(buf, "%'lu", num); - if (len <= maxlen) { - if (i >= 1) { - buf[len] = postfix[i]; - buf[len + 1] = '\0'; - } - return buf; - } - - if ((num % thousand) >= (thousand / 2)) - carry = 1; - - num /= thousand; - num += carry; - i++; - } while (i <= 5); - - return buf; -} - static inline int __should_check_rate(struct thread_data *td, enum fio_ddir ddir) {