X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=fio.h;h=a6ef937dea7f1f240e1fbf2f31a17535ef8a2de1;hp=ff0068ca17290450307602f6b44585cab71b3e36;hb=a37f69b72a74cbde6151458b890aab8d093f0c9f;hpb=5bfc35d7abe2582dc54127ca1d6e03792c9d62f5 diff --git a/fio.h b/fio.h index ff0068ca..a6ef937d 100644 --- a/fio.h +++ b/fio.h @@ -11,10 +11,11 @@ #include #include #include -#include #include #include +struct thread_data; + #include "compiler/compiler.h" #include "flist.h" #include "fifo.h" @@ -24,6 +25,17 @@ #include "mutex.h" #include "log.h" #include "debug.h" +#include "file.h" +#include "io_ddir.h" +#include "ioengine.h" +#include "iolog.h" +#include "helpers.h" +#include "options.h" +#include "profile.h" +#include "time.h" +#include "lib/getopt.h" +#include "lib/rand.h" +#include "server.h" #ifdef FIO_HAVE_GUASI #include @@ -33,223 +45,12 @@ #include #endif -enum fio_ddir { - DDIR_READ = 0, - DDIR_WRITE, - DDIR_SYNC, - DDIR_INVAL = -1, -}; - -enum td_ddir { - TD_DDIR_READ = 1 << 0, - TD_DDIR_WRITE = 1 << 1, - TD_DDIR_RAND = 1 << 2, - TD_DDIR_RW = TD_DDIR_READ | TD_DDIR_WRITE, - TD_DDIR_RANDREAD = TD_DDIR_READ | TD_DDIR_RAND, - TD_DDIR_RANDWRITE = TD_DDIR_WRITE | TD_DDIR_RAND, - TD_DDIR_RANDRW = TD_DDIR_RW | TD_DDIR_RAND, -}; - -enum file_lock_mode { - FILE_LOCK_NONE, - FILE_LOCK_EXCLUSIVE, - FILE_LOCK_READWRITE, -}; - -/* - * Use for maintaining statistics - */ -struct io_stat { - unsigned long max_val; - unsigned long min_val; - unsigned long samples; - - double mean; - double S; -}; - -/* - * A single data sample - */ -struct io_sample { - unsigned long time; - unsigned long val; - enum fio_ddir ddir; -}; - -/* - * Dynamically growing data sample log - */ -struct io_log { - unsigned long nr_samples; - unsigned long max_samples; - struct io_sample *log; -}; - -/* - * When logging io actions, this matches a single sent io_u - */ -struct io_piece { - union { - struct rb_node rb_node; - struct flist_head list; - }; - union { - int fileno; - struct fio_file *file; 
- }; - unsigned long long offset; - unsigned long len; - enum fio_ddir ddir; - union { - unsigned long delay; - unsigned int file_action; - }; -}; - -enum { - IO_U_F_FREE = 1 << 0, - IO_U_F_FLIGHT = 1 << 1, -}; - -struct thread_data; - -/* - * The io unit - */ -struct io_u { - union { -#ifdef FIO_HAVE_LIBAIO - struct iocb iocb; -#endif -#ifdef FIO_HAVE_POSIXAIO - struct aiocb aiocb; -#endif -#ifdef FIO_HAVE_SGIO - struct sg_io_hdr hdr; -#endif -#ifdef FIO_HAVE_GUASI - guasi_req_t greq; -#endif -#ifdef FIO_HAVE_SOLARISAIO - aio_result_t resultp; -#endif - }; - struct timeval start_time; - struct timeval issue_time; - - /* - * Allocated/set buffer and length - */ - void *buf; - unsigned long buflen; - unsigned long long offset; - unsigned long long endpos; - - /* - * IO engine state, may be different from above when we get - * partial transfers / residual data counts - */ - void *xfer_buf; - unsigned long xfer_buflen; - - unsigned int resid; - unsigned int error; - - enum fio_ddir ddir; - - /* - * io engine private data - */ - union { - unsigned int index; - unsigned int seen; - void *engine_data; - }; - - unsigned int flags; - - struct fio_file *file; - - struct flist_head list; - - /* - * Callback for io completion - */ - int (*end_io)(struct thread_data *, struct io_u *); -}; - -/* - * io_ops->queue() return values - */ -enum { - FIO_Q_COMPLETED = 0, /* completed sync */ - FIO_Q_QUEUED = 1, /* queued, will complete async */ - FIO_Q_BUSY = 2, /* no more room, call ->commit() */ -}; - -#define FIO_HDR_MAGIC 0xf00baaef - -enum { - VERIFY_NONE = 0, /* no verification */ - VERIFY_MD5, /* md5 sum data blocks */ - VERIFY_CRC64, /* crc64 sum data blocks */ - VERIFY_CRC32, /* crc32 sum data blocks */ - VERIFY_CRC32C, /* crc32c sum data blocks */ - VERIFY_CRC32C_INTEL, /* crc32c sum data blocks with hw */ - VERIFY_CRC16, /* crc16 sum data blocks */ - VERIFY_CRC7, /* crc7 sum data blocks */ - VERIFY_SHA256, /* sha256 sum data blocks */ - VERIFY_SHA512, /* sha512 sum data 
blocks */ - VERIFY_META, /* block_num, timestamp etc. */ - VERIFY_NULL, /* pretend to verify */ -}; - -/* - * A header structure associated with each checksummed data block. It is - * followed by a checksum specific header that contains the verification - * data. - */ -struct verify_header { - unsigned int fio_magic; - unsigned int len; - unsigned int verify_type; -}; - -struct vhdr_md5 { - uint32_t md5_digest[16]; -}; -struct vhdr_sha512 { - uint8_t sha512[128]; -}; -struct vhdr_sha256 { - uint8_t sha256[128]; -}; -struct vhdr_crc64 { - uint64_t crc64; -}; -struct vhdr_crc32 { - uint32_t crc32; -}; -struct vhdr_crc16 { - uint16_t crc16; -}; -struct vhdr_crc7 { - uint8_t crc7; -}; -struct vhdr_meta { - uint64_t offset; - unsigned char thread; - unsigned short numberio; - unsigned long time_sec; - unsigned long time_usec; -}; - struct group_run_stats { unsigned long long max_run[2], min_run[2]; unsigned long long max_bw[2], min_bw[2]; unsigned long long io_kb[2]; unsigned long long agg[2]; + unsigned int kb_base; }; /* @@ -264,93 +65,113 @@ enum fio_memtype { }; /* - * The type of object we are working on - */ -enum fio_filetype { - FIO_TYPE_FILE = 1, /* plain file */ - FIO_TYPE_BD, /* block device */ - FIO_TYPE_CHAR, /* character device */ - FIO_TYPE_PIPE, /* pipe */ -}; - -enum fio_ioengine_flags { - FIO_SYNCIO = 1 << 0, /* io engine has synchronous ->queue */ - FIO_RAWIO = 1 << 1, /* some sort of direct/raw io */ - FIO_DISKLESSIO = 1 << 2, /* no disk involved */ - FIO_NOEXTEND = 1 << 3, /* engine can't extend file */ - FIO_NODISKUTIL = 1 << 4, /* diskutil can't handle filename */ - FIO_UNIDIR = 1 << 5, /* engine is uni-directional */ - FIO_NOIO = 1 << 6, /* thread does only pseudo IO */ - FIO_SIGQUIT = 1 << 7, /* needs SIGQUIT to exit */ -}; - -enum fio_file_flags { - FIO_FILE_OPEN = 1 << 0, /* file is open */ - FIO_FILE_CLOSING = 1 << 1, /* file being closed */ - FIO_FILE_EXTEND = 1 << 2, /* needs extend */ - FIO_FILE_DONE = 1 << 3, /* io completed to this file 
*/ - FIO_SIZE_KNOWN = 1 << 4, /* size has been set */ - FIO_FILE_HASHED = 1 << 5, /* file is on hash */ -}; - -/* - * Each thread_data structure has a number of files associated with it, - * this structure holds state information for a single file. + * offset generator types */ -struct fio_file { - struct flist_head hash_list; - enum fio_filetype filetype; - - /* - * A file may not be a file descriptor, let the io engine decide - */ - union { - unsigned long file_data; - int fd; - }; - - /* - * filename and possible memory mapping - */ - char *file_name; - void *mmap; - unsigned int major, minor; - - /* - * size of the file, offset into file, and io size from that offset - */ - unsigned long long real_file_size; - unsigned long long file_offset; - unsigned long long io_size; - - unsigned long long last_pos; - - /* - * if io is protected by a semaphore, this is set - */ - struct fio_mutex *lock; - void *lock_owner; - unsigned int lock_batch; - enum fio_ddir lock_ddir; - - /* - * block map for random io - */ - unsigned int *file_map; - unsigned int num_maps; - unsigned int last_free_lookup; - - int references; - enum fio_file_flags flags; +enum { + RW_SEQ_SEQ = 0, + RW_SEQ_IDENT, }; /* * How many depth levels to log */ -#define FIO_IO_U_MAP_NR 8 +#define FIO_IO_U_MAP_NR 7 #define FIO_IO_U_LAT_U_NR 10 #define FIO_IO_U_LAT_M_NR 12 +/* + * Aggregate clat samples to report percentile(s) of them. + * + * EXECUTIVE SUMMARY + * + * FIO_IO_U_PLAT_BITS determines the maximum statistical error on the + * value of resulting percentiles. The error will be approximately + * 1/2^(FIO_IO_U_PLAT_BITS+1) of the value. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the maximum + * range being tracked for latency samples. The maximum value tracked + * accurately will be 2^(GROUP_NR + PLAT_BITS -1) microseconds. + * + * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory + * requirement of storing those aggregate counts. 
The memory used will + * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(int) + * bytes. + * + * FIO_IO_U_PLAT_NR is the total number of buckets. + * + * DETAILS + * + * Suppose the clat varies from 0 to 999 (usec), the straightforward + * method is to keep an array of (999 + 1) buckets, in which a counter + * keeps the count of samples which fall in the bucket, e.g., + * {[0],[1],...,[999]}. However this consumes a huge amount of space, + * and can be avoided if an approximation is acceptable. + * + * One such method is to let the range of each bucket be greater + * than one. This method has low accuracy when the value is small. For + * example, let the buckets be {[0,99],[100,199],...,[900,999]}, and + * the represented value of each bucket be the mean of the range. Then + * a value 0 has a round-off error of 49.5. To improve on this, we + * use buckets with non-uniform ranges, while bounding the error of + * each bucket within a ratio of the sample value. A simple example + * would be when error_bound = 0.005, buckets are { + * {[0],[1],...,[99]}, {[100,101],[102,103],...,[198,199]},..., + * {[900,909],[910,919]...} }. The total range is partitioned into + * groups with different ranges, then buckets with uniform ranges. An + * upper bound of the error is (range_of_bucket/2)/value_of_bucket. + * + * For better efficiency, we implement this using base two. We group + * samples by their Most Significant Bit (MSB), extract the next M bits + * of them as an index within the group, and discard the rest of the + * bits. + * + * E.g., assume a sample 'x' whose MSB is bit n (starting from bit 0), + * and use M bits for indexing + * + * | n | M bits | bit (n-M-1) ... 
bit 0 | + * + * Because x is at least 2^n, and bit 0 to bit (n-M-1) is at most + * (2^(n-M) - 1), discarding bit 0 to (n-M-1) makes the round-off + * error + * + * e <= (2^(n-M) - 1) / 2^n + * <= 2^(n-M) / 2^n + * = 1 / 2^M + * + * Furthermore, if we use the "mean" of the range to represent the bucket, + * the error e can be lowered by half to 1 / 2^(M+1). By using M bits + * as the index, each group must contain 2^M buckets. + * + * E.g. Let M (FIO_IO_U_PLAT_BITS) be 6 + * Error bound is 1/2^(6+1) = 0.0078125 (< 1%) + * + * Group MSB #discarded range of #buckets + * error_bits value + * ---------------------------------------------------------------- + * 0* 0~5 0 [0,63] 64 + * 1* 6 0 [64,127] 64 + * 2 7 1 [128,255] 64 + * 3 8 2 [256,511] 64 + * 4 9 3 [512,1023] 64 + * ... ... ... [...,...] ... + * 18 23 17 [8388608,+inf]** 64 + * + * * Special cases: when n < M or when n == M, in both cases, + * the value cannot be rounded off. Use all bits of the sample as + * index. + * + * ** If a sample's MSB is greater than 23, it will be counted as 23. 
+ */ + +#define FIO_IO_U_PLAT_BITS 6 +#define FIO_IO_U_PLAT_VAL (1 << FIO_IO_U_PLAT_BITS) +#define FIO_IO_U_PLAT_GROUP_NR 19 +#define FIO_IO_U_PLAT_NR (FIO_IO_U_PLAT_GROUP_NR * FIO_IO_U_PLAT_VAL) +#define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified + list of percentiles */ + +#define MAX_PATTERN_SIZE 512 + struct thread_stat { char *name; char *verror; @@ -362,6 +183,7 @@ struct thread_stat { struct io_log *slat_log; struct io_log *clat_log; + struct io_log *lat_log; struct io_log *bw_log; /* @@ -369,6 +191,7 @@ struct thread_stat { */ struct io_stat clat_stat[2]; /* completion latency */ struct io_stat slat_stat[2]; /* submission latency */ + struct io_stat lat_stat[2]; /* total latency */ struct io_stat bw_stat[2]; /* bandwidth stats */ unsigned long long stat_io_bytes[2]; @@ -387,19 +210,32 @@ struct thread_stat { /* * IO depth and latency stats */ + unsigned int clat_percentiles; + double* percentile_list; + unsigned int io_u_map[FIO_IO_U_MAP_NR]; unsigned int io_u_submit[FIO_IO_U_MAP_NR]; unsigned int io_u_complete[FIO_IO_U_MAP_NR]; unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR]; unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR]; - unsigned long total_io_u[2]; - unsigned long short_io_u[2]; + unsigned int io_u_plat[2][FIO_IO_U_PLAT_NR]; + unsigned long total_io_u[3]; + unsigned long short_io_u[3]; unsigned long total_submit; unsigned long total_complete; unsigned long long io_bytes[2]; - unsigned long runtime[2]; + unsigned long long runtime[2]; unsigned long total_run_time; + + /* + * IO Error related stats + */ + unsigned continue_on_error; + unsigned long total_err_count; + int first_error; + + unsigned int kb_base; }; struct bssplit { @@ -416,13 +252,17 @@ struct thread_options { char *opendir; char *ioengine; enum td_ddir td_ddir; - unsigned int ddir_nr; + unsigned int rw_seq; + unsigned int kb_base; + unsigned int ddir_seq_nr; + long ddir_seq_add; unsigned int iodepth; unsigned int iodepth_low; unsigned int iodepth_batch; unsigned int 
iodepth_batch_complete; unsigned long long size; + unsigned int size_percent; unsigned int fill_device; unsigned long long file_size_low; unsigned long long file_size_high; @@ -432,8 +272,8 @@ struct thread_options { unsigned int ba[2]; unsigned int min_bs[2]; unsigned int max_bs[2]; - struct bssplit *bssplit; - unsigned int bssplit_nr; + struct bssplit *bssplit[2]; + unsigned int bssplit_nr[2]; unsigned int nr_files; unsigned int open_files; @@ -446,20 +286,26 @@ struct thread_options { unsigned int create_fsync; unsigned int create_on_open; unsigned int end_fsync; + unsigned int pre_read; unsigned int sync_io; unsigned int verify; unsigned int do_verify; unsigned int verifysort; unsigned int verify_interval; unsigned int verify_offset; - unsigned int verify_pattern; + char verify_pattern[MAX_PATTERN_SIZE]; unsigned int verify_pattern_bytes; unsigned int verify_fatal; + unsigned int verify_dump; + unsigned int verify_async; + unsigned long long verify_backlog; + unsigned int verify_batch; unsigned int use_thread; unsigned int unlink; unsigned int do_disk_util; unsigned int override_sync; unsigned int rand_repeatable; + unsigned int use_os_rand; unsigned int write_lat_log; unsigned int write_bw_log; unsigned int norandommap; @@ -473,7 +319,9 @@ struct thread_options { unsigned int thinktime_spin; unsigned int thinktime_blocks; unsigned int fsync_blocks; - unsigned int start_delay; + unsigned int fdatasync_blocks; + unsigned int barrier_blocks; + unsigned long long start_delay; unsigned long long timeout; unsigned long long ramp_time; unsigned int overwrite; @@ -482,12 +330,15 @@ struct thread_options { unsigned long long zone_size; unsigned long long zone_skip; enum fio_memtype mem_type; + unsigned int mem_align; unsigned int stonewall; unsigned int new_group; unsigned int numjobs; os_cpu_mask_t cpumask; unsigned int cpumask_set; + os_cpu_mask_t verify_cpumask; + unsigned int verify_cpumask_set; unsigned int iolog; unsigned int rwmixcycle; unsigned int rwmix[2]; @@ 
-495,20 +346,33 @@ struct thread_options { unsigned int file_service_type; unsigned int group_reporting; unsigned int fadvise_hint; + enum fio_fallocate_mode fallocate_mode; unsigned int zero_buffers; unsigned int refill_buffers; + unsigned int scramble_buffers; unsigned int time_based; + unsigned int disable_lat; unsigned int disable_clat; unsigned int disable_slat; unsigned int disable_bw; unsigned int gtod_reduce; unsigned int gtod_cpu; unsigned int gtod_offload; + enum fio_cs clocksource; + unsigned int no_stall; + unsigned int trim_percentage; + unsigned int trim_batch; + unsigned int trim_zero; + unsigned long long trim_backlog; + unsigned int clat_percentiles; + unsigned int overwrite_plist; + double percentile_list[FIO_IO_U_LIST_MAX_LEN]; char *read_iolog_file; char *write_iolog_file; char *bw_log_file; char *lat_log_file; + char *replay_redirect; /* * Pre-run and post-run shell @@ -516,11 +380,11 @@ struct thread_options { char *exec_prerun; char *exec_postrun; - unsigned int rate; - unsigned int ratemin; + unsigned int rate[2]; + unsigned int ratemin[2]; unsigned int ratecycle; - unsigned int rate_iops; - unsigned int rate_iops_min; + unsigned int rate_iops[2]; + unsigned int rate_iops_min[2]; char *ioscheduler; @@ -529,6 +393,30 @@ struct thread_options { */ unsigned int cpuload; unsigned int cpucycle; + + /* + * I/O Error handling + */ + unsigned int continue_on_error; + + /* + * Benchmark profile type + */ + char *profile; + + /* + * blkio cgroup support + */ + char *cgroup; + unsigned int cgroup_weight; + unsigned int cgroup_nodelete; + + unsigned int uid; + unsigned int gid; + + unsigned int sync_file_range; + + unsigned int userspace_libaio_reap; }; #define FIO_VERROR_SIZE 128 @@ -552,6 +440,7 @@ struct thread_data { union { unsigned int next_file; os_random_state_t next_file_state; + struct frand_state __next_file_state; }; int error; int done; @@ -563,6 +452,7 @@ struct thread_data { unsigned int ioprio; unsigned int ioprio_set; unsigned int 
last_was_sync; + enum fio_ddir last_ddir; char *mmapfile; int mmapfd; @@ -572,10 +462,25 @@ struct thread_data { char *sysfs_root; - unsigned long rand_seeds[6]; + unsigned long rand_seeds[8]; + + union { + os_random_state_t bsrange_state; + struct frand_state __bsrange_state; + }; + union { + os_random_state_t verify_state; + struct frand_state __verify_state; + }; + union { + os_random_state_t trim_state; + struct frand_state __trim_state; + }; - os_random_state_t bsrange_state; - os_random_state_t verify_state; + struct frand_state buf_state; + + unsigned int verify_batch; + unsigned int trim_batch; int shm_id; @@ -593,17 +498,29 @@ struct thread_data { struct flist_head io_u_freelist; struct flist_head io_u_busylist; struct flist_head io_u_requeues; + pthread_mutex_t io_u_lock; + pthread_cond_t free_cond; + + /* + * async verify offload + */ + struct flist_head verify_list; + pthread_t *verify_threads; + unsigned int nr_verify_threads; + pthread_cond_t verify_cond; + int verify_thread_exit; /* * Rate state */ - unsigned long rate_usec_cycle; - long rate_pending_usleep; - unsigned long rate_bytes; - unsigned long rate_blocks; - struct timeval lastrate; + unsigned long rate_nsec_cycle[2]; + long rate_pending_usleep[2]; + unsigned long rate_bytes[2]; + unsigned long rate_blocks[2]; + struct timeval lastrate[2]; unsigned long long total_io_size; + unsigned long long fill_device_size; unsigned long io_issues[2]; unsigned long long io_blocks[2]; @@ -616,25 +533,29 @@ struct thread_data { /* * State for random io, a bitmap of blocks done vs not done */ - os_random_state_t random_state; + union { + os_random_state_t random_state; + struct frand_state __random_state; + }; struct timeval start; /* start of this loop */ struct timeval epoch; /* time job was started */ - struct timeval rw_end[2]; struct timeval last_issue; struct timeval tv_cache; unsigned int tv_cache_nr; unsigned int tv_cache_mask; - unsigned int rw_end_set[2]; unsigned int ramp_time_over; /* * 
read/write mixed workload state */ - os_random_state_t rwmix_state; + union { + os_random_state_t rwmix_state; + struct frand_state __rwmix_state; + }; unsigned long rwmix_issues; enum fio_ddir rwmix_ddir; - unsigned int ddir_nr; + unsigned int ddir_seq_nr; /* * IO history logs for verification. We use a tree for sorting, @@ -642,12 +563,19 @@ struct thread_data { */ struct rb_root io_hist_tree; struct flist_head io_hist_list; + unsigned long io_hist_len; /* * For IO replaying */ struct flist_head io_log_list; + /* + * For tracking/handling discards + */ + struct flist_head trim_list; + unsigned long trim_entries; + /* * for fileservice, how often to switch to a new file */ @@ -655,20 +583,27 @@ struct thread_data { unsigned int file_service_left; struct fio_file *file_service_file; + unsigned int sync_file_range_nr; + /* * For generating file sizes */ - os_random_state_t file_size_state; -}; + union { + os_random_state_t file_size_state; + struct frand_state __file_size_state; + }; -/* - * roundrobin available files, or choose one at random, or do each one - * serially. 
- */ -enum { - FIO_FSERVICE_RANDOM = 1, - FIO_FSERVICE_RR = 2, - FIO_FSERVICE_SEQ = 3, + /* + * Error counts + */ + unsigned int total_err_count; + int first_error; + + /* + * Can be overloaded by profiles + */ + struct prof_io_ops prof_io_ops; + void *prof_data; }; /* @@ -686,10 +621,13 @@ enum { break; \ int e = (err); \ (td)->error = e; \ - snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ + if (!(td)->first_error) \ + snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ } while (0) +#define td_clear_error(td) \ + (td)->error = 0; #define td_verror(td, err, func) \ __td_verror((td), (err), strerror((err)), (func)) #define td_vmsg(td, err, msg, func) \ @@ -710,26 +648,34 @@ extern unsigned long done_secs; extern char *job_section; extern int fio_gtod_offload; extern int fio_gtod_cpu; +extern enum fio_cs fio_clock_source; +extern int warnings_fatal; +extern int terse_version; +extern int is_backend; +extern int nr_clients; extern struct thread_data *threads; -#define td_read(td) ((td)->o.td_ddir & TD_DDIR_READ) -#define td_write(td) ((td)->o.td_ddir & TD_DDIR_WRITE) -#define td_rw(td) (((td)->o.td_ddir & TD_DDIR_RW) == TD_DDIR_RW) -#define td_random(td) ((td)->o.td_ddir & TD_DDIR_RAND) -#define file_randommap(td, f) (!(td)->o.norandommap && (f)->file_map) - static inline void fio_ro_check(struct thread_data *td, struct io_u *io_u) { assert(!(io_u->ddir == DDIR_WRITE && !td_write(td))); } -#define BLOCKS_PER_MAP (8 * sizeof(int)) +#define BLOCKS_PER_MAP (8 * sizeof(unsigned long)) #define TO_MAP_BLOCK(f, b) (b) #define RAND_MAP_IDX(f, b) (TO_MAP_BLOCK(f, b) / BLOCKS_PER_MAP) #define RAND_MAP_BIT(f, b) (TO_MAP_BLOCK(f, b) & (BLOCKS_PER_MAP - 1)) -#define MAX_JOBS (1024) +#define REAL_MAX_JOBS 2048 + +#define td_non_fatal_error(e) ((e) == EIO || (e) == EILSEQ) + +static inline void update_error_count(struct thread_data *td, int err) +{ + 
td->total_err_count++; + if (td->total_err_count == 1) + td->first_error = err; +} static inline int should_fsync(struct thread_data *td) { @@ -743,113 +689,15 @@ static inline int should_fsync(struct thread_data *td) return 0; } -/* - * Disk utils as read in /sys/block//stat - */ -struct disk_util_stat { - unsigned ios[2]; - unsigned merges[2]; - unsigned long long sectors[2]; - unsigned ticks[2]; - unsigned io_ticks; - unsigned time_in_queue; -}; - -/* - * Per-device disk util management - */ -struct disk_util { - struct flist_head list; - /* If this disk is a slave, hook it into the master's - * list using this head. - */ - struct flist_head slavelist; - - char *name; - char *sysfs_root; - char path[256]; - int major, minor; - - struct disk_util_stat dus; - struct disk_util_stat last_dus; - - /* For software raids, this entry maintains pointers to the - * entries for the slave devices. The disk_util entries for - * the slaves devices should primarily be maintained through - * the disk_list list, i.e. for memory allocation and - * de-allocation, etc. Whereas this list should be used only - * for aggregating a software RAID's disk util figures. 
- */ - struct flist_head slaves; - - unsigned long msec; - struct timeval time; -}; - -#define DISK_UTIL_MSEC (250) - -/* - * Log exports - */ -enum file_log_act { - FIO_LOG_ADD_FILE, - FIO_LOG_OPEN_FILE, - FIO_LOG_CLOSE_FILE, - FIO_LOG_UNLINK_FILE, -}; - -extern int __must_check read_iolog_get(struct thread_data *, struct io_u *); -extern void log_io_u(struct thread_data *, struct io_u *); -extern void log_file(struct thread_data *, struct fio_file *, enum file_log_act); -extern int __must_check init_iolog(struct thread_data *td); -extern void log_io_piece(struct thread_data *, struct io_u *); -extern void queue_io_piece(struct thread_data *, struct io_piece *); -extern void prune_io_piece_log(struct thread_data *); -extern void write_iolog_close(struct thread_data *); - -/* - * Logging - */ -extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long); -extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long); -extern void add_bw_sample(struct thread_data *, enum fio_ddir, struct timeval *); -extern void show_run_stats(void); -extern void init_disk_util(struct thread_data *); -extern void update_rusage_stat(struct thread_data *); -extern void update_io_ticks(void); -extern void setup_log(struct io_log **); -extern void finish_log(struct thread_data *, struct io_log *, const char *); -extern void finish_log_named(struct thread_data *, struct io_log *, const char *, const char *); -extern void __finish_log(struct io_log *, const char *); -extern struct io_log *agg_io_log[2]; -extern int write_bw_log; -extern void add_agg_sample(unsigned long, enum fio_ddir); - -/* - * Time functions - */ -extern unsigned long long utime_since(struct timeval *, struct timeval *); -extern unsigned long long utime_since_now(struct timeval *); -extern unsigned long mtime_since(struct timeval *, struct timeval *); -extern unsigned long mtime_since_now(struct timeval *); -extern unsigned long time_since_now(struct timeval *); -extern unsigned 
long mtime_since_genesis(void); -extern void usec_spin(unsigned int); -extern void usec_sleep(struct thread_data *, unsigned long); -extern void rate_throttle(struct thread_data *, unsigned long, unsigned int); -extern void fill_start_time(struct timeval *); -extern void fio_gettime(struct timeval *, void *); -extern void fio_gtod_init(void); -extern void fio_gtod_update(void); -extern void set_genesis_time(void); -extern int ramp_time_over(struct thread_data *); -extern int in_ramp_time(struct thread_data *); - /* * Init/option functions */ extern int __must_check parse_options(int, char **); +extern int parse_jobs_ini(char *, int, int); +extern int exec_run(void); +extern void reset_fio_state(void); extern int fio_options_parse(struct thread_data *, char **, int); +extern void fio_keywords_init(void); extern int fio_cmd_option_parse(struct thread_data *, const char *, char *); extern void fio_fill_default_options(struct thread_data *); extern int fio_show_option_help(const char *); @@ -857,30 +705,11 @@ extern void fio_options_dup_and_init(struct option *); extern void options_mem_dupe(struct thread_data *); extern void options_mem_free(struct thread_data *); extern void td_fill_rand_seeds(struct thread_data *); -#define FIO_GETOPT_JOB 0x89988998 -#define FIO_NR_OPTIONS 128 +extern void add_job_opts(const char **); +extern char *num2str(unsigned long, int, int, int); -/* - * File setup/shutdown - */ -extern void close_files(struct thread_data *); -extern void close_and_free_files(struct thread_data *); -extern int __must_check setup_files(struct thread_data *); -extern int __must_check file_invalidate_cache(struct thread_data *, struct fio_file *); -extern int __must_check generic_open_file(struct thread_data *, struct fio_file *); -extern int __must_check generic_close_file(struct thread_data *, struct fio_file *); -extern int __must_check generic_get_file_size(struct thread_data *, struct fio_file *); -extern int add_file(struct thread_data *, const char *); 
-extern void get_file(struct fio_file *); -extern int __must_check put_file(struct thread_data *, struct fio_file *); -extern void lock_file(struct thread_data *, struct fio_file *, enum fio_ddir); -extern void unlock_file(struct thread_data *, struct fio_file *); -extern void unlock_file_all(struct thread_data *, struct fio_file *); -extern int add_dir_files(struct thread_data *, const char *); -extern int init_random_map(struct thread_data *); -extern void dup_files(struct thread_data *, struct thread_data *); -extern int get_fileno(struct thread_data *, const char *); -extern void free_release_files(struct thread_data *); +#define FIO_GETOPT_JOB 0x89988998 +#define FIO_NR_OPTIONS (FIO_MAX_OPTS + 128) /* * ETA/status stuff @@ -888,19 +717,6 @@ extern void free_release_files(struct thread_data *); extern void print_thread_status(void); extern void print_status_init(int); -/* - * disk util stuff - */ -#ifdef FIO_HAVE_DISK_UTIL -extern void show_disk_util(void); -extern void init_disk_util(struct thread_data *); -extern void update_io_ticks(void); -#else -#define show_disk_util() -#define init_disk_util(td) -#define update_io_ticks() -#endif - /* * Thread life cycle. Once a thread has a runstate beyond TD_INITIALIZED, it * will never back again. It may cycle between running/verififying/fsyncing. 
@@ -913,6 +729,7 @@ enum { TD_INITIALIZED, TD_RAMP, TD_RUNNING, + TD_PRE_READING, TD_VERIFYING, TD_FSYNCING, TD_EXITED, @@ -921,13 +738,6 @@ enum { extern void td_set_runstate(struct thread_data *, int); -/* - * Verify helpers - */ -extern void populate_verify_io_u(struct thread_data *, struct io_u *); -extern int __must_check get_next_verify(struct thread_data *td, struct io_u *); -extern int __must_check verify_io_u(struct thread_data *, struct io_u *); - /* * Memory helpers */ @@ -936,41 +746,11 @@ extern void fio_unpin_memory(void); extern int __must_check allocate_io_mem(struct thread_data *); extern void free_io_mem(struct thread_data *); -/* - * io unit handling - */ -#define queue_full(td) flist_empty(&(td)->io_u_freelist) -extern struct io_u *__get_io_u(struct thread_data *); -extern struct io_u *get_io_u(struct thread_data *); -extern void put_io_u(struct thread_data *, struct io_u *); -extern void requeue_io_u(struct thread_data *, struct io_u **); -extern long __must_check io_u_sync_complete(struct thread_data *, struct io_u *); -extern long __must_check io_u_queued_complete(struct thread_data *, int); -extern void io_u_queued(struct thread_data *, struct io_u *); -extern void io_u_log_error(struct thread_data *, struct io_u *); -extern void io_u_mark_depth(struct thread_data *, unsigned int); -extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int); -void io_u_mark_complete(struct thread_data *, unsigned int); -void io_u_mark_submit(struct thread_data *, unsigned int); - /* * Reset stats after ramp time completes */ extern void reset_all_stats(struct thread_data *); -/* - * io engine entry points - */ -extern int __must_check td_io_init(struct thread_data *); -extern int __must_check td_io_prep(struct thread_data *, struct io_u *); -extern int __must_check td_io_queue(struct thread_data *, struct io_u *); -extern int __must_check td_io_sync(struct thread_data *, struct fio_file *); -extern int __must_check 
td_io_getevents(struct thread_data *, unsigned int, unsigned int, struct timespec *); -extern int __must_check td_io_commit(struct thread_data *); -extern int __must_check td_io_open_file(struct thread_data *, struct fio_file *); -extern int td_io_close_file(struct thread_data *, struct fio_file *); -extern int __must_check td_io_get_file_size(struct thread_data *, struct fio_file *); - /* * blktrace support */ @@ -979,34 +759,6 @@ extern int is_blktrace(const char *); extern int load_blktrace(struct thread_data *, const char *); #endif -struct ioengine_ops { - struct flist_head list; - char name[16]; - int version; - int flags; - int (*setup)(struct thread_data *); - int (*init)(struct thread_data *); - int (*prep)(struct thread_data *, struct io_u *); - int (*queue)(struct thread_data *, struct io_u *); - int (*commit)(struct thread_data *); - int (*getevents)(struct thread_data *, unsigned int, unsigned int, struct timespec *); - struct io_u *(*event)(struct thread_data *, int); - int (*cancel)(struct thread_data *, struct io_u *); - void (*cleanup)(struct thread_data *); - int (*open_file)(struct thread_data *, struct fio_file *); - int (*close_file)(struct thread_data *, struct fio_file *); - int (*get_file_size)(struct thread_data *, struct fio_file *); - void *data; - void *dlhandle; -}; - -#define FIO_IOOPS_VERSION 10 - -extern struct ioengine_ops *load_ioengine(struct thread_data *, const char *); -extern void register_ioengine(struct ioengine_ops *); -extern void unregister_ioengine(struct ioengine_ops *); -extern void close_ioengine(struct thread_data *); - /* * Mark unused variables passed to ops functions as unused, to silence gcc */ @@ -1026,52 +778,74 @@ extern void close_ioengine(struct thread_data *); if (!(cond)) { \ int *__foo = NULL; \ fprintf(stderr, "file:%s:%d, assert %s failed\n", __FILE__, __LINE__, #cond); \ - (td)->runstate = TD_EXITED; \ + td_set_runstate((td), TD_EXITED); \ (td)->error = EFAULT; \ *__foo = 0; \ } \ } while (0) -static 
inline void fio_file_reset(struct fio_file *f) +static inline int fio_fill_issue_time(struct thread_data *td) { - f->last_free_lookup = 0; - f->last_pos = f->file_offset; - if (f->file_map) - memset(f->file_map, 0, f->num_maps * sizeof(int)); + if (td->o.read_iolog_file || + !td->o.disable_clat || !td->o.disable_slat || !td->o.disable_bw) + return 1; + + return 0; } -static inline void clear_error(struct thread_data *td) +static inline int __should_check_rate(struct thread_data *td, + enum fio_ddir ddir) { - td->error = 0; - td->verror[0] = '\0'; + struct thread_options *o = &td->o; + + /* + * If some rate setting was given, we need to check it + */ + if (o->rate[ddir] || o->ratemin[ddir] || o->rate_iops[ddir] || + o->rate_iops_min[ddir]) + return 1; + + return 0; } -#ifdef FIO_INC_DEBUG -static inline void dprint_io_u(struct io_u *io_u, const char *p) +static inline int should_check_rate(struct thread_data *td, + unsigned long *bytes_done) { - struct fio_file *f = io_u->file; + int ret = 0; - dprint(FD_IO, "%s: io_u %p: off=%llu/len=%lu/ddir=%d", p, io_u, - (unsigned long long) io_u->offset, - io_u->buflen, io_u->ddir); - if (fio_debug & (1 << FD_IO)) { - if (f) - log_info("/%s", f->file_name); + if (bytes_done[0]) + ret |= __should_check_rate(td, 0); + if (bytes_done[1]) + ret |= __should_check_rate(td, 1); - log_info("\n"); - } + return ret; } -#else -#define dprint_io_u(io_u, p) -#endif -static inline int fio_fill_issue_time(struct thread_data *td) +static inline int is_power_of_2(unsigned int val) { - if (td->o.read_iolog_file || - !td->o.disable_clat || !td->o.disable_slat || !td->o.disable_bw) - return 1; + return (val != 0 && ((val & (val - 1)) == 0)); +} - return 0; +/* + * We currently only need to do locking if we have verifier threads + * accessing our internal structures too + */ +static inline void td_io_u_lock(struct thread_data *td) +{ + if (td->o.verify_async) + pthread_mutex_lock(&td->io_u_lock); +} + +static inline void td_io_u_unlock(struct 
thread_data *td) +{ + if (td->o.verify_async) + pthread_mutex_unlock(&td->io_u_lock); +} + +static inline void td_io_u_free_notify(struct thread_data *td) +{ + if (td->o.verify_async) + pthread_cond_signal(&td->free_cond); } #endif