From: Jens Axboe Date: Mon, 9 Dec 2013 19:38:53 +0000 (-0700) Subject: Add options to have fio latency profile a device X-Git-Tag: fio-2.1.5~45 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=3e260a46ea9a8de224c3d0a29a608da3440f284a Add options to have fio latency profile a device This adds three new options: - latency_target. This defines a specific latency target, in usec. - latency_window. This defines the period over which fio samples. - latency_percentile. This defines the percentage of IOs that must meet the criteria specified by latency_target/latency_window. With these options set, fio will run the described workload and vary the queue depth between 1 and iodepth= to find the best performing spot that meets the criteria specified by the three options. A sample job file is also added to demonstrate how to use this. Signed-off-by: Jens Axboe --- diff --git a/HOWTO b/HOWTO index 250bc582..044b5722 100644 --- a/HOWTO +++ b/HOWTO @@ -881,6 +881,20 @@ rate_iops_min=int If fio doesn't meet this rate of IO, it will cause the job to exit. The same format as rate is used for read vs write separation. +latency_target=int If set, fio will attempt to find the max performance + point that the given workload will run at while maintaining a + latency below this target. The value is given in microseconds. + See latency_window and latency_percentile + +latency_window=int Used with latency_target to specify the sample window + that the job is run at varying queue depths to test the + performance. The value is given in microseconds. + +latency_percentile=float The percentage of IOs that must fall within the + criteria specified by latency_target and latency_window. If not + set, this defaults to 100.0, meaning that all IOs must be equal + to or below the value set by latency_target. + max_latency=int If set, fio will exit the job if it exceeds this maximum latency. It will exit with an ETIME error. 
diff --git a/README b/README index 1373c789..0acb04fb 100644 --- a/README +++ b/README @@ -197,6 +197,8 @@ Currently, additional logging is available for: mutex Dump info only related to mutex up/down ops profile Dump info related to profile extensions time Dump info related to internal time keeping + net Dump info related to networking connections + rate Dump info related to IO rate switching ? or help Show available debug options. One can specify multiple debug options: e.g. --debug=file,mem will enable diff --git a/backend.c b/backend.c index 2ec478c1..101024d1 100644 --- a/backend.c +++ b/backend.c @@ -649,6 +649,8 @@ static uint64_t do_io(struct thread_data *td) else td_set_runstate(td, TD_RUNNING); + lat_target_init(td); + while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) || (!flist_empty(&td->trim_list)) || !io_bytes_exceeded(td) || td->o.time_based) { @@ -680,8 +682,11 @@ static uint64_t do_io(struct thread_data *td) break; io_u = get_io_u(td); - if (!io_u) + if (!io_u) { + if (td->o.latency_target) + goto reap; break; + } ddir = io_u->ddir; @@ -776,6 +781,7 @@ sync_done: * can get BUSY even without IO queued, if the system is * resource starved. 
*/ +reap: full = queue_full(td) || (ret == FIO_Q_BUSY && td->cur_depth); if (full || !td->o.iodepth_batch_complete) { min_evts = min(td->o.iodepth_batch_complete, @@ -812,6 +818,8 @@ sync_done: break; } } + if (!in_ramp_time(td) && td->o.latency_target) + lat_target_check(td); if (td->o.thinktime) { unsigned long long b; diff --git a/cconv.c b/cconv.c index 82383b2d..dd61d103 100644 --- a/cconv.c +++ b/cconv.c @@ -199,6 +199,9 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->flow_watermark = __le32_to_cpu(top->flow_watermark); o->flow_sleep = le32_to_cpu(top->flow_sleep); o->sync_file_range = le32_to_cpu(top->sync_file_range); + o->latency_target = le64_to_cpu(top->latency_target); + o->latency_window = le64_to_cpu(top->latency_window); + o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i)); o->compress_percentage = le32_to_cpu(top->compress_percentage); o->compress_chunk = le32_to_cpu(top->compress_chunk); @@ -348,6 +351,9 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->flow_watermark = __cpu_to_le32(o->flow_watermark); top->flow_sleep = cpu_to_le32(o->flow_sleep); top->sync_file_range = cpu_to_le32(o->sync_file_range); + top->latency_target = __cpu_to_le64(o->latency_target); + top->latency_window = __cpu_to_le64(o->latency_window); + top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f)); top->compress_percentage = cpu_to_le32(o->compress_percentage); top->compress_chunk = cpu_to_le32(o->compress_chunk); diff --git a/client.c b/client.c index 1557240b..7975ce60 100644 --- a/client.c +++ b/client.c @@ -796,6 +796,11 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->first_error = le32_to_cpu(src->first_error); dst->kb_base = le32_to_cpu(src->kb_base); dst->unit_base = le32_to_cpu(src->unit_base); + + dst->latency_depth = le32_to_cpu(src->latency_depth); + dst->latency_target = le64_to_cpu(src->latency_target); + 
dst->latency_window = le64_to_cpu(src->latency_window); + dst->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(src->latency_percentile.u.i)); } static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src) diff --git a/debug.h b/debug.h index 6a74131a..e248695e 100644 --- a/debug.h +++ b/debug.h @@ -19,6 +19,7 @@ enum { FD_PROFILE, FD_TIME, FD_NET, + FD_RATE, FD_DEBUG_MAX, }; diff --git a/examples/latency-profile.fio b/examples/latency-profile.fio new file mode 100644 index 00000000..f5cd8444 --- /dev/null +++ b/examples/latency-profile.fio @@ -0,0 +1,21 @@ +# Test job that demonstrates how to use the latency target +# profiling. Fio will find the queue depth between 1..128 +# that fits within the latency constraints of this 4k random +# read workload. + +[global] +bs=4k +rw=randread +random_generator=lfsr +direct=1 +ioengine=libaio +iodepth=128 +# Set max acceptable latency to 500msec +latency_target=500000 +# profile over a 5s window +latency_window=5000000 +# 99.9% of IOs must be below the target +latency_percentile=99.9 + +[device] +filename=/dev/sda diff --git a/fio.1 b/fio.1 index 15a1ac5d..9ee0eef1 100644 --- a/fio.1 +++ b/fio.1 @@ -764,6 +764,23 @@ is used for read vs write separation. Average bandwidth for \fBrate\fR and \fBratemin\fR over this number of milliseconds. Default: 1000ms. .TP +.BI latency_target \fR=\fPint +If set, fio will attempt to find the max performance point that the given +workload will run at while maintaining a latency below this target. The +value is given in microseconds. See \fBlatency_window\fR and +\fBlatency_percentile\fR. +.TP +.BI latency_window \fR=\fPint +Used with \fBlatency_target\fR to specify the sample window that the job +is run at varying queue depths to test the performance. The value is given +in microseconds. +.TP +.BI latency_percentile \fR=\fPfloat +The percentage of IOs that must fall within the criteria specified by +\fBlatency_target\fR and \fBlatency_window\fR. 
If not set, this defaults +to 100.0, meaning that all IOs must be equal to or below the value set +by \fBlatency_target\fR. +.TP .BI max_latency \fR=\fPint If set, fio will exit the job if it exceeds this maximum latency. It will exit with an ETIME error. diff --git a/fio.h b/fio.h index d0fe83a5..a0927bd9 100644 --- a/fio.h +++ b/fio.h @@ -248,6 +248,16 @@ struct thread_data { unsigned int tv_cache_mask; unsigned int ramp_time_over; + /* + * Time since last latency_window was started + */ + struct timeval latency_ts; + unsigned int latency_qd; + unsigned int latency_qd_high; + unsigned int latency_qd_low; + unsigned int latency_failed; + uint64_t latency_ios; + /* * read/write mixed workload state */ @@ -483,6 +493,12 @@ extern int is_blktrace(const char *, int *); extern int load_blktrace(struct thread_data *, const char *, int); #endif +/* + * Latency target helpers + */ +extern void lat_target_check(struct thread_data *); +extern void lat_target_init(struct thread_data *); + #define for_each_td(td, i) \ for ((i) = 0, (td) = &threads[0]; (i) < (int) thread_number; (i)++, (td)++) #define for_each_file(td, f, i) \ diff --git a/init.c b/init.c index 4dd0c9ae..ba22f785 100644 --- a/init.c +++ b/init.c @@ -1382,7 +1382,7 @@ static void usage(const char *name) printf("%s [options] [job options] \n", name); printf(" --debug=options\tEnable debug logging. 
May be one/more of:\n" "\t\t\tprocess,file,io,mem,blktrace,verify,random,parse,\n" - "\t\t\tdiskutil,job,mutex,profile,time,net\n"); + "\t\t\tdiskutil,job,mutex,profile,time,net,rate\n"); printf(" --parse-only\t\tParse options only, don't start any IO\n"); printf(" --output\t\tWrite output to file\n"); printf(" --runtime\t\tRuntime in seconds\n"); @@ -1482,6 +1482,10 @@ struct debug_level debug_levels[] = { .help = "Network logging", .shift = FD_NET, }, + { .name = "rate", + .help = "Rate logging", + .shift = FD_RATE, + }, { .name = NULL, }, }; diff --git a/io_u.c b/io_u.c index 7df57aa8..ea6c2518 100644 --- a/io_u.c +++ b/io_u.c @@ -1126,6 +1126,136 @@ static int set_io_u_file(struct thread_data *td, struct io_u *io_u) return 0; } +static void lat_fatal(struct thread_data *td, struct io_completion_data *icd, + unsigned long tusec, unsigned long max_usec) +{ + if (!td->error) + log_err("fio: latency of %lu usec exceeds specified max (%lu usec)\n", tusec, max_usec); + td_verror(td, ETIMEDOUT, "max latency exceeded"); + icd->error = ETIMEDOUT; +} + +static void lat_new_cycle(struct thread_data *td) +{ + fio_gettime(&td->latency_ts, NULL); + td->latency_ios = ddir_rw_sum(td->io_blocks); + td->latency_failed = 0; +} + +/* + * We had an IO outside the latency target. Reduce the queue depth. If we + * are at QD=1, then it's time to give up. + */ +static int __lat_target_failed(struct thread_data *td) +{ + if (td->latency_qd == 1) + return 1; + + td->latency_qd_high = td->latency_qd; + td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2; + + dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high); + + /* + * When we ramp QD down, quiesce existing IO to prevent + * a storm of ramp downs due to pending higher depth. 
+ */ + io_u_quiesce(td); + lat_new_cycle(td); + return 0; +} + +static int lat_target_failed(struct thread_data *td) +{ + if (td->o.latency_percentile.u.f == 100.0) + return __lat_target_failed(td); + + td->latency_failed++; + return 0; +} + +void lat_target_init(struct thread_data *td) +{ + if (td->o.latency_target) { + dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target); + fio_gettime(&td->latency_ts, NULL); + td->latency_qd = 1; + td->latency_qd_high = td->o.iodepth; + td->latency_qd_low = 1; + td->latency_ios = ddir_rw_sum(td->io_blocks); + } else + td->latency_qd = td->o.iodepth; +} + +static void lat_target_success(struct thread_data *td) +{ + const unsigned int qd = td->latency_qd; + + td->latency_qd_low = td->latency_qd; + + /* + * If we haven't failed yet, we double up to a failing value instead + * of bisecting from highest possible queue depth. If we have set + * a limit other than td->o.iodepth, bisect between that. + */ + if (td->latency_qd_high != td->o.iodepth) + td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2; + else + td->latency_qd *= 2; + + if (td->latency_qd > td->o.iodepth) + td->latency_qd = td->o.iodepth; + + dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high); + /* + * Same as last one, we are done + */ + if (td->latency_qd == qd) + td->done = 1; + + lat_new_cycle(td); +} + +/* + * Check if we can bump the queue depth + */ +void lat_target_check(struct thread_data *td) +{ + uint64_t usec_window; + uint64_t ios; + double success_ios; + + usec_window = utime_since_now(&td->latency_ts); + if (usec_window < td->o.latency_window) + return; + + ios = ddir_rw_sum(td->io_blocks) - td->latency_ios; + success_ios = (double) (ios - td->latency_failed) / (double) ios; + success_ios *= 100.0; + + dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f); + + if (success_ios >= td->o.latency_percentile.u.f) + lat_target_success(td); + else + 
__lat_target_failed(td); +} + +/* + * If latency target is enabled, we might be ramping up or down and not + * using the full queue depth available. + */ +int queue_full(struct thread_data *td) +{ + const int qempty = io_u_qempty(&td->io_u_freelist); + + if (qempty) + return 1; + if (!td->o.latency_target) + return 0; + + return td->cur_depth >= td->latency_qd; +} struct io_u *__get_io_u(struct thread_data *td) { @@ -1136,7 +1266,7 @@ struct io_u *__get_io_u(struct thread_data *td) again: if (!io_u_rempty(&td->io_u_requeues)) io_u = io_u_rpop(&td->io_u_requeues); - else if (!io_u_qempty(&td->io_u_freelist)) { + else if (!queue_full(td)) { io_u = io_u_qpop(&td->io_u_freelist); io_u->buflen = 0; @@ -1395,11 +1525,11 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u, icd->error = ops->io_u_lat(td, tusec); } - if (td->o.max_latency && tusec > td->o.max_latency) { - if (!td->error) - log_err("fio: latency of %lu usec exceeds specified max (%u usec)\n", tusec, td->o.max_latency); - td_verror(td, ETIMEDOUT, "max latency exceeded"); - icd->error = ETIMEDOUT; + if (td->o.max_latency && tusec > td->o.max_latency) + lat_fatal(td, icd, tusec, td->o.max_latency); + if (td->o.latency_target && tusec > td->o.latency_target) { + if (lat_target_failed(td)) + lat_fatal(td, icd, tusec, td->o.latency_target); } } diff --git a/ioengine.h b/ioengine.h index 31662ebe..6dd2aa40 100644 --- a/ioengine.h +++ b/ioengine.h @@ -188,7 +188,6 @@ extern int fio_show_ioengine_help(const char *engine); /* * io unit handling */ -#define queue_full(td) io_u_qempty(&(td)->io_u_freelist) extern struct io_u *__get_io_u(struct thread_data *); extern struct io_u *get_io_u(struct thread_data *); extern void put_io_u(struct thread_data *, struct io_u *); @@ -204,6 +203,7 @@ extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int); void io_u_mark_complete(struct 
thread_data *, unsigned int); void io_u_mark_submit(struct thread_data *, unsigned int); +int queue_full(struct thread_data *); int do_io_u_sync(struct thread_data *, struct io_u *); int do_io_u_trim(struct thread_data *, struct io_u *); diff --git a/libfio.c b/libfio.c index c26d6a3a..7f365147 100644 --- a/libfio.c +++ b/libfio.c @@ -133,6 +133,8 @@ void reset_all_stats(struct thread_data *td) fio_gettime(&tv, NULL); memcpy(&td->epoch, &tv, sizeof(tv)); memcpy(&td->start, &tv, sizeof(tv)); + + lat_target_init(td); } void reset_fio_state(void) diff --git a/options.c b/options.c index d2493e45..b0f45098 100644 --- a/options.c +++ b/options.c @@ -1042,6 +1042,10 @@ struct opt_group *opt_group_from_mask(unsigned int *mask) } static struct opt_group fio_opt_cat_groups[] = { + { + .name = "Latency profiling", + .mask = FIO_OPT_G_LATPROF, + }, { .name = "Rate", .mask = FIO_OPT_G_RATE, @@ -2623,7 +2627,38 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .off1 = td_var_offset(max_latency), .help = "Maximum tolerated IO latency (usec)", .category = FIO_OPT_C_IO, - .group = FIO_OPT_G_RATE, + .group = FIO_OPT_G_LATPROF, + }, + { + .name = "latency_target", + .lname = "Latency Target (usec)", + .type = FIO_OPT_STR_VAL_TIME, + .off1 = td_var_offset(latency_target), + .help = "Ramp to max queue depth supporting this latency", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_LATPROF, + }, + { + .name = "latency_window", + .lname = "Latency Window (usec)", + .type = FIO_OPT_STR_VAL_TIME, + .off1 = td_var_offset(latency_window), + .help = "Time to sustain latency_target", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_LATPROF, + }, + { + .name = "latency_percentile", + .lname = "Latency Percentile", + .type = FIO_OPT_FLOAT_LIST, + .off1 = td_var_offset(latency_percentile), + .help = "Percentile of IOs must be below latency_target", + .def = "100", + .maxlen = 1, + .minfp = 0.0, + .maxfp = 100.0, + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_LATPROF, }, { .name = 
"invalidate", diff --git a/options.h b/options.h index 3795afd5..fc363682 100644 --- a/options.h +++ b/options.h @@ -94,6 +94,7 @@ enum opt_category_group { __FIO_OPT_G_NETIO, __FIO_OPT_G_LIBAIO, __FIO_OPT_G_ACT, + __FIO_OPT_G_LATPROF, __FIO_OPT_G_NR, FIO_OPT_G_RATE = (1U << __FIO_OPT_G_RATE), @@ -122,6 +123,7 @@ enum opt_category_group { FIO_OPT_G_NETIO = (1U << __FIO_OPT_G_NETIO), FIO_OPT_G_LIBAIO = (1U << __FIO_OPT_G_LIBAIO), FIO_OPT_G_ACT = (1U << __FIO_OPT_G_ACT), + FIO_OPT_G_LATPROF = (1U << __FIO_OPT_G_LATPROF), FIO_OPT_G_INVALID = (1U << __FIO_OPT_G_NR), }; diff --git a/parse.c b/parse.c index e3f431ed..6141c91e 100644 --- a/parse.c +++ b/parse.c @@ -547,9 +547,11 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, ** Initialize precision to 0 and zero out list ** in case specified list is shorter than default */ - ul2 = 0; - ilp = td_var(data, o->off2); - *ilp = ul2; + if (o->off2) { + ul2 = 0; + ilp = td_var(data, o->off2); + *ilp = ul2; + } flp = td_var(data, o->off1); for(i = 0; i < o->maxlen; i++) @@ -592,9 +594,11 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, while (*++cp2 != '\0' && *cp2 >= '0' && *cp2 <= '9') len++; - ilp = td_var(data, o->off2); - if (len > *ilp) - *ilp = len; + if (o->off2) { + ilp = td_var(data, o->off2); + if (len > *ilp) + *ilp = len; + } } break; diff --git a/server.c b/server.c index 83245430..6e96b77a 100644 --- a/server.c +++ b/server.c @@ -1042,6 +1042,11 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) p.ts.kb_base = cpu_to_le32(ts->kb_base); p.ts.unit_base = cpu_to_le32(ts->unit_base); + p.ts.latency_depth = cpu_to_le32(ts->latency_depth); + p.ts.latency_target = cpu_to_le64(ts->latency_target); + p.ts.latency_window = cpu_to_le64(ts->latency_window); + p.ts.latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(ts->latency_percentile.u.f)); + convert_gs(&p.rs, rs); fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p), 
NULL, NULL); diff --git a/server.h b/server.h index 405370e3..84901cf6 100644 --- a/server.h +++ b/server.h @@ -38,7 +38,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 27, + FIO_SERVER_VER = 28, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, diff --git a/stat.c b/stat.c index ac5ff162..5d50ae86 100644 --- a/stat.c +++ b/stat.c @@ -497,7 +497,6 @@ static void show_latencies(struct thread_stat *ts) show_lat_m(io_u_lat_m); } - void show_thread_status_normal(struct thread_stat *ts, struct group_run_stats *rs) { double usr_cpu, sys_cpu; @@ -587,6 +586,13 @@ void show_thread_status_normal(struct thread_stat *ts, struct group_run_stats *r ts->first_error, strerror(ts->first_error)); } + if (ts->latency_depth) { + log_info(" latency : target=%llu, window=%llu, percentile=%.2f%%, depth=%u\n", + (unsigned long long)ts->latency_target, + (unsigned long long)ts->latency_window, + ts->latency_percentile.u.f, + ts->latency_depth); + } } static void show_ddir_status_terse(struct thread_stat *ts, @@ -975,6 +981,13 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, json_object_add_value_int(root, "first_error", ts->first_error); } + if (ts->latency_depth) { + json_object_add_value_int(root, "latency_depth", ts->latency_depth); + json_object_add_value_int(root, "latency_target", ts->latency_target); + json_object_add_value_float(root, "latency_percentile", ts->latency_percentile.u.f); + json_object_add_value_int(root, "latency_window", ts->latency_window); + } + /* Additional output if description is set */ if (strlen(ts->description)) json_object_add_value_string(root, "desc", ts->description); @@ -1265,6 +1278,11 @@ static void __show_run_stats(void) } } + ts->latency_depth = td->latency_qd; + ts->latency_target = td->o.latency_target; + ts->latency_percentile = td->o.latency_percentile; + ts->latency_window = td->o.latency_window; + sum_thread_stats(ts, &td->ts, idx); } diff --git a/stat.h b/stat.h index 8190d1e6..7ad0c9df 100644 --- a/stat.h +++ 
b/stat.h @@ -175,6 +175,11 @@ struct thread_stat { uint32_t kb_base; uint32_t unit_base; + + uint32_t latency_depth; + uint64_t latency_target; + fio_fp64_t latency_percentile; + uint64_t latency_window; }; struct jobs_eta { diff --git a/thread_options.h b/thread_options.h index 44cbf91a..7476d8c2 100644 --- a/thread_options.h +++ b/thread_options.h @@ -240,6 +240,10 @@ struct thread_options { unsigned long long number_ios; unsigned int sync_file_range; + + unsigned long long latency_target; + unsigned long long latency_window; + fio_fp64_t latency_percentile; }; #define FIO_TOP_STR_MAX 256 @@ -446,6 +450,10 @@ struct thread_options_pack { uint64_t number_ios; uint32_t sync_file_range; + + uint64_t latency_target; + uint64_t latency_window; + fio_fp64_t latency_percentile; } __attribute__((packed)); extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);