the job to exit. The same format as rate is used for read vs
write separation.
+latency_target=int If set, fio will attempt to find the max performance
+ point that the given workload will run at while maintaining a
+ latency below this target. The value is given in microseconds.
+ See latency_window and latency_percentile.
+
+latency_window=int Used with latency_target to specify the sample window
+ over which the job is run at varying queue depths to test the
+ performance. The value is given in microseconds.
+
+latency_percentile=float The percentage of IOs that must fall within the
+ criteria specified by latency_target and latency_window. If not
+ set, this defaults to 100.0, meaning that all IOs must be equal
+ to or below the value set by latency_target.
+
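+ For example, the following combination (values purely illustrative)
+ asks fio to find the highest queue depth at which 99% of IOs
+ complete within 10 msec, evaluated over 1 second sample windows:
+
+ latency_target=10000
+ latency_window=1000000
+ latency_percentile=99
+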
max_latency=int If set, fio will exit the job if it exceeds this maximum
latency. It will exit with an ETIME error.
mutex Dump info only related to mutex up/down ops
profile Dump info related to profile extensions
time Dump info related to internal time keeping
+ net Dump info related to networking connections
+ rate Dump info related to IO rate switching
? or help Show available debug options.
One can specify multiple debug options: e.g. --debug=file,mem will enable
else
td_set_runstate(td, TD_RUNNING);
+ lat_target_init(td);
+
while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
(!flist_empty(&td->trim_list)) || !io_bytes_exceeded(td) ||
td->o.time_based) {
break;
io_u = get_io_u(td);
- if (!io_u)
+ if (!io_u) {
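+ /*
+ * With a latency target set, running out of io_u's may just
+ * mean we are throttled at the current latency_qd. Reap
+ * completions instead of exiting the loop.
+ */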
+ if (td->o.latency_target)
+ goto reap;
break;
+ }
ddir = io_u->ddir;
* can get BUSY even without IO queued, if the system is
* resource starved.
*/
+reap:
full = queue_full(td) || (ret == FIO_Q_BUSY && td->cur_depth);
if (full || !td->o.iodepth_batch_complete) {
min_evts = min(td->o.iodepth_batch_complete,
break;
}
}
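+ /*
+ * Once past any ramp time, check whether the latency window
+ * has elapsed and the queue depth should be adjusted.
+ */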
+ if (!in_ramp_time(td) && td->o.latency_target)
+ lat_target_check(td);
if (td->o.thinktime) {
unsigned long long b;
o->flow_watermark = __le32_to_cpu(top->flow_watermark);
o->flow_sleep = le32_to_cpu(top->flow_sleep);
o->sync_file_range = le32_to_cpu(top->sync_file_range);
+ o->latency_target = le64_to_cpu(top->latency_target);
+ o->latency_window = le64_to_cpu(top->latency_window);
+ o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i));
o->compress_percentage = le32_to_cpu(top->compress_percentage);
o->compress_chunk = le32_to_cpu(top->compress_chunk);
top->flow_watermark = __cpu_to_le32(o->flow_watermark);
top->flow_sleep = cpu_to_le32(o->flow_sleep);
top->sync_file_range = cpu_to_le32(o->sync_file_range);
+ top->latency_target = __cpu_to_le64(o->latency_target);
+ top->latency_window = __cpu_to_le64(o->latency_window);
+ top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f));
top->compress_percentage = cpu_to_le32(o->compress_percentage);
top->compress_chunk = cpu_to_le32(o->compress_chunk);
dst->first_error = le32_to_cpu(src->first_error);
dst->kb_base = le32_to_cpu(src->kb_base);
dst->unit_base = le32_to_cpu(src->unit_base);
+
+ dst->latency_depth = le32_to_cpu(src->latency_depth);
+ dst->latency_target = le64_to_cpu(src->latency_target);
+ dst->latency_window = le64_to_cpu(src->latency_window);
+ dst->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(src->latency_percentile.u.i));
}
static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
FD_PROFILE,
FD_TIME,
FD_NET,
+ FD_RATE,
FD_DEBUG_MAX,
};
--- /dev/null
+# Test job that demonstrates how to use the latency target
+# profiling. Fio will find the queue depth between 1..128
+# that fits within the latency constraints of this 4k random
+# read workload.
+
+[global]
+bs=4k
+rw=randread
+random_generator=lfsr
+direct=1
+ioengine=libaio
+iodepth=128
+# Set max acceptable latency to 500msec
+latency_target=500000
+# profile over a 5s window
+latency_window=5000000
+# 99.9% of IOs must be below the target
+latency_percentile=99.9
+
+[device]
+filename=/dev/sda
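+
+# When the job completes, the normal output includes a "latency" line
+# showing the target, window, percentile, and the queue depth that was
+# found.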
Average bandwidth for \fBrate\fR and \fBratemin\fR over this number of
milliseconds. Default: 1000ms.
.TP
+.BI latency_target \fR=\fPint
+If set, fio will attempt to find the max performance point that the given
+workload will run at while maintaining a latency below this target. The
+value is given in microseconds. See \fBlatency_window\fR and
+\fBlatency_percentile\fR.
+.TP
+.BI latency_window \fR=\fPint
+Used with \fBlatency_target\fR to specify the sample window over which
+the job is run at varying queue depths to test the performance. The
+value is given in microseconds.
+.TP
+.BI latency_percentile \fR=\fPfloat
+The percentage of IOs that must fall within the criteria specified by
+\fBlatency_target\fR and \fBlatency_window\fR. If not set, this defaults
+to 100.0, meaning that all IOs must be equal to or below the value set
+by \fBlatency_target\fR.
+.TP
.BI max_latency \fR=\fPint
If set, fio will exit the job if it exceeds this maximum latency. It will exit
with an ETIME error.
unsigned int tv_cache_mask;
unsigned int ramp_time_over;
+ /*
+ * Time the current latency_window sample period was started
+ */
+ struct timeval latency_ts;
+ unsigned int latency_qd;
+ unsigned int latency_qd_high;
+ unsigned int latency_qd_low;
+ unsigned int latency_failed;
+ uint64_t latency_ios;
+
/*
* read/write mixed workload state
*/
extern int load_blktrace(struct thread_data *, const char *, int);
#endif
+/*
+ * Latency target helpers
+ */
+extern void lat_target_check(struct thread_data *);
+extern void lat_target_init(struct thread_data *);
+
#define for_each_td(td, i) \
for ((i) = 0, (td) = &threads[0]; (i) < (int) thread_number; (i)++, (td)++)
#define for_each_file(td, f, i) \
printf("%s [options] [job options] <job file(s)>\n", name);
printf(" --debug=options\tEnable debug logging. May be one/more of:\n"
"\t\t\tprocess,file,io,mem,blktrace,verify,random,parse,\n"
- "\t\t\tdiskutil,job,mutex,profile,time,net\n");
+ "\t\t\tdiskutil,job,mutex,profile,time,net,rate\n");
printf(" --parse-only\t\tParse options only, don't start any IO\n");
printf(" --output\t\tWrite output to file\n");
printf(" --runtime\t\tRuntime in seconds\n");
.help = "Network logging",
.shift = FD_NET,
},
+ { .name = "rate",
+ .help = "Rate logging",
+ .shift = FD_RATE,
+ },
{ .name = NULL, },
};
return 0;
}
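+/*
+ * Shared failure path for max_latency and latency_target violations:
+ * log the first error, then fail the job with ETIMEDOUT.
+ */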
+static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
+ unsigned long tusec, unsigned long max_usec)
+{
+ if (!td->error)
+ log_err("fio: latency of %lu usec exceeds specified max (%lu usec)\n", tusec, max_usec);
+ td_verror(td, ETIMEDOUT, "max latency exceeded");
+ icd->error = ETIMEDOUT;
+}
+
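+/*
+ * Begin a new latency_window sample period: record the current time and
+ * the number of IOs completed so far, so the next check can compute the
+ * success rate over just this window.
+ */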
+static void lat_new_cycle(struct thread_data *td)
+{
+ fio_gettime(&td->latency_ts, NULL);
+ td->latency_ios = ddir_rw_sum(td->io_blocks);
+ td->latency_failed = 0;
+}
+
+/*
+ * We had an IO outside the latency target. Reduce the queue depth. If we
+ * are at QD=1, then it's time to give up.
+ */
+static int __lat_target_failed(struct thread_data *td)
+{
+ if (td->latency_qd == 1)
+ return 1;
+
+ td->latency_qd_high = td->latency_qd;
+ td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
+
+ dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+
+ /*
+ * When we ramp QD down, quiesce existing IO to prevent
+ * a storm of ramp downs due to pending higher depth.
+ */
+ io_u_quiesce(td);
+ lat_new_cycle(td);
+ return 0;
+}
+
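+/*
+ * An IO exceeded the latency target. With the default 100.0 percentile,
+ * a single miss fails the window immediately. Otherwise just count the
+ * miss and let lat_target_check() weigh the failures against the target
+ * percentile at the end of the window.
+ */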
+static int lat_target_failed(struct thread_data *td)
+{
+ if (td->o.latency_percentile.u.f == 100.0)
+ return __lat_target_failed(td);
+
+ td->latency_failed++;
+ return 0;
+}
+
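+/*
+ * Start probing at QD=1, with the configured iodepth as the upper
+ * bound. Without a latency target, simply run at the full configured
+ * depth.
+ */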
+void lat_target_init(struct thread_data *td)
+{
+ if (td->o.latency_target) {
+ dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
+ fio_gettime(&td->latency_ts, NULL);
+ td->latency_qd = 1;
+ td->latency_qd_high = td->o.iodepth;
+ td->latency_qd_low = 1;
+ td->latency_ios = ddir_rw_sum(td->io_blocks);
+ } else
+ td->latency_qd = td->o.iodepth;
+}
+
+static void lat_target_success(struct thread_data *td)
+{
+ const unsigned int qd = td->latency_qd;
+
+ td->latency_qd_low = td->latency_qd;
+
+ /*
+ * If we haven't failed yet, double up to probe for a failing value
+ * instead of bisecting from the highest possible queue depth. If a
+ * failing depth below td->o.iodepth was already seen, bisect between
+ * the current depth and that upper bound.
+ */
+ if (td->latency_qd_high != td->o.iodepth)
+ td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
+ else
+ td->latency_qd *= 2;
+
+ if (td->latency_qd > td->o.iodepth)
+ td->latency_qd = td->o.iodepth;
+
+ dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+ /*
+ * If the depth is unchanged from the last window, we have converged
+ * on the answer and are done.
+ */
+ if (td->latency_qd == qd)
+ td->done = 1;
+
+ lat_new_cycle(td);
+}
+
+/*
+ * Check if a full latency_window has elapsed and, if so, ramp the
+ * queue depth up or down based on the achieved percentile.
+ */
+void lat_target_check(struct thread_data *td)
+{
+ uint64_t usec_window;
+ uint64_t ios;
+ double success_ios;
+
+ usec_window = utime_since_now(&td->latency_ts);
+ if (usec_window < td->o.latency_window)
+ return;
+
+ ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
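+ /*
+ * Percentage of IOs in this window that met the target. Note that
+ * with latency_percentile at the default 100.0, a miss ramps the
+ * depth down directly in the completion path, so latency_failed is
+ * always 0 here.
+ */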
+ success_ios = (double) (ios - td->latency_failed) / (double) ios;
+ success_ios *= 100.0;
+
+ dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
+
+ if (success_ios >= td->o.latency_percentile.u.f)
+ lat_target_success(td);
+ else
+ __lat_target_failed(td);
+}
+
+/*
+ * If latency target is enabled, we might be ramping up or down and not
+ * using the full queue depth available.
+ */
+int queue_full(struct thread_data *td)
+{
+ const int qempty = io_u_qempty(&td->io_u_freelist);
+
+ if (qempty)
+ return 1;
+ if (!td->o.latency_target)
+ return 0;
+
+ return td->cur_depth >= td->latency_qd;
+}
struct io_u *__get_io_u(struct thread_data *td)
{
again:
if (!io_u_rempty(&td->io_u_requeues))
io_u = io_u_rpop(&td->io_u_requeues);
- else if (!io_u_qempty(&td->io_u_freelist)) {
+ else if (!queue_full(td)) {
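+ /*
+ * queue_full() also honors the latency ramp depth (latency_qd),
+ * so new io_u's are throttled while probing.
+ */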
io_u = io_u_qpop(&td->io_u_freelist);
io_u->buflen = 0;
icd->error = ops->io_u_lat(td, tusec);
}
- if (td->o.max_latency && tusec > td->o.max_latency) {
- if (!td->error)
- log_err("fio: latency of %lu usec exceeds specified max (%u usec)\n", tusec, td->o.max_latency);
- td_verror(td, ETIMEDOUT, "max latency exceeded");
- icd->error = ETIMEDOUT;
+ if (td->o.max_latency && tusec > td->o.max_latency)
+ lat_fatal(td, icd, tusec, td->o.max_latency);
+ if (td->o.latency_target && tusec > td->o.latency_target) {
+ if (lat_target_failed(td))
+ lat_fatal(td, icd, tusec, td->o.latency_target);
}
}
/*
* io unit handling
*/
-#define queue_full(td) io_u_qempty(&(td)->io_u_freelist)
extern struct io_u *__get_io_u(struct thread_data *);
extern struct io_u *get_io_u(struct thread_data *);
extern void put_io_u(struct thread_data *, struct io_u *);
extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int);
void io_u_mark_complete(struct thread_data *, unsigned int);
void io_u_mark_submit(struct thread_data *, unsigned int);
+int queue_full(struct thread_data *);
int do_io_u_sync(struct thread_data *, struct io_u *);
int do_io_u_trim(struct thread_data *, struct io_u *);
fio_gettime(&tv, NULL);
memcpy(&td->epoch, &tv, sizeof(tv));
memcpy(&td->start, &tv, sizeof(tv));
+
+ lat_target_init(td);
}
void reset_fio_state(void)
}
static struct opt_group fio_opt_cat_groups[] = {
+ {
+ .name = "Latency profiling",
+ .mask = FIO_OPT_G_LATPROF,
+ },
{
.name = "Rate",
.mask = FIO_OPT_G_RATE,
.off1 = td_var_offset(max_latency),
.help = "Maximum tolerated IO latency (usec)",
.category = FIO_OPT_C_IO,
- .group = FIO_OPT_G_RATE,
+ .group = FIO_OPT_G_LATPROF,
+ },
+ {
+ .name = "latency_target",
+ .lname = "Latency Target (usec)",
+ .type = FIO_OPT_STR_VAL_TIME,
+ .off1 = td_var_offset(latency_target),
+ .help = "Ramp to max queue depth supporting this latency",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_LATPROF,
+ },
+ {
+ .name = "latency_window",
+ .lname = "Latency Window (usec)",
+ .type = FIO_OPT_STR_VAL_TIME,
+ .off1 = td_var_offset(latency_window),
+ .help = "Time to sustain latency_target",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_LATPROF,
+ },
+ {
+ .name = "latency_percentile",
+ .lname = "Latency Percentile",
+ .type = FIO_OPT_FLOAT_LIST,
+ .off1 = td_var_offset(latency_percentile),
+ .help = "Percentile of IOs must be below latency_target",
+ .def = "100",
+ .maxlen = 1,
+ .minfp = 0.0,
+ .maxfp = 100.0,
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_LATPROF,
},
{
.name = "invalidate",
__FIO_OPT_G_NETIO,
__FIO_OPT_G_LIBAIO,
__FIO_OPT_G_ACT,
+ __FIO_OPT_G_LATPROF,
__FIO_OPT_G_NR,
FIO_OPT_G_RATE = (1U << __FIO_OPT_G_RATE),
FIO_OPT_G_NETIO = (1U << __FIO_OPT_G_NETIO),
FIO_OPT_G_LIBAIO = (1U << __FIO_OPT_G_LIBAIO),
FIO_OPT_G_ACT = (1U << __FIO_OPT_G_ACT),
+ FIO_OPT_G_LATPROF = (1U << __FIO_OPT_G_LATPROF),
FIO_OPT_G_INVALID = (1U << __FIO_OPT_G_NR),
};
** Initialize precision to 0 and zero out list
** in case specified list is shorter than default
*/
- ul2 = 0;
- ilp = td_var(data, o->off2);
- *ilp = ul2;
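+	/*
+	 * off2, when set, stores the list precision; latency_percentile
+	 * defines no off2, so guard the dereference.
+	 */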
+ if (o->off2) {
+ ul2 = 0;
+ ilp = td_var(data, o->off2);
+ *ilp = ul2;
+ }
flp = td_var(data, o->off1);
for(i = 0; i < o->maxlen; i++)
while (*++cp2 != '\0' && *cp2 >= '0' && *cp2 <= '9')
len++;
- ilp = td_var(data, o->off2);
- if (len > *ilp)
- *ilp = len;
+ if (o->off2) {
+ ilp = td_var(data, o->off2);
+ if (len > *ilp)
+ *ilp = len;
+ }
}
break;
p.ts.kb_base = cpu_to_le32(ts->kb_base);
p.ts.unit_base = cpu_to_le32(ts->unit_base);
+ p.ts.latency_depth = cpu_to_le32(ts->latency_depth);
+ p.ts.latency_target = cpu_to_le64(ts->latency_target);
+ p.ts.latency_window = cpu_to_le64(ts->latency_window);
+ p.ts.latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(ts->latency_percentile.u.f));
+
convert_gs(&p.rs, rs);
fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p), NULL, NULL);
};
enum {
- FIO_SERVER_VER = 27,
+ FIO_SERVER_VER = 28,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
show_lat_m(io_u_lat_m);
}
-
void show_thread_status_normal(struct thread_stat *ts, struct group_run_stats *rs)
{
double usr_cpu, sys_cpu;
ts->first_error,
strerror(ts->first_error));
}
+ if (ts->latency_depth) {
+ log_info(" latency : target=%llu, window=%llu, percentile=%.2f%%, depth=%u\n",
+ (unsigned long long)ts->latency_target,
+ (unsigned long long)ts->latency_window,
+ ts->latency_percentile.u.f,
+ ts->latency_depth);
+ }
}
static void show_ddir_status_terse(struct thread_stat *ts,
json_object_add_value_int(root, "first_error", ts->first_error);
}
+ if (ts->latency_depth) {
+ json_object_add_value_int(root, "latency_depth", ts->latency_depth);
+ json_object_add_value_int(root, "latency_target", ts->latency_target);
+ json_object_add_value_float(root, "latency_percentile", ts->latency_percentile.u.f);
+ json_object_add_value_int(root, "latency_window", ts->latency_window);
+ }
+
/* Additional output if description is set */
if (strlen(ts->description))
json_object_add_value_string(root, "desc", ts->description);
}
}
+ ts->latency_depth = td->latency_qd;
+ ts->latency_target = td->o.latency_target;
+ ts->latency_percentile = td->o.latency_percentile;
+ ts->latency_window = td->o.latency_window;
+
sum_thread_stats(ts, &td->ts, idx);
}
uint32_t kb_base;
uint32_t unit_base;
+
+ uint32_t latency_depth;
+ uint64_t latency_target;
+ fio_fp64_t latency_percentile;
+ uint64_t latency_window;
};
struct jobs_eta {
unsigned long long number_ios;
unsigned int sync_file_range;
+
+ unsigned long long latency_target;
+ unsigned long long latency_window;
+ fio_fp64_t latency_percentile;
};
#define FIO_TOP_STR_MAX 256
uint64_t number_ios;
uint32_t sync_file_range;
+
+ uint64_t latency_target;
+ uint64_t latency_window;
+ fio_fp64_t latency_percentile;
} __attribute__((packed));
extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);