Provide a way to easily run a latency probe on the device.
You define a job with peak parameters, and then probe settings
for generating iops/latency numbers based on that workload.
The latter looks something like this:
iodepth_mode=stepped:10-130/10,5/10
which has the format of:
low_percentage-high_percentage/step,ramp_time/run_time
The above would probe from 10% of peak performance to 130%,
in steps of 10%. For each step, it would run a 5 second ramp,
then do 10 seconds of testing. For percentages <= 100%,
fio will limit the IOPS. For percentages above, it'll ramp up
the queue depth. For each section run, it'll log the avg
completion latency associated with that queue depth / iops
setting.
Has normal output (which sucks), and json output. Still
experimenting, not final form yet.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
gettime-thread.c helpers.c json.c idletime.c td_error.c \
profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
workqueue.c rate-submit.c optgroup.c helper_thread.c \
- steadystate.c zone-dist.c
+ steadystate.c zone-dist.c target.c
ifdef CONFIG_LIBHDFS
HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
#include "helper_thread.h"
#include "pshared.h"
#include "zone-dist.h"
+#include "target.h"
static struct fio_sem *startup_sem;
static struct flist_head *cgroup_list;
break;
}
}
- if (!in_ramp_time(td) && td->o.latency_target)
- lat_target_check(td);
+ if (!in_ramp_time(td) && lat_target_check(td))
+ break;
if (ddir_rw(ddir) && td->o.thinktime)
handle_thinktime(td, ddir);
* (Are we not missing other flags that can be ignored ?)
*/
if ((td->o.size || td->o.io_size) && !ddir_rw_sum(bytes_done) &&
- !did_some_io && !td->o.create_only &&
+ !did_some_io && (td->o.iodepth_mode != IOD_STEPPED) &&
+ !td->o.create_only &&
!(td_ioengine_flagged(td, FIO_NOIO) ||
td_ioengine_flagged(td, FIO_DISKLESSIO)))
log_err("%s: No I/O performed by %s, "
o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min);
o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max);
o->serialize_overlap = le32_to_cpu(top->serialize_overlap);
+ o->iodepth_mode = le32_to_cpu(top->iodepth_mode);
+ o->lat_step_low = le32_to_cpu(top->lat_step_low);
+ o->lat_step_high = le32_to_cpu(top->lat_step_high);
+ o->lat_step_inc = le32_to_cpu(top->lat_step_inc);
+ o->lat_step_ramp = le32_to_cpu(top->lat_step_ramp);
+ o->lat_step_run = le32_to_cpu(top->lat_step_run);
o->size = le64_to_cpu(top->size);
o->io_size = le64_to_cpu(top->io_size);
o->size_percent = le32_to_cpu(top->size_percent);
top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min);
top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max);
top->serialize_overlap = cpu_to_le32(o->serialize_overlap);
+ top->iodepth_mode = cpu_to_le32(o->iodepth_mode);
+ top->lat_step_low = cpu_to_le32(o->lat_step_low);
+ top->lat_step_high = cpu_to_le32(o->lat_step_high);
+ top->lat_step_inc = cpu_to_le32(o->lat_step_inc);
+ top->lat_step_ramp = cpu_to_le32(o->lat_step_ramp);
+ top->lat_step_run = cpu_to_le32(o->lat_step_run);
top->size_percent = cpu_to_le32(o->size_percent);
top->fill_device = cpu_to_le32(o->fill_device);
top->file_append = cpu_to_le32(o->file_append);
for (i = 0; i < dst->nr_block_infos; i++)
dst->block_infos[i] = le32_to_cpu(src->block_infos[i]);
+ for (i = 0; i < ARRAY_SIZE(dst->step_stats); i++) {
+ struct lat_step_stats *ls = &src->step_stats[i];
+
+ for (j = 0; j < DDIR_RWDIR_CNT; j++) {
+ dst->step_stats[i].iops[j] = le64_to_cpu(ls->iops[j]);
+ dst->step_stats[i].avg[j].u.f = fio_uint64_to_double(le64_to_cpu(ls->avg[j].u.i));
+ }
+ }
+
dst->ss_dur = le64_to_cpu(src->ss_dur);
dst->ss_state = le32_to_cpu(src->ss_state);
dst->ss_head = le32_to_cpu(src->ss_head);
--- /dev/null
+# Job demonstrating how to use the iodepth_mode=stepped feature
+#
+[step]
+ioengine=libaio
+# iodepth / step_high (130% here) must be high enough to saturate performance
+iodepth=64
+direct=1
+# Step from 10% to 130%, in 5% intervals. For each step, use a ramp time
+# of 5s, then 30 seconds of runtime
+iodepth_mode=stepped:10-130/5,5/30
+rw=randread
+norandommap
+filename=/dev/nvme0n1p9
+runtime=1h
+time_based=1
+# Two jobs, reported as one group, pinned to separate CPUs
+numjobs=2
+group_reporting=1
+cpus_allowed=0,2
F_ADV_SEQUENTIAL,
};
+enum {
+ IOD_NONE = 0,
+ IOD_STEPPED,
+};
+
/*
* Per-thread/process specific data. Only used for the network client
* for now.
unsigned int latency_qd;
unsigned int latency_qd_high;
unsigned int latency_qd_low;
+ unsigned int latency_qd_step;
unsigned int latency_failed;
- uint64_t latency_ios;
+ unsigned int latency_state;
+ unsigned int latency_iops[DDIR_RWDIR_CNT];
+ unsigned int latency_step;
+ uint64_t latency_ios[DDIR_RWDIR_CNT];
int latency_end_run;
+ unsigned int nr_lat_stats;
/*
* read/write mixed workload state
enum fio_ddir ddir, uint64_t *bytes_issued, int from_verify,
struct timespec *comp_time);
-/*
- * Latency target helpers
- */
-extern void lat_target_check(struct thread_data *);
-extern void lat_target_init(struct thread_data *);
-extern void lat_target_reset(struct thread_data *);
-
/*
* Iterates all threads/processes within all the defined jobs
*/
return ddir_rw_sum(td->bytes_done) != 0;
}
+int setup_rate(struct thread_data *td);
+
static inline unsigned long long td_max_bs(struct thread_data *td)
{
unsigned long long max_bs;
return 0;
}
-static int setup_rate(struct thread_data *td)
+int setup_rate(struct thread_data *td)
{
int ret = 0;
#include "lib/pow2.h"
#include "minmax.h"
#include "zbd.h"
+#include "target.h"
struct io_completion_data {
int nr; /* input */
return 0;
}
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
- unsigned long long tnsec, unsigned long long max_nsec)
-{
- if (!td->error)
- log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
- td_verror(td, ETIMEDOUT, "max latency exceeded");
- icd->error = ETIMEDOUT;
-}
-
-static void lat_new_cycle(struct thread_data *td)
-{
- fio_gettime(&td->latency_ts, NULL);
- td->latency_ios = ddir_rw_sum(td->io_blocks);
- td->latency_failed = 0;
-}
-
-/*
- * We had an IO outside the latency target. Reduce the queue depth. If we
- * are at QD=1, then it's time to give up.
- */
-static bool __lat_target_failed(struct thread_data *td)
-{
- if (td->latency_qd == 1)
- return true;
-
- td->latency_qd_high = td->latency_qd;
-
- if (td->latency_qd == td->latency_qd_low)
- td->latency_qd_low--;
-
- td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
-
- dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
- /*
- * When we ramp QD down, quiesce existing IO to prevent
- * a storm of ramp downs due to pending higher depth.
- */
- io_u_quiesce(td);
- lat_new_cycle(td);
- return false;
-}
-
-static bool lat_target_failed(struct thread_data *td)
-{
- if (td->o.latency_percentile.u.f == 100.0)
- return __lat_target_failed(td);
-
- td->latency_failed++;
- return false;
-}
-
-void lat_target_init(struct thread_data *td)
-{
- td->latency_end_run = 0;
-
- if (td->o.latency_target) {
- dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
- fio_gettime(&td->latency_ts, NULL);
- td->latency_qd = 1;
- td->latency_qd_high = td->o.iodepth;
- td->latency_qd_low = 1;
- td->latency_ios = ddir_rw_sum(td->io_blocks);
- } else
- td->latency_qd = td->o.iodepth;
-}
-
-void lat_target_reset(struct thread_data *td)
-{
- if (!td->latency_end_run)
- lat_target_init(td);
-}
-
-static void lat_target_success(struct thread_data *td)
-{
- const unsigned int qd = td->latency_qd;
- struct thread_options *o = &td->o;
-
- td->latency_qd_low = td->latency_qd;
-
- /*
- * If we haven't failed yet, we double up to a failing value instead
- * of bisecting from highest possible queue depth. If we have set
- * a limit other than td->o.iodepth, bisect between that.
- */
- if (td->latency_qd_high != o->iodepth)
- td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
- else
- td->latency_qd *= 2;
-
- if (td->latency_qd > o->iodepth)
- td->latency_qd = o->iodepth;
-
- dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
- /*
- * Same as last one, we are done. Let it run a latency cycle, so
- * we get only the results from the targeted depth.
- */
- if (td->latency_qd == qd) {
- if (td->latency_end_run) {
- dprint(FD_RATE, "We are done\n");
- td->done = 1;
- } else {
- dprint(FD_RATE, "Quiesce and final run\n");
- io_u_quiesce(td);
- td->latency_end_run = 1;
- reset_all_stats(td);
- reset_io_stats(td);
- }
- }
-
- lat_new_cycle(td);
-}
-
-/*
- * Check if we can bump the queue depth
- */
-void lat_target_check(struct thread_data *td)
-{
- uint64_t usec_window;
- uint64_t ios;
- double success_ios;
-
- usec_window = utime_since_now(&td->latency_ts);
- if (usec_window < td->o.latency_window)
- return;
-
- ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
- success_ios = (double) (ios - td->latency_failed) / (double) ios;
- success_ios *= 100.0;
-
- dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
-
- if (success_ios >= td->o.latency_percentile.u.f)
- lat_target_success(td);
- else
- __lat_target_failed(td);
-}
-
/*
* If latency target is enabled, we might be ramping up or down and not
* using the full queue depth available.
if (qempty)
return true;
- if (!td->o.latency_target)
+ if (!td->o.latency_target || td->o.iodepth_mode != IOD_STEPPED)
return false;
return td->cur_depth >= td->latency_qd;
icd->error = ops->io_u_lat(td, tnsec);
}
- if (td->o.max_latency && tnsec > td->o.max_latency)
- lat_fatal(td, icd, tnsec, td->o.max_latency);
+ if (td->o.max_latency && tnsec > td->o.max_latency) {
+ icd->error = ETIMEDOUT;
+ lat_fatal(td, tnsec, td->o.max_latency);
+ }
if (td->o.latency_target && tnsec > td->o.latency_target) {
- if (lat_target_failed(td))
- lat_fatal(td, icd, tnsec, td->o.latency_target);
+ if (lat_target_failed(td)) {
+ icd->error = ETIMEDOUT;
+ lat_fatal(td, tnsec, td->o.latency_target);
+ }
}
}
static bool should_account(struct thread_data *td)
{
- return ramp_time_over(td) && (td->runstate == TD_RUNNING ||
- td->runstate == TD_VERIFYING);
+ return lat_step_account(td) && ramp_time_over(td) &&
+ (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING);
}
static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
#include "filelock.h"
#include "helper_thread.h"
#include "filehash.h"
+#include "target.h"
FLIST_HEAD(disk_list);
#include "lib/pattern.h"
#include "options.h"
#include "optgroup.h"
+#include "target.h"
char client_sockaddr_str[INET6_ADDRSTRLEN] = { 0 };
return 0;
}
+/*
+ * Parse the argument to iodepth_mode=stepped. Expected input is
+ * "stepped:low-high/inc[,ramp/run]", e.g. "stepped:10-130/10,5/10".
+ * ramp/run are given in seconds on the command line and stored in
+ * msecs; if omitted, compiled-in defaults are used.
+ */
+static int str_iodepth_mode_cb(void *data, const char *input)
+{
+	struct thread_data *td = cb_data_to_td(data);
+	struct thread_options *o = &td->o;
+	char *str, *p, *n;
+	int ret = 1;
+
+	/* Nothing to parse unless a stepping mode was selected */
+	if (o->iodepth_mode == IOD_NONE)
+		return 0;
+
+	if (parse_dryrun())
+		return 0;
+
+	p = str = strdup(input);
+
+	strip_blank_front(&str);
+	strip_blank_end(str);
+
+	/* Skip past the mode name up to the ':' separator */
+	n = strchr(p, ':');
+	if (!n)
+		goto err;
+
+	*n++ = '\0';
+
+	/* format is now 'low-high/inc[,ramp/run]', ramp/run in seconds */
+	ret = sscanf(n, "%u-%u/%u,%u/%u", &o->lat_step_low, &o->lat_step_high,
+			&o->lat_step_inc, &o->lat_step_ramp,
+			&o->lat_step_run);
+	if (ret == 5) {
+		ret = 0;
+		/* convert seconds -> msecs */
+		o->lat_step_ramp *= 1000;
+		o->lat_step_run *= 1000;
+	} else if (ret == 3) {
+		/* ramp/run omitted; defaults are already in msecs */
+		o->lat_step_ramp = IOD_STEPPED_DEF_RAMP;
+		o->lat_step_run = IOD_STEPPED_DEF_RUN;
+		ret = 0;
+	} else
+		ret = 1;
+err:
+	if (ret)
+		log_err("fio: failed parsing <%s>\n", input);
+	free(str);
+	return ret;
+}
+
static int str_exitall_cb(void)
{
exitall_on_terminate = true;
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BASIC,
},
+ {
+ .name = "iodepth_mode",
+ .lname = "IO Depth Mode",
+ .type = FIO_OPT_STR,
+ .off1 = offsetof(struct thread_options, iodepth_mode),
+ .cb = str_iodepth_mode_cb,
+ .help = "How to vary the queue depth",
+ .parent = "iodepth",
+ .hide = 1,
+ .interval = 1,
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IO_BASIC,
+ .posval = {
+ { .ival = "none",
+ .oval = IOD_NONE,
+ .help = "No depth modification",
+ },
+ { .ival = "stepped",
+ .oval = IOD_STEPPED,
+ .help = "Stepped IO depth:hi-lo/inc,ramp/run",
+ },
+ },
+ },
+
{
.name = "serialize_overlap",
.lname = "Serialize overlap",
p.ts.sig_figs = cpu_to_le32(ts->sig_figs);
+ for (i = 0; i < ARRAY_SIZE(ts->step_stats); i++) {
+ struct lat_step_stats *ls = &ts->step_stats[i];
+
+ for (j = 0; j < DDIR_RWDIR_CNT; j++) {
+ p.ts.step_stats[i].iops[j] = cpu_to_le64(ls->iops[j]);
+ p.ts.step_stats[i].avg[j].u.i = cpu_to_le64(fio_double_to_uint64(ls->avg[j].u.f));
+ }
+ }
+
p.ts.nr_block_infos = cpu_to_le64(ts->nr_block_infos);
for (i = 0; i < p.ts.nr_block_infos; i++)
p.ts.block_infos[i] = cpu_to_le32(ts->block_infos[i]);
};
enum {
- FIO_SERVER_VER = 77,
+ FIO_SERVER_VER = 78,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
#include "helper_thread.h"
#include "smalloc.h"
#include "zbd.h"
+#include "target.h"
#define LOG_MSEC_SLACK 1
stat_calc_lat(ts, io_u_lat, ts->io_u_lat_m, FIO_IO_U_LAT_M_NR);
}
-static void display_lat(const char *name, unsigned long long min,
+void display_lat(const char *name, unsigned long long min,
unsigned long long max, double mean, double dev,
struct buf_output *out)
{
if (ts->ss_dur)
show_ss_normal(ts, out);
+
+ if (lat_ts_has_stats(ts)) {
+ log_buf(out, " Stepped latency report\n");
+ lat_step_report(ts, out);
+ }
}
static void show_ddir_status_terse(struct thread_stat *ts,
double io_u_lat_u[FIO_IO_U_LAT_U_NR];
double io_u_lat_m[FIO_IO_U_LAT_M_NR];
double usr_cpu, sys_cpu;
- int i;
+ int i, j;
size_t size;
root = json_create_object();
json_object_add_value_array(data, "bw", bw);
}
+ if (lat_ts_has_stats(ts)) {
+ tmp = json_create_object();
+ json_object_add_value_object(root, "lat_step", tmp);
+ }
+
+ for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+ struct json_object *val;
+
+ if (!__lat_ts_has_stats(ts, i))
+ continue;
+
+ val = json_create_object();
+ json_object_add_value_object(tmp, io_ddir_name(i), val);
+
+ for (j = 0; j < ARRAY_SIZE(ts->step_stats); j++) {
+ struct lat_step_stats *ls = &ts->step_stats[j];
+ char name[32];
+
+ if (!ls->iops[i])
+ continue;
+
+ sprintf(name, "%llu", (unsigned long long) ls->iops[i]);
+ json_object_add_value_float(val, name, ls->avg[i].u.f);
+ }
+ }
+
return root;
}
dst->S.u.f = S;
}
+/*
+ * Merge per-step stats from 'src' into 'dst' for group reporting.
+ * Average latency is combined as an IOPS-weighted mean; 'first' marks
+ * the first thread summed into 'dst', whose average is taken as-is.
+ */
+static void sum_lat_step_stats(struct lat_step_stats *dst,
+			       struct lat_step_stats *src, bool first)
+{
+	int i;
+
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+		/* Nothing recorded for this ddir in either side */
+		if (!dst->iops[i] && !src->iops[i])
+			continue;
+		if (first)
+			dst->avg[i].u.f = src->avg[i].u.f;
+		else {
+			/* IOPS-weighted combination of the two means */
+			dst->avg[i].u.f = ((src->avg[i].u.f * src->iops[i]) +
+				(dst->avg[i].u.f * dst->iops[i])) /
+				(dst->iops[i] + src->iops[i]);
+		}
+		dst->iops[i] += src->iops[i];
+	}
+}
+
void sum_group_stats(struct group_run_stats *dst, struct group_run_stats *src)
{
int i;
dst->total_submit += src->total_submit;
dst->total_complete += src->total_complete;
dst->nr_zone_resets += src->nr_zone_resets;
+
+ for (l = 0; l < ARRAY_SIZE(dst->step_stats); l++)
+ sum_lat_step_stats(&dst->step_stats[l], &src->step_stats[l], first);
}
void init_group_run_stat(struct group_run_stats *gs)
for (i = 0; i < groupid + 1; i++)
init_group_run_stat(&runstats[i]);
+ for (i = 0; i < FIO_OUTPUT_NR; i++)
+ buf_output_init(&output[i]);
+
/*
* find out how many threads stats we need. if group reporting isn't
* enabled, it's one-per-td.
}
}
- for (i = 0; i < FIO_OUTPUT_NR; i++)
- buf_output_init(&output[i]);
-
/*
* don't overwrite last signal output
*/
#include "iolog.h"
#include "lib/output_buffer.h"
+struct lat_step_stats {
+ uint64_t iops[DDIR_RWDIR_CNT];
+ fio_fp64_t avg[DDIR_RWDIR_CNT];
+};
+
struct group_run_stats {
uint64_t max_run[DDIR_RWDIR_CNT], min_run[DDIR_RWDIR_CNT];
uint64_t max_bw[DDIR_RWDIR_CNT], min_bw[DDIR_RWDIR_CNT];
#define FIO_JOBDESC_SIZE 256
#define FIO_VERROR_SIZE 128
+#define MAX_STEP_STATS 64
+
struct thread_stat {
char name[FIO_JOBNAME_SIZE];
char verror[FIO_VERROR_SIZE];
uint64_t latency_window;
uint32_t sig_figs;
+ uint32_t pad4;
+
+ struct lat_step_stats step_stats[MAX_STEP_STATS];
uint64_t ss_dur;
uint32_t ss_state;
union {
uint64_t *ss_iops_data;
- uint64_t pad4;
+ uint64_t pad5;
};
union {
uint64_t *ss_bw_data;
- uint64_t pad5;
+ uint64_t pad6;
};
} __attribute__((packed));
--- /dev/null
+#include <unistd.h>
+
+#include "fio.h"
+#include "target.h"
+#include "smalloc.h"
+#include "stat.h"
+
+/*
+ * Report a fatal latency overrun: log once (only if no error has been
+ * recorded yet) and mark the thread with ETIMEDOUT.
+ */
+void lat_fatal(struct thread_data *td, unsigned long long tnsec,
+	       unsigned long long max_nsec)
+{
+	if (!td->error)
+		log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
+	td_verror(td, ETIMEDOUT, "max latency exceeded");
+}
+
+/* Snapshot per-ddir completed block counts for later IOPS deltas */
+static void lat_ios_note(struct thread_data *td)
+{
+	enum fio_ddir ddir;
+
+	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+		td->latency_ios[ddir] = td->io_blocks[ddir];
+}
+
+/* Start a new measurement window: reset clock, IO marks and fail count */
+static void lat_new_cycle(struct thread_data *td)
+{
+	fio_gettime(&td->latency_ts, NULL);
+	lat_ios_note(td);
+	td->latency_failed = 0;
+}
+
+/*
+ * We had an IO outside the latency target. Reduce the queue depth. If we
+ * are at QD=1, then it's time to give up.
+ */
+static bool __lat_target_failed(struct thread_data *td)
+{
+	if (td->latency_qd == 1)
+		return true;
+
+	/* Current depth becomes the new known-bad upper bound */
+	td->latency_qd_high = td->latency_qd;
+
+	if (td->latency_qd == td->latency_qd_low)
+		td->latency_qd_low--;
+
+	/* Bisect between the known-good low and the current depth */
+	td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
+
+	dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+
+	/*
+	 * When we ramp QD down, quiesce existing IO to prevent
+	 * a storm of ramp downs due to pending higher depth.
+	 */
+	io_u_quiesce(td);
+	lat_new_cycle(td);
+	return false;
+}
+
+bool lat_target_failed(struct thread_data *td)
+{
+	/*
+	 * Below the 100th percentile we merely count the miss and let the
+	 * periodic window check decide; in strict (100%) mode every miss
+	 * triggers an immediate ramp down.
+	 */
+	if (td->o.latency_percentile.u.f != 100.0) {
+		td->latency_failed++;
+		return false;
+	}
+
+	return __lat_target_failed(td);
+}
+
+/*
+ * Initialize stepped probing: start at the full configured queue depth
+ * to measure peak performance (PROBE_RAMP -> PROBE_RUN), before
+ * stepping through the configured percentage range.
+ */
+static void lat_step_init(struct thread_data *td)
+{
+	struct thread_options *o = &td->o;
+
+	fio_gettime(&td->latency_ts, NULL);
+	td->latency_state = IOD_STATE_PROBE_RAMP;
+	td->latency_step = 0;
+	td->latency_qd = td->o.iodepth;
+	dprint(FD_RATE, "Stepped: %d-%d/%d,%d/%d\n", o->lat_step_low,
+					o->lat_step_high, o->lat_step_inc,
+					o->lat_step_ramp, o->lat_step_run);
+}
+
+/*
+ * Set up queue depth management: bisection state for latency_target
+ * runs, stepping state for iodepth_mode=stepped, or simply the full
+ * configured depth otherwise.
+ */
+void lat_target_init(struct thread_data *td)
+{
+	td->latency_end_run = 0;
+
+	if (td->o.latency_target) {
+		dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
+		fio_gettime(&td->latency_ts, NULL);
+		td->latency_qd = 1;
+		td->latency_qd_high = td->o.iodepth;
+		td->latency_qd_low = 1;
+		lat_ios_note(td);
+	} else if (td->o.iodepth_mode == IOD_STEPPED)
+		lat_step_init(td);
+	else
+		td->latency_qd = td->o.iodepth;
+}
+
+/*
+ * Re-arm latency_target tracking between loops, unless we're in the
+ * final measurement run. Stepped mode is not restarted here; its state
+ * machine runs to completion on its own.
+ */
+void lat_target_reset(struct thread_data *td)
+{
+	if (td->o.latency_target && !td->latency_end_run)
+		lat_target_init(td);
+}
+
+/*
+ * The measurement window met the latency target: try a higher queue
+ * depth, bisecting towards a known-failing depth if one exists.
+ */
+static void lat_target_success(struct thread_data *td)
+{
+	const unsigned int qd = td->latency_qd;
+	struct thread_options *o = &td->o;
+
+	/* Current depth is the new known-good lower bound */
+	td->latency_qd_low = td->latency_qd;
+
+	/*
+	 * If we haven't failed yet, we double up to a failing value instead
+	 * of bisecting from highest possible queue depth. If we have set
+	 * a limit other than td->o.iodepth, bisect between that.
+	 */
+	if (td->latency_qd_high != o->iodepth)
+		td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
+	else
+		td->latency_qd *= 2;
+
+	if (td->latency_qd > o->iodepth)
+		td->latency_qd = o->iodepth;
+
+	dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+
+	/*
+	 * Same as last one, we are done. Let it run a latency cycle, so
+	 * we get only the results from the targeted depth.
+	 */
+	if (td->latency_qd == qd) {
+		if (td->latency_end_run) {
+			dprint(FD_RATE, "We are done\n");
+			td->done = 1;
+		} else {
+			dprint(FD_RATE, "Quiesce and final run\n");
+			io_u_quiesce(td);
+			td->latency_end_run = 1;
+			reset_all_stats(td);
+			reset_io_stats(td);
+		}
+	}
+
+	lat_new_cycle(td);
+}
+
+/*
+ * Periodic latency_target check: once the measurement window has
+ * elapsed, compare the achieved success percentage against the
+ * configured percentile and ramp the queue depth up or down.
+ */
+void __lat_target_check(struct thread_data *td)
+{
+	uint64_t usec_window;
+	uint64_t ios;
+	double success_ios;
+
+	usec_window = utime_since_now(&td->latency_ts);
+	if (usec_window < td->o.latency_window)
+		return;
+
+	/*
+	 * NOTE(review): if no IOs completed in the window, ios is 0 and
+	 * the division yields NaN; the comparison below then fails and we
+	 * ramp down - confirm that is the intended behavior.
+	 */
+	ios = ddir_rw_sum(td->io_blocks) - ddir_rw_sum(td->latency_ios);
+	success_ios = (double) (ios - td->latency_failed) / (double) ios;
+	success_ios *= 100.0;
+
+	dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
+
+	if (success_ios >= td->o.latency_percentile.u.f)
+		lat_target_success(td);
+	else
+		__lat_target_failed(td);
+}
+
+/* Disable rate limiting: clear the rate-check flag and per-ddir iops caps */
+static void lat_clear_rate(struct thread_data *td)
+{
+	enum fio_ddir ddir;
+
+	td->flags &= ~TD_F_CHECK_RATE;
+	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+		td->o.rate_iops[ddir] = 0;
+}
+
+/*
+ * Move to the next step percentage. For steps below 100% of the probed
+ * peak we cap IOPS via the rate limiter; at or above 100% we scale the
+ * queue depth up instead.
+ *
+ * Returns true if we're done stepping
+ */
+static bool lat_step_recalc(struct thread_data *td)
+{
+	struct thread_options *o = &td->o;
+	unsigned int perc;
+
+	/*
+	 * Steps run from lat_step_low to lat_step_high inclusive, in
+	 * lat_step_inc increments. We are done once the next step would
+	 * exceed the configured high percentage.
+	 */
+	perc = o->lat_step_low + td->latency_step * o->lat_step_inc;
+	if (perc > o->lat_step_high)
+		return true;
+
+	if (perc < 100) {
+		int i;
+
+		/* Limit IOPS to 'perc' percent of the measured peak */
+		for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+			unsigned int this_iops;
+
+			this_iops = (perc * td->latency_iops[i]) / 100;
+			td->o.rate_iops[i] = this_iops;
+		}
+		setup_rate(td);
+		td->flags |= TD_F_CHECK_RATE;
+		td->latency_qd = td->o.iodepth * 100 / o->lat_step_high;
+	} else {
+		/* Past peak: push queue depth instead of limiting rate */
+		td->latency_qd = td->o.iodepth * perc / o->lat_step_high;
+		lat_clear_rate(td);
+	}
+
+	dprint(FD_RATE, "Stepped: step=%d, perc=%d, qd=%d\n", td->latency_step,
+						perc, td->latency_qd);
+	return false;
+}
+
+/*
+ * Clear accumulated stats between steps so each step is measured on
+ * its own.
+ */
+static void lat_step_reset(struct thread_data *td)
+{
+	struct thread_stat *ts = &td->ts;
+	struct io_stat *ios = &ts->clat_stat[DDIR_RWDIR_CNT];
+
+	/*
+	 * NOTE(review): only the entry at index DDIR_RWDIR_CNT (one past
+	 * the per-ddir slots) is cleared here - verify clat_stat is sized
+	 * to include it and that reset_io_stats() below covers the
+	 * per-ddir entries.
+	 */
+	ios->max_val = ios->min_val = ios->samples = 0;
+	ios->mean.u.f = ios->S.u.f = 0;
+
+	lat_clear_rate(td);
+	reset_all_stats(td);
+	reset_io_stats(td);
+}
+
+/*
+ * IOPS for 'ddir' since the last lat_ios_note() snapshot, over a
+ * window of 'msec' milliseconds. An empty window yields 0.
+ */
+static uint64_t lat_iops_since(struct thread_data *td, uint64_t msec,
+			       enum fio_ddir ddir)
+{
+	uint64_t ios;
+
+	if (!msec)
+		return 0;
+
+	ios = td->io_blocks[ddir] - td->latency_ios[ddir];
+	return (ios * 1000) / msec;
+}
+
+/*
+ * Record the just-completed step: per-ddir IOPS over the step window
+ * of 'msec' milliseconds, and the mean completion latency seen.
+ */
+static void lat_step_add_sample(struct thread_data *td, uint64_t msec)
+{
+	struct thread_stat *ts = &td->ts;
+	unsigned long long min, max;
+	struct lat_step_stats *ls;
+	double mean, dev;
+	int i;
+
+	if (td->nr_lat_stats == ARRAY_SIZE(td->ts.step_stats)) {
+		log_err("fio: ts->step_stats too small, dropping entries\n");
+		return;
+	}
+
+	ls = &td->ts.step_stats[td->nr_lat_stats];
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+		/*
+		 * calc_lat() may leave the output untouched when there are
+		 * no samples for this ddir; default the mean to 0 so we
+		 * never store an uninitialized value.
+		 */
+		mean = 0.0;
+		calc_lat(&ts->clat_stat[i], &min, &max, &mean, &dev);
+
+		ls->iops[i] = lat_iops_since(td, msec, i);
+		ls->avg[i].u.f = mean;
+	}
+
+	td->nr_lat_stats++;
+}
+
+/* True if any recorded step has a non-zero IOPS sample for 'ddir' */
+bool __lat_ts_has_stats(struct thread_stat *ts, enum fio_ddir ddir)
+{
+	int step;
+
+	for (step = 0; step < ARRAY_SIZE(ts->step_stats); step++)
+		if (ts->step_stats[step].iops[ddir])
+			return true;
+
+	return false;
+}
+
+/* True if stepped stats were recorded for any data direction */
+bool lat_ts_has_stats(struct thread_stat *ts)
+{
+	enum fio_ddir ddir;
+
+	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+		if (__lat_ts_has_stats(ts, ddir))
+			return true;
+
+	return false;
+}
+
+/*
+ * Dump the stepped latency results: one line per recorded step and
+ * data direction, with the measured IOPS and mean completion latency.
+ */
+void lat_step_report(struct thread_stat *ts, struct buf_output *out)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(ts->step_stats); i++) {
+		struct lat_step_stats *ls = &ts->step_stats[i];
+
+		for (j = 0; j < DDIR_RWDIR_CNT; j++) {
+			/* Unused slots have zero IOPS */
+			if (!ls->iops[j])
+				continue;
+
+			__log_buf(out, "  %s: iops=%llu, lat=%.1f nsec\n",
+					io_ddir_name(j),
+					(unsigned long long) ls->iops[j],
+					ls->avg[j].u.f);
+		}
+	}
+}
+
+/* Enter a new stepping state and restart the state timer */
+static void lat_next_state(struct thread_data *td, int new_state)
+{
+	fio_gettime(&td->latency_ts, NULL);
+	td->latency_state = new_state;
+}
+
+/*
+ * Drive the stepped-probe state machine. Called from the IO loop; each
+ * call checks whether the current ramp/run phase has elapsed and, if
+ * so, advances to the next phase or step. Returns true when stepping
+ * has fully completed.
+ */
+bool lat_step_check(struct thread_data *td)
+{
+	struct thread_options *o = &td->o;
+	uint64_t msec;
+
+	msec = mtime_since_now(&td->latency_ts);
+
+	switch (td->latency_state) {
+	case IOD_STATE_PROBE_RAMP:
+		/* Warm-up at full depth done; start measuring peak IOPS */
+		if (msec < o->lat_step_ramp)
+			break;
+
+		lat_step_reset(td);
+		lat_ios_note(td);
+
+		lat_next_state(td, IOD_STATE_PROBE_RUN);
+		break;
+	case IOD_STATE_PROBE_RUN: {
+		int i;
+
+		if (msec < o->lat_step_run)
+			break;
+
+		/* Drain pending IO so the peak numbers are stable */
+		io_u_quiesce(td);
+
+		/* Record peak IOPS; step percentages scale off these */
+		for (i = 0; i < DDIR_RWDIR_CNT; i++)
+			td->latency_iops[i] = lat_iops_since(td, msec, i);
+
+		lat_step_reset(td);
+		lat_step_recalc(td);
+
+		io_u_quiesce(td);
+		lat_next_state(td, IOD_STATE_RAMP);
+		break;
+	}
+	case IOD_STATE_RAMP:
+		if (msec < o->lat_step_ramp)
+			break;
+
+		lat_ios_note(td);
+		lat_next_state(td, IOD_STATE_RUN);
+		break;
+	case IOD_STATE_RUN:
+		if (msec < o->lat_step_run)
+			break;
+
+		/* Step finished: drain, record a sample, set up the next */
+		io_u_quiesce(td);
+		fio_gettime(&td->latency_ts, NULL);
+		td->latency_step++;
+
+		lat_step_add_sample(td, msec);
+		lat_step_reset(td);
+
+		if (!lat_step_recalc(td))
+			break;
+
+		/* All steps exhausted */
+		td->done = 1;
+		lat_next_state(td, IOD_STATE_DONE);
+		break;
+	};
+
+	return td->latency_state == IOD_STATE_DONE;
+}
--- /dev/null
+#ifndef FIO_LAT_TARGET_H
+#define FIO_LAT_TARGET_H
+
+#include "fio.h"
+
+/* Default ramp/run time (msecs) when iodepth_mode=stepped omits them */
+enum {
+	IOD_STEPPED_DEF_RAMP = 5000,
+	IOD_STEPPED_DEF_RUN = 30000,
+};
+
+/*
+ * Starts out as PROBE_RAMP -> PROBE_RUN, then iterations of
+ * RAMP -> RUN with various iops limiting settings
+ */
+enum {
+	IOD_STATE_PROBE_RAMP = 1,
+	IOD_STATE_PROBE_RUN,
+	IOD_STATE_RAMP,
+	IOD_STATE_RUN,
+	IOD_STATE_DONE,
+};
+
+/*
+ * Latency target helpers
+ */
+void lat_target_init(struct thread_data *);
+void lat_target_reset(struct thread_data *);
+bool lat_target_failed(struct thread_data *td);
+void lat_step_report(struct thread_stat *ts, struct buf_output *out);
+bool lat_ts_has_stats(struct thread_stat *ts);
+bool __lat_ts_has_stats(struct thread_stat *ts, enum fio_ddir);
+
+void lat_fatal(struct thread_data *td, unsigned long long tnsec,
+	       unsigned long long max_nsec);
+
+bool lat_step_check(struct thread_data *td);
+void __lat_target_check(struct thread_data *td);
+
+/*
+ * Run the depth check appropriate for the active mode. Returns true
+ * only when stepped probing has fully completed; latency_target mode
+ * never terminates the loop from here.
+ */
+static inline bool lat_target_check(struct thread_data *td)
+{
+	if (td->o.latency_target) {
+		__lat_target_check(td);
+		return false;
+	} else if (td->o.iodepth_mode == IOD_STEPPED)
+		return lat_step_check(td);
+
+	return false;
+}
+
+/*
+ * Whether completions should currently be accounted in the stats. In
+ * stepped mode only the RUN phase of each step is accounted; probe and
+ * ramp phases are ignored.
+ */
+static inline bool lat_step_account(struct thread_data *td)
+{
+	if (td->o.iodepth_mode != IOD_STEPPED)
+		return true;
+
+	return td->latency_state == IOD_STATE_RUN;
+}
+
+#endif
unsigned int iodepth_batch_complete_min;
unsigned int iodepth_batch_complete_max;
unsigned int serialize_overlap;
+ unsigned int iodepth_mode;
+
+ unsigned int lat_step_low;
+ unsigned int lat_step_high;
+ unsigned int lat_step_inc;
+ unsigned int lat_step_ramp;
+ unsigned int lat_step_run;
unsigned int unique_filename;
uint32_t kb_base;
uint32_t unit_base;
uint32_t ddir_seq_nr;
+ uint32_t pad;
uint64_t ddir_seq_add;
uint32_t iodepth;
uint32_t iodepth_low;
uint32_t iodepth_batch_complete_min;
uint32_t iodepth_batch_complete_max;
uint32_t serialize_overlap;
+
+ uint32_t iodepth_mode;
+ uint32_t lat_step_low;
+ uint32_t lat_step_high;
+ uint32_t lat_step_inc;
+ uint32_t lat_step_ramp;
+ uint32_t lat_step_run;
+
+ uint32_t pad2;
uint32_t lat_percentiles;
uint64_t size;
uint32_t verify_fatal;
uint32_t verify_dump;
uint32_t verify_async;
+ uint32_t pad3;
uint64_t verify_backlog;
uint32_t verify_batch;
uint32_t experimental_verify;
uint32_t override_sync;
uint32_t rand_repeatable;
uint32_t allrand_repeatable;
- uint32_t pad;
+ uint32_t pad4;
uint64_t rand_seed;
uint32_t log_avg_msec;
uint32_t log_hist_msec;
struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
uint32_t zone_split_nr[DDIR_RWDIR_CNT];
+ uint32_t pad5;
fio_fp64_t zipf_theta;
fio_fp64_t pareto_h;
uint32_t random_generator;
uint32_t perc_rand[DDIR_RWDIR_CNT];
+ uint32_t pad6;
uint32_t hugepage_size;
uint64_t rw_min_bs;
- uint32_t pad2;
uint32_t thinktime;
uint32_t thinktime_spin;
uint32_t thinktime_blocks;
uint64_t ss_dur;
uint64_t ss_ramp_time;
uint32_t ss_state;
+ uint32_t pad7;
fio_fp64_t ss_limit;
uint32_t overwrite;
uint32_t bw_avg_time;
uint32_t trim_percentage;
uint32_t trim_batch;
uint32_t trim_zero;
+ uint32_t pad8;
uint64_t trim_backlog;
uint32_t clat_percentiles;
uint32_t percentile_precision;
uint32_t rate_iops_min[DDIR_RWDIR_CNT];
uint32_t rate_process;
uint32_t rate_ign_think;
- uint32_t pad3;
uint8_t ioscheduler[FIO_TOP_STR_MAX];
int32_t flow;
int32_t flow_watermark;
uint32_t flow_sleep;
+ uint32_t pad9;
uint64_t offset_increment;
uint64_t number_ios;