command line parameter. The files will be stored with a
.fz suffix.
+block_error_percentiles=bool If set, record errors in trim block-sized
+ units from writes and trims and output a histogram of
+ how many trims it took to get to errors, and what kind
+ of error was encountered.
+
lockmem=int Pin down the specified amount of memory with mlock(2). Can
potentially be used instead of removing memory or booting
with less memory to simulate a smaller amount of memory.
completion latencies.
percentile_list=float_list Overwrite the default list of percentiles
- for completion latencies. Each number is a floating
- number in the range (0,100], and the maximum length of
- the list is 20. Use ':' to separate the numbers, and
- list the numbers in ascending order. For example,
- --percentile_list=99.5:99.9 will cause fio to report
- the values of completion latency below which 99.5% and
- 99.9% of the observed latencies fell, respectively.
+ for completion latencies and the block error histogram.
+ Each number is a floating number in the range (0,100],
+ and the maximum length of the list is 20. Use ':'
+ to separate the numbers, and list the numbers in ascending
+ order. For example, --percentile_list=99.5:99.9 will cause
+ fio to report the values of completion latency below which
+ 99.5% and 99.9% of the observed latencies fell, respectively.
clocksource=str Use the given clocksource as the base of timing. The
supported options are:
dst->latency_target = le64_to_cpu(src->latency_target);
dst->latency_window = le64_to_cpu(src->latency_window);
dst->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(src->latency_percentile.u.i));
+
+ dst->nr_block_infos = le64_to_cpu(src->nr_block_infos);
+ for (i = 0; i < dst->nr_block_infos; i++)
+ dst->block_infos[i] = le32_to_cpu(src->block_infos[i]);
}
static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
}
}
+ if (td->o.block_error_hist) {
+ int len;
+
+ assert(td->o.nr_files == 1); /* checked in fixup_options */
+ f = td->files[0];
+ len = f->io_size / td->o.bs[DDIR_TRIM];
+ if (len > MAX_NR_BLOCK_INFOS || len <= 0) {
+ log_err("fio: cannot calculate block histogram with "
+ "%d trim blocks, maximum %d\n",
+ len, MAX_NR_BLOCK_INFOS);
+ td_verror(td, EINVAL, "block_error_hist");
+ goto err_out;
+ }
+
+ td->ts.nr_block_infos = len;
+ for (int i = 0; i < len; i++)
+ td->ts.block_infos[i] =
+ BLOCK_INFO(0, BLOCK_STATE_UNINIT);
+ } else
+ td->ts.nr_block_infos = 0;
+
if (!o->size || (total_size && o->size > total_size))
o->size = total_size;
\fB\-\-inflate-log\fR command line parameter. The files will be stored with a
\fB\.fz\fR suffix.
.TP
+.BI block_error_percentiles \fR=\fPbool
+If set, record errors in trim block-sized units from writes and trims and output
+a histogram of how many trims it took to get to errors, and what kind of error
+was encountered.
+.TP
.BI disable_lat \fR=\fPbool
Disable measurements of total latency numbers. Useful only for cutting
back the number of calls to \fBgettimeofday\fR\|(2), as that does impact performance at
Enable the reporting of percentiles of completion latencies.
.TP
.BI percentile_list \fR=\fPfloat_list
-Overwrite the default list of percentiles for completion
-latencies. Each number is a floating number in the range (0,100], and
-the maximum length of the list is 20. Use ':' to separate the
+Overwrite the default list of percentiles for completion latencies and the
+block error histogram. Each number is a floating number in the range (0,100],
+and the maximum length of the list is 20. Use ':' to separate the
numbers. For example, \-\-percentile_list=99.5:99.9 will cause fio to
report the values of completion latency below which 99.5% and 99.9% of
the observed latencies fell, respectively.
if (!td->loops)
td->loops = 1;
+ if (td->o.block_error_hist && td->o.nr_files != 1) {
+ log_err("fio: block error histogram only available with "
+ "with a single file per job, but %d files "
+ "provided\n", td->o.nr_files);
+ ret = 1;
+ }
+
return ret;
}
if (!gtod_reduce(td))
add_iops_sample(td, idx, bytes, &icd->time);
+
+ if (td->ts.nr_block_infos && io_u->ddir == DDIR_TRIM) {
+ uint32_t *info = io_u_block_info(td, io_u);
+ if (BLOCK_INFO_STATE(*info) < BLOCK_STATE_TRIM_FAILURE) {
+ if (io_u->ddir == DDIR_TRIM) {
+ *info = BLOCK_INFO(BLOCK_STATE_TRIMMED,
+ BLOCK_INFO_TRIMS(*info) + 1);
+ } else if (io_u->ddir == DDIR_WRITE) {
+ *info = BLOCK_INFO_SET_STATE(BLOCK_STATE_WRITTEN,
+ *info);
+ }
+ }
+ }
}
static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
{
struct io_piece *ipo = io_u->ipo;
+ if (td->ts.nr_block_infos) {
+ uint32_t *info = io_u_block_info(td, io_u);
+ if (BLOCK_INFO_STATE(*info) < BLOCK_STATE_TRIM_FAILURE) {
+ if (io_u->ddir == DDIR_TRIM)
+ *info = BLOCK_INFO_SET_STATE(*info,
+ BLOCK_STATE_TRIM_FAILURE);
+ else if (io_u->ddir == DDIR_WRITE)
+ *info = BLOCK_INFO_SET_STATE(*info,
+ BLOCK_STATE_WRITE_FAILURE);
+ }
+ }
+
if (!ipo)
return;
.group = FIO_OPT_G_INVALID,
},
#endif
+ {
+ .name = "block_error_percentiles",
+ .lname = "Block error percentiles",
+ .type = FIO_OPT_BOOL,
+ .off1 = td_var_offset(block_error_hist),
+ .help = "Record trim block errors and make a histogram",
+ .def = "0",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
{
.name = "bwavgtime",
.lname = "Bandwidth average time",
},
{
.name = "percentile_list",
- .lname = "Completion latency percentile list",
+ .lname = "Percentile list",
.type = FIO_OPT_FLOAT_LIST,
.off1 = td_var_offset(percentile_list),
.off2 = td_var_offset(percentile_precision),
- .help = "Specify a custom list of percentiles to report",
+ .help = "Specify a custom list of percentiles to report for "
+ "completion latency and block errors",
.def = "1:5:10:20:30:40:50:60:70:80:90:95:99:99.5:99.9:99.95:99.99",
.maxlen = FIO_IO_U_LIST_MAX_LEN,
.minfp = 0.0,
p.ts.latency_window = cpu_to_le64(ts->latency_window);
p.ts.latency_percentile.u.i = cpu_to_le64(fio_double_to_uint64(ts->latency_percentile.u.f));
+ p.ts.nr_block_infos = le64_to_cpu(ts->nr_block_infos);
+ for (i = 0; i < p.ts.nr_block_infos; i++)
+ p.ts.block_infos[i] = le32_to_cpu(ts->block_infos[i]);
+
convert_gs(&p.rs, rs);
fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p), NULL, NULL);
show_lat_m(io_u_lat_m);
}
+static int block_state_category(int block_state)
+{
+ switch (block_state) {
+ case BLOCK_STATE_UNINIT:
+ return 0;
+ case BLOCK_STATE_TRIMMED:
+ case BLOCK_STATE_WRITTEN:
+ return 1;
+ case BLOCK_STATE_WRITE_FAILURE:
+ case BLOCK_STATE_TRIM_FAILURE:
+ return 2;
+ default:
+ assert(0);
+ }
+}
+
+static int compare_block_infos(const void *bs1, const void *bs2)
+{
+ uint32_t block1 = *(uint32_t *)bs1;
+ uint32_t block2 = *(uint32_t *)bs2;
+ int state1 = BLOCK_INFO_STATE(block1);
+ int state2 = BLOCK_INFO_STATE(block2);
+ int bscat1 = block_state_category(state1);
+ int bscat2 = block_state_category(state2);
+ int cycles1 = BLOCK_INFO_TRIMS(block1);
+ int cycles2 = BLOCK_INFO_TRIMS(block2);
+
+ if (bscat1 < bscat2)
+ return -1;
+ if (bscat1 > bscat2)
+ return 1;
+
+ if (cycles1 < cycles2)
+ return -1;
+ if (cycles1 > cycles2)
+ return 1;
+
+ if (state1 < state2)
+ return -1;
+ if (state1 > state2)
+ return 1;
+
+ assert(block1 == block2);
+ return 0;
+}
+
+static int calc_block_percentiles(int nr_block_infos, uint32_t *block_infos,
+ fio_fp64_t *plist, unsigned int **percentiles,
+ unsigned int *types)
+{
+ int len = 0;
+ int i, nr_uninit;
+
+ qsort(block_infos, nr_block_infos, sizeof(uint32_t), compare_block_infos);
+
+ while (len < FIO_IO_U_LIST_MAX_LEN && plist[len].u.f != 0.0)
+ len++;
+
+ if (!len)
+ return 0;
+
+ /*
+ * Sort the percentile list. Note that it may already be sorted if
+ * we are using the default values, but since it's a short list this
+ * isn't a worry. Also note that this does not work for NaN values.
+ */
+ if (len > 1)
+ qsort((void *)plist, len, sizeof(plist[0]), double_cmp);
+
+ nr_uninit = 0;
+ /* Start only after the uninit entries end */
+ for (nr_uninit = 0;
+ nr_uninit < nr_block_infos
+ && BLOCK_INFO_STATE(block_infos[nr_uninit]) == BLOCK_STATE_UNINIT;
+ nr_uninit ++)
+ ;
+
+ if (nr_uninit == nr_block_infos)
+ return 0;
+
+ *percentiles = calloc(len, sizeof(**percentiles));
+
+ for (i = 0; i < len; i++) {
+ int idx = (plist[i].u.f * (nr_block_infos - nr_uninit) / 100)
+ + nr_uninit;
+ (*percentiles)[i] = BLOCK_INFO_TRIMS(block_infos[idx]);
+ }
+
+ memset(types, 0, sizeof(*types) * BLOCK_STATE_COUNT);
+ for (i = 0; i < nr_block_infos; i++)
+ types[BLOCK_INFO_STATE(block_infos[i])]++;
+
+ return len;
+}
+
+static const char *block_state_names[] = {
+ [BLOCK_STATE_UNINIT] = "unwritten",
+ [BLOCK_STATE_TRIMMED] = "trimmed",
+ [BLOCK_STATE_WRITTEN] = "written",
+ [BLOCK_STATE_TRIM_FAILURE] = "trim failure",
+ [BLOCK_STATE_WRITE_FAILURE] = "write failure",
+};
+
+static void show_block_infos(int nr_block_infos, uint32_t *block_infos,
+ fio_fp64_t *plist)
+{
+ int len, pos, i;
+ unsigned int *percentiles = NULL;
+ unsigned int block_state_counts[BLOCK_STATE_COUNT];
+
+ len = calc_block_percentiles(nr_block_infos, block_infos, plist,
+ &percentiles, block_state_counts);
+
+ log_info(" block lifetime percentiles :\n |");
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ uint32_t block_info = percentiles[i];
+#define LINE_LENGTH 75
+ char str[LINE_LENGTH];
+ int strln = snprintf(str, LINE_LENGTH, " %3.2fth=%u%c",
+ plist[i].u.f, block_info,
+ i == len - 1 ? '\n' : ',');
+ assert(strln < LINE_LENGTH);
+ if (pos + strln > LINE_LENGTH) {
+ pos = 0;
+ log_info("\n |");
+ }
+ log_info("%s", str);
+ pos += strln;
+#undef LINE_LENGTH
+ }
+ if (percentiles)
+ free(percentiles);
+
+ log_info(" states :");
+ for (i = 0; i < BLOCK_STATE_COUNT; i++)
+ log_info(" %s=%u%c",
+ block_state_names[i], block_state_counts[i],
+ i == BLOCK_STATE_COUNT - 1 ? '\n' : ',');
+}
+
static void show_thread_status_normal(struct thread_stat *ts,
struct group_run_stats *rs)
{
ts->latency_percentile.u.f,
ts->latency_depth);
}
+
+ if (ts->nr_block_infos)
+ show_block_infos(ts->nr_block_infos, ts->block_infos,
+ ts->percentile_list);
}
static void show_ddir_status_terse(struct thread_stat *ts,
if (strlen(ts->description))
json_object_add_value_string(root, "desc", ts->description);
+ if (ts->nr_block_infos) {
+ /* Block error histogram and types */
+ int len;
+ unsigned int *percentiles = NULL;
+ unsigned int block_state_counts[BLOCK_STATE_COUNT];
+
+ len = calc_block_percentiles(ts->nr_block_infos, ts->block_infos,
+ ts->percentile_list,
+ &percentiles, block_state_counts);
+
+ if (len) {
+ struct json_object *block, *percentile_object, *states;
+ int state, i;
+ block = json_create_object();
+ json_object_add_value_object(root, "block", block);
+
+ percentile_object = json_create_object();
+ json_object_add_value_object(block, "percentiles",
+ percentile_object);
+ for (i = 0; i < len; i++) {
+ char buf[20];
+ snprintf(buf, sizeof(buf), "%f",
+ ts->percentile_list[i].u.f);
+ json_object_add_value_int(percentile_object,
+ (const char *)buf,
+ percentiles[i]);
+ }
+
+ states = json_create_object();
+ json_object_add_value_object(block, "states", states);
+ for (state = 0; state < BLOCK_STATE_COUNT; state++) {
+ json_object_add_value_int(states,
+ block_state_names[state],
+ block_state_counts[state]);
+ }
+ free(percentiles);
+ }
+ }
+
return root;
}
helper_do_stat = 1;
pthread_cond_signal(&helper_cond);
}
+
+uint32_t *io_u_block_info(struct thread_data *td, struct io_u *io_u)
+{
+ /* Ignore io_u's which span multiple blocks--they will just get
+ * inaccurate counts. */
+ int idx = (io_u->offset - io_u->file->file_offset)
+ / td->o.bs[DDIR_TRIM];
+ uint32_t *info = &td->ts.block_infos[idx];
+ assert(idx < td->ts.nr_block_infos);
+ return info;
+}
#define FIO_IO_U_LIST_MAX_LEN 20 /* The size of the default and user-specified
list of percentiles */
+/*
+ * Trim cycle count measurements
+ */
+#define MAX_NR_BLOCK_INFOS 8192
+#define BLOCK_INFO_STATE_SHIFT 29
+#define BLOCK_INFO_TRIMS(block_info) \
+ ((block_info) & ((1 << BLOCK_INFO_STATE_SHIFT) - 1))
+#define BLOCK_INFO_STATE(block_info) \
+ ((block_info) >> BLOCK_INFO_STATE_SHIFT)
+#define BLOCK_INFO(state, trim_cycles) \
+ ((trim_cycles) | ((state) << BLOCK_INFO_STATE_SHIFT))
+#define BLOCK_INFO_SET_STATE(block_info, state) \
+ BLOCK_INFO(state, BLOCK_INFO_TRIMS(block_info))
+enum block_info_state {
+ BLOCK_STATE_UNINIT,
+ BLOCK_STATE_TRIMMED,
+ BLOCK_STATE_WRITTEN,
+ BLOCK_STATE_TRIM_FAILURE,
+ BLOCK_STATE_WRITE_FAILURE,
+ BLOCK_STATE_COUNT,
+} state;
+
#define MAX_PATTERN_SIZE 512
#define FIO_JOBNAME_SIZE 128
#define FIO_JOBDESC_SIZE 256
uint64_t total_err_count;
uint32_t first_error;
+ uint64_t nr_block_infos;
+ uint32_t block_infos[MAX_NR_BLOCK_INFOS];
+
uint32_t kb_base;
uint32_t unit_base;
#define __THREAD_RUNSTR_SZ(nr) ((nr) * 5)
#define THREAD_RUNSTR_SZ __THREAD_RUNSTR_SZ(thread_number)
+uint32_t *io_u_block_info(struct thread_data *td, struct io_u *io_u);
+
#endif
unsigned long long latency_target;
unsigned long long latency_window;
fio_fp64_t latency_percentile;
+
+ unsigned block_error_hist;
};
#define FIO_TOP_STR_MAX 256
uint64_t latency_window;
uint32_t pad3;
fio_fp64_t latency_percentile;
+
+ uint32_t block_error_hist;
} __attribute__((packed));
extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);