When the write_lat_log option is set, fio outputs the 'clat' completion
latency log file. This clat log can be used to analyze IO latency. The
file can also serve as an IO trace, since each entry fully describes an
IO when the --log_offset and --log_prio options are also set.
However, using the clat log file as an IO trace is inaccurate for two
reasons. First, the time field of each entry uses millisecond units,
which is too coarse for fast IOs (e.g. on SSDs). Second, the time field
is recorded not at command completion but when the log sample is
recorded, so it differs slightly from the actual IO completion time and
can serve only as an approximation.
To analyze IO issue time and IO completion time accurately using the
clat log, introduce the new option 'log_issue_time'. When this option
is set, append another field to each log entry and record the IO issue
time in nanoseconds in it. The IO completion time can then be
calculated by adding the completion latency to the IO issue time.
The IO issue time field is also added to the 'slat' submission latency
log file. This allows the IO start time to be calculated by subtracting
the submission latency from the IO issue time.
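
For example, a clat log entry recorded with log_offset and
log_issue_time set might look as follows (field order sketched from
this patch: time in msec, latency in nsec, ddir, bs, offset, issue
time in nsec; all values hypothetical):

    12, 8500, 0, 4096, 1048576, 11994000

This IO completed at 11994000 + 8500 = 12002500 nsec after the job
epoch. Likewise, a slat log entry with issue time 11994000 nsec and
submission latency 1500 nsec describes an IO that started at
11994000 - 1500 = 11992500 nsec.
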
The log_issue_time option can be used for IO tracing when the
write_lat_log and log_offset options are set together. If
log_issue_time is set but write_lat_log or log_offset is not, fio
errors out. When the log_issue_time and write_lat_log options are set
together with other write_X_log options, the IO issue time field is
added to all log files. In log files other than the clat and slat
logs, the IO issue time has no meaning, so the field is set to '0'.
When the log_avg_msec option is set, the average of the log values
over the specified duration is logged; the IO issue time has no
meaning in this case either, and the field is set to '0'.
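
A minimal invocation sketch that produces trace-capable latency logs
(log file prefix and device path are placeholders):

    fio --name=trace --filename=/dev/nvme0n1 --rw=randread \
        --write_lat_log=trace --log_offset=1 --log_issue_time=1

If --log_offset is dropped, or write_lat_log is not set, fio fails
with the error added by this patch:

    fio: log_issue_time option requires write_lat_log and log_offset options
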
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Link: https://lore.kernel.org/r/20240829085826.999859-6-shinichiro.kawasaki@wdc.com
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
o->log_max = le32_to_cpu(top->log_max);
o->log_offset = le32_to_cpu(top->log_offset);
o->log_prio = le32_to_cpu(top->log_prio);
+ o->log_issue_time = le32_to_cpu(top->log_issue_time);
o->log_gz = le32_to_cpu(top->log_gz);
o->log_gz_store = le32_to_cpu(top->log_gz_store);
o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
top->log_max = cpu_to_le32(o->log_max);
top->log_offset = cpu_to_le32(o->log_offset);
top->log_prio = cpu_to_le32(o->log_prio);
+ top->log_issue_time = cpu_to_le32(o->log_issue_time);
top->log_gz = cpu_to_le32(o->log_gz);
top->log_gz_store = cpu_to_le32(o->log_gz_store);
top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
uint64_t sample_size)
{
- struct io_sample *s;
- int log_offset;
+ struct io_sample *s, *s_tmp;
+ bool log_offset, log_issue_time;
uint64_t i, j, nr_samples;
struct io_u_plat_entry *entry;
uint64_t *io_u_plat;
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = (struct io_sample *)((char *)__get_sample(samples, log_offset, i) +
- i * sizeof(struct io_u_plat_entry));
+ s_tmp = __get_sample(samples, log_offset, log_issue_time, i);
+ s = (struct io_sample *)((char *)s_tmp +
+ i * sizeof(struct io_u_plat_entry));
entry = s->data.plat_entry;
io_u_plat = entry->io_u_plat;
uint64_t nr_samples;
size_t total;
char *p;
+ size_t log_entry_size;
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
*/
nr_samples = le64_to_cpu(pdu->nr_samples);
+ log_entry_size = __log_entry_sz(le32_to_cpu(pdu->log_offset),
+ le32_to_cpu(pdu->log_issue_time));
if (pdu->log_type == IO_LOG_TYPE_HIST)
- total = nr_samples * (__log_entry_sz(le32_to_cpu(pdu->log_offset)) +
- sizeof(struct io_u_plat_entry));
+ total = nr_samples * (log_entry_size +
+ sizeof(struct io_u_plat_entry));
else
- total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
+ total = nr_samples * log_entry_size;
ret = malloc(total + sizeof(*pdu));
ret->nr_samples = nr_samples;
ret->compressed = le32_to_cpu(ret->compressed);
ret->log_offset = le32_to_cpu(ret->log_offset);
ret->log_prio = le32_to_cpu(ret->log_prio);
+ ret->log_issue_time = le32_to_cpu(ret->log_issue_time);
ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
ret->per_job_logs = le32_to_cpu(ret->per_job_logs);
for (i = 0; i < ret->nr_samples; i++) {
struct io_sample *s;
- s = __get_sample(samples, ret->log_offset, i);
+ s = __get_sample(samples, ret->log_offset, ret->log_issue_time, i);
if (ret->log_type == IO_LOG_TYPE_HIST)
s = (struct io_sample *)((char *)s + sizeof(struct io_u_plat_entry) * i);
s->aux[IOS_AUX_OFFSET_INDEX] =
le64_to_cpu(s->aux[IOS_AUX_OFFSET_INDEX]);
+ if (ret->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+ le64_to_cpu(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
+
if (ret->log_type == IO_LOG_TYPE_HIST) {
s->data.plat_entry = (struct io_u_plat_entry *)(((char *)s) + sizeof(*s));
s->data.plat_entry->list.next = NULL;
.log_type = IO_LOG_TYPE_LAT,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
const char *pre = make_log_name(o->lat_log_file, o->name);
const char *suf;
+ if (o->log_issue_time && !o->log_offset) {
+ log_err("fio: log_issue_time option requires write_lat_log and log_offset options\n");
+ goto err;
+ }
+
if (p.log_gz_store)
suf = "log.fz";
else
setup_log(&td->clat_log, &p, logname);
}
+ } else if (o->log_issue_time) {
+ log_err("fio: log_issue_time option requires write_lat_log and log_offset options\n");
+ goto err;
}
if (o->write_hist_log) {
.log_type = IO_LOG_TYPE_HIST,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
.log_type = IO_LOG_TYPE_BW,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
.log_type = IO_LOG_TYPE_IOPS,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
l->log_type = p->log_type;
l->log_offset = p->log_offset;
l->log_prio = p->log_prio;
+ l->log_issue_time = p->log_issue_time;
l->log_gz = p->log_gz;
l->log_gz_store = p->log_gz_store;
l->avg_msec = p->avg_msec;
if (l->td && l->td->o.log_max == IO_LOG_SAMPLE_BOTH)
l->log_ddir_mask |= LOG_AVG_MAX_SAMPLE_BIT;
+ if (l->log_issue_time)
+ l->log_ddir_mask |= LOG_ISSUE_TIME_SAMPLE_BIT;
+
INIT_FLIST_HEAD(&l->chunk_list);
if (l->log_gz && !p->td)
uint64_t sample_size)
{
struct io_sample *s;
- int log_offset;
+ bool log_offset, log_issue_time;
uint64_t i, j, nr_samples;
struct io_u_plat_entry *entry, *entry_before;
uint64_t *io_u_plat;
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = __get_sample(samples, log_offset, i);
+ s = __get_sample(samples, log_offset, log_issue_time, i);
entry = s->data.plat_entry;
io_u_plat = entry->io_u_plat;
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
{
struct io_sample *s;
- bool log_offset, log_prio, log_avg_max;
+ bool log_offset, log_prio, log_avg_max, log_issue_time;
uint64_t i, nr_samples;
char buf[256];
char *p;
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = __get_sample(samples, log_offset, i);
+ s = __get_sample(samples, log_offset, log_issue_time, i);
p = buf;
left = sizeof(buf);
if (ret)
return;
+ if (log_issue_time) {
+ ret = print_sample_fields(&p, &left, ", %llu",
+ (unsigned long long) s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
+ if (ret)
+ return;
+ }
+
fprintf(f, "%s\n", buf);
}
}
*/
enum {
IOS_AUX_OFFSET_INDEX,
+ IOS_AUX_ISSUE_TIME_INDEX,
};
enum {
*/
unsigned int log_prio;
+ /*
+ * Log I/O issuing time
+ */
+ unsigned int log_issue_time;
+
/*
* Max size of log entries before a chunk is compressed
*/
* If the bit following the priority sample bit is set, we report both avg and max
*/
#define LOG_AVG_MAX_SAMPLE_BIT 0x20000000U
+/*
+ * If the bit following AVG_MAX_SAMPLE_BIT is set, we report the issue time also
+ */
+#define LOG_ISSUE_TIME_SAMPLE_BIT 0x10000000U
#define LOG_SAMPLE_BITS (LOG_OFFSET_SAMPLE_BIT | LOG_PRIO_SAMPLE_BIT |\
- LOG_AVG_MAX_SAMPLE_BIT)
+ LOG_AVG_MAX_SAMPLE_BIT |\
+ LOG_ISSUE_TIME_SAMPLE_BIT)
#define io_sample_ddir(io) ((io)->__ddir & ~LOG_SAMPLE_BITS)
static inline void io_sample_set_ddir(struct io_log *log,
io->__ddir = ddir | log->log_ddir_mask;
}
-static inline size_t __log_entry_sz(bool log_offset)
+static inline size_t __log_entry_sz(bool log_offset, bool log_issue_time)
{
size_t ret = sizeof(struct io_sample);
if (log_offset)
ret += sizeof(uint64_t);
+ if (log_issue_time)
+ ret += sizeof(uint64_t);
+
return ret;
}
static inline size_t log_entry_sz(struct io_log *log)
{
- return __log_entry_sz(log->log_offset);
+ return __log_entry_sz(log->log_offset, log->log_issue_time);
}
static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
return cur_log->nr_samples * log_entry_sz(log);
}
-static inline struct io_sample *__get_sample(void *samples, int log_offset,
+static inline struct io_sample *__get_sample(void *samples, bool log_offset,
+ bool log_issue_time,
uint64_t sample)
{
- uint64_t sample_offset = sample * __log_entry_sz(log_offset);
+ uint64_t sample_offset = sample *
+ __log_entry_sz(log_offset, log_issue_time);
return (struct io_sample *) ((char *) samples + sample_offset);
}
struct io_logs *cur_log,
uint64_t sample)
{
- return __get_sample(cur_log->log, iolog->log_offset, sample);
+ return __get_sample(cur_log->log,
+ iolog->log_offset, iolog->log_issue_time, sample);
}
enum {
int log_type;
int log_offset;
int log_prio;
+ int log_issue_time;
int log_gz;
int log_gz_store;
int log_compress;
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
},
+ {
+ .name = "log_issue_time",
+ .lname = "Log IO issue time",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, log_issue_time),
+ .help = "Include IO issue time for each log entry",
+ .def = "0",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
#ifdef CONFIG_ZLIB
{
.name = "log_compression",
if (log->log_offset)
s->aux[IOS_AUX_OFFSET_INDEX] =
cpu_to_le64(s->aux[IOS_AUX_OFFSET_INDEX]);
+
+ if (log->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+ cpu_to_le64(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
}
}
};
enum {
- FIO_SERVER_VER = 106,
+ FIO_SERVER_VER = 107,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
uint32_t compressed;
uint32_t log_offset;
uint32_t log_prio;
+ uint32_t log_issue_time;
uint32_t log_hist_coarseness;
uint32_t per_job_logs;
uint8_t name[FIO_NET_NAME_MAX];
uint64_t bs;
uint64_t offset;
uint16_t priority;
+ uint64_t issue_time;
};
struct fio_sem *stat_sem;
if (iolog->log_offset)
s->aux[IOS_AUX_OFFSET_INDEX] = sample->offset;
+ if (iolog->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] = sample->issue_time;
+
cur_log->nr_samples++;
return;
}
* had actual samples done.
*/
if (iolog->avg_window[ddir].samples) {
- struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0 };
+ struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0, 0 };
union io_sample_data *d = &sample.data;
if (log_max == IO_LOG_SAMPLE_AVG) {
unsigned long long bs)
{
struct io_log *iolog;
- struct log_sample sample = { data, ddir, bs, 0, 0 };
+ struct log_sample sample = { data, ddir, bs, 0, 0, 0 };
if (!ddir_rw(ddir))
return;
if (td->clat_log) {
struct log_sample sample = { sample_val(nsec), ddir, bs,
- offset, ioprio };
+ offset, ioprio,
+ ntime_since(&td->epoch, &io_u->issue_time) };
add_log_sample(td, td->clat_log, &sample);
}
uint64_t *io_u_plat;
struct io_u_plat_entry *dst;
struct log_sample sample = { {{ 0, 0 }}, ddir, bs,
- offset, ioprio };
+ offset, ioprio, 0 };
/*
* Make a byte-for-byte copy of the latency histogram
if (td->slat_log) {
struct log_sample sample = { sample_val(nsec), ddir,
- io_u->xfer_buflen, io_u->offset, io_u->ioprio };
+ io_u->xfer_buflen, io_u->offset, io_u->ioprio,
+ ntime_since(&td->epoch, &io_u->issue_time) };
add_log_sample(td, td->slat_log, &sample);
}
if (td->lat_log) {
struct log_sample sample = { sample_val(nsec), ddir, bs,
- io_u->offset, io_u->ioprio };
+ io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->lat_log, &sample);
}
if (td->bw_log) {
struct log_sample sample = { sample_val(rate), io_u->ddir,
- bytes, io_u->offset, io_u->ioprio };
+ bytes, io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->bw_log, &sample);
}
if (log) {
struct log_sample sample = {
- sample_val(rate), ddir, 0, 0, 0 };
+ sample_val(rate), ddir, 0, 0, 0, 0 };
if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
sample.bs = td->o.min_bs[ddir];
if (td->iops_log) {
struct log_sample sample = { sample_val(1), io_u->ddir, bytes,
- io_u->offset, io_u->ioprio };
+ io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->iops_log, &sample);
}
unsigned int log_entries;
unsigned int log_prio;
+ unsigned int log_issue_time;
};
#define FIO_TOP_STR_MAX 256
uint32_t log_entries;
uint32_t log_prio;
+ uint32_t log_issue_time;
uint32_t fdp;
uint32_t dp_type;