bw (KiB/s) : min= 0, max= 1196, per=51.00%, avg=664.02, stdev=681.68
cpu : usr=1.49%, sys=0.25%, ctx=7969, majf=0, minf=17
IO depths : 1=0.1%, 2=0.3%, 4=0.5%, 8=99.0%, 16=0.0%, 32=0.0%, >32=0.0%
+ submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
+ complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued r/w: total=0/32768, short=0/0
lat (msec): 2=1.6%, 4=0.0%, 10=3.2%, 20=12.8%, 50=38.4%, 100=24.8%,
lat (msec): 250=15.2%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2048=0.0%
16= entries includes depths up to that value but higher
than the previous entry. In other words, it covers the
range from 16 to 31.
+IO submit= How many pieces of IO were submitted in a single submit
+ call. Each entry denotes that amount and below, until
+ the previous entry - eg, 8=100% means that we submitted
+ anywhere in between 5-8 ios per submit call.
+IO complete= Like the above submit number, but for completions instead.
IO issued= The number of read/write requests issued, and how many
of them were short.
IO latencies= The distribution of IO completion latencies. This is the
struct io_u *io_u;
struct timeval now;
+ io_u_mark_submit(td, nr);
fio_gettime(&now, NULL);
for (i = 0; i < nr; i++) {
io_u = io_us[i];
ret = io_submit(ld->aio_ctx, ld->iocbs_nr, iocbs);
if (ret > 0) {
fio_libaio_queued(td, io_us, ret);
+ io_u_mark_submit(td, ret);
ld->iocbs_nr -= ret;
io_us += ret;
iocbs += ret;
ret = 0;
- } else if (!ret || ret == -EAGAIN || ret == -EINTR)
+ } else if (!ret || ret == -EAGAIN || ret == -EINTR) {
+ if (!ret)
+ io_u_mark_submit(td, ret);
continue;
- else
+ } else
break;
} while (ld->iocbs_nr);
struct null_data *nd = td->io_ops->data;
if (!nd->events) {
+ io_u_mark_submit(td, nd->queued);
nd->events = nd->queued;
nd->queued = 0;
}
if (!sd->queued)
return 0;
+ io_u_mark_submit(td, sd->queued);
f = sd->last_file;
if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
*/
min_events = 0;
if (queue_full(td) || ret == FIO_Q_BUSY) {
- min_events = 1;
-
- if (td->cur_depth > td->o.iodepth_low)
+ if (td->cur_depth >= td->o.iodepth_low)
min_events = td->cur_depth - td->o.iodepth_low;
+ if (!min_events)
+ min_events = 1;
}
/*
/*
* See if we need to complete some commands
*/
- if (ret == FIO_Q_QUEUED || ret == FIO_Q_BUSY) {
+ if (queue_full(td) || ret == FIO_Q_BUSY) {
min_evts = 0;
- if (queue_full(td) || ret == FIO_Q_BUSY) {
+ if (td->cur_depth >= td->o.iodepth_low)
+ min_evts = td->cur_depth - td->o.iodepth_low;
+ if (!min_evts)
min_evts = 1;
-
- if (td->cur_depth > td->o.iodepth_low)
- min_evts = td->cur_depth
- - td->o.iodepth_low;
- }
-
fio_gettime(&comp_time, NULL);
bytes_done = io_u_queued_complete(td, min_evts);
if (bytes_done < 0)
* IO depth and latency stats
*/
unsigned int io_u_map[FIO_IO_U_MAP_NR];
+ unsigned int io_u_submit[FIO_IO_U_MAP_NR];
+ unsigned int io_u_complete[FIO_IO_U_MAP_NR];
unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR];
unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR];
unsigned long total_io_u[2];
unsigned long short_io_u[2];
+ unsigned long total_submit;
+ unsigned long total_complete;
unsigned long long io_bytes[2];
unsigned long runtime[2];
extern void io_u_set_timeout(struct thread_data *);
extern void io_u_mark_depth(struct thread_data *, unsigned int);
extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int);
+void io_u_mark_complete(struct thread_data *, unsigned int);
+void io_u_mark_submit(struct thread_data *, unsigned int);
/*
* io engine entry points
return 0;
}
+/*
+ * Bucket 'nr' into the shared distribution histogram 'map'.
+ * Buckets: 0 -> index 0, 1-4 -> 1, 5-8 -> 2, 9-16 -> 3, 17-32 -> 4,
+ * 33-64 -> 5, anything larger -> 6.
+ */
+static void __io_u_mark_map(unsigned int *map, unsigned int nr)
+{
+ int index = 0;
+
+ switch (nr) {
+ default:
+ index = 6;
+ break;
+ case 33 ... 64:
+ index = 5;
+ break;
+ case 17 ... 32:
+ index = 4;
+ break;
+ case 9 ... 16:
+ index = 3;
+ break;
+ case 5 ... 8:
+ index = 2;
+ break;
+ case 1 ... 4:
+ index = 1;
+ /* fall through into the case 0 break; index stays 1 */
+ case 0:
+ break;
+ }
+
+ map[index]++;
+}
+
+/*
+ * Record that one submit call pushed 'nr' io_us. Note total_submit
+ * counts submit calls (incremented by one), not io units.
+ */
+void io_u_mark_submit(struct thread_data *td, unsigned int nr)
+{
+ __io_u_mark_map(td->ts.io_u_submit, nr);
+ td->ts.total_submit++;
+}
+
+/*
+ * Record that one reap returned 'nr' completions. As with submits,
+ * total_complete counts calls, not io units.
+ */
+void io_u_mark_complete(struct thread_data *td, unsigned int nr)
+{
+ __io_u_mark_map(td->ts.io_u_complete, nr);
+ td->ts.total_complete++;
+}
+
void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
int index = 0;
if (td->io_ops->getevents)
r = td->io_ops->getevents(td, min, max, t);
out:
+ if (r >= 0)
+ io_u_mark_complete(td, r);
dprint(FD_IO, "getevents: %d\n", r);
return r;
}
unlock_file(td, io_u->file);
+ if (!td->io_ops->commit) {
+ io_u_mark_submit(td, 1);
+ io_u_mark_complete(td, 1);
+ }
+
if (ret == FIO_Q_COMPLETED) {
if (io_u->ddir != DDIR_SYNC) {
io_u_mark_depth(td, 1);
#define ts_total_io_u(ts) \
((ts)->total_io_u[0] + (ts)->total_io_u[1])
-static void stat_calc_dist(struct thread_stat *ts, double *io_u_dist)
+static void stat_calc_dist(unsigned int *map, unsigned long total,
+ double *io_u_dist)
{
int i;
* Do depth distribution calculations
*/
for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
- io_u_dist[i] = (double) ts->io_u_map[i]
- / (double) ts_total_io_u(ts);
- io_u_dist[i] *= 100.0;
- if (io_u_dist[i] < 0.1 && ts->io_u_map[i])
- io_u_dist[i] = 0.1;
+ if (total) {
+ io_u_dist[i] = (double) map[i] / (double) total;
+ io_u_dist[i] *= 100.0;
+ /* clamp nonzero buckets so they never display as 0.0% */
+ if (io_u_dist[i] < 0.1 && map[i])
+ io_u_dist[i] = 0.1;
+ /* nothing recorded: avoid divide-by-zero, report 0% */
+ } else
+ io_u_dist[i] = 0.0;
}
}
static void stat_calc_lat(struct thread_stat *ts, double *dst,
unsigned int *src, int nr)
{
+ /* hoist the total out of the loop; also guards the zero case below */
+ unsigned long total = ts_total_io_u(ts);
int i;
/*
* Do latency distribution calculations
*/
for (i = 0; i < nr; i++) {
- dst[i] = (double) src[i] / (double) ts_total_io_u(ts);
- dst[i] *= 100.0;
- if (dst[i] < 0.01 && src[i])
- dst[i] = 0.01;
+ if (total) {
+ dst[i] = (double) src[i] / (double) total;
+ dst[i] *= 100.0;
+ /* clamp nonzero buckets so they never display as 0.00% */
+ if (dst[i] < 0.01 && src[i])
+ dst[i] = 0.01;
+ } else
+ dst[i] = 0.0;
}
}
log_info(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu, majf=%lu,"
" minf=%lu\n", usr_cpu, sys_cpu, ts->ctx, ts->majf, ts->minf);
- stat_calc_dist(ts, io_u_dist);
- stat_calc_lat_u(ts, io_u_lat_u);
- stat_calc_lat_m(ts, io_u_lat_m);
-
+ stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
log_info(" IO depths : 1=%3.1f%%, 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%,"
" 16=%3.1f%%, 32=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
io_u_dist[1], io_u_dist[2],
io_u_dist[3], io_u_dist[4],
io_u_dist[5], io_u_dist[6]);
+
+ stat_calc_dist(ts->io_u_submit, ts->total_submit, io_u_dist);
+ log_info(" submit : 0=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%,"
+ " 32=%3.1f%%, 64=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
+ io_u_dist[1], io_u_dist[2],
+ io_u_dist[3], io_u_dist[4],
+ io_u_dist[5], io_u_dist[6]);
+ stat_calc_dist(ts->io_u_complete, ts->total_complete, io_u_dist);
+ log_info(" complete : 0=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%,"
+ " 32=%3.1f%%, 64=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
+ io_u_dist[1], io_u_dist[2],
+ io_u_dist[3], io_u_dist[4],
+ io_u_dist[5], io_u_dist[6]);
log_info(" issued r/w: total=%lu/%lu, short=%lu/%lu\n",
ts->total_io_u[0], ts->total_io_u[1],
ts->short_io_u[0], ts->short_io_u[1]);
-
+ stat_calc_lat_u(ts, io_u_lat_u);
+ stat_calc_lat_m(ts, io_u_lat_m);
show_latencies(io_u_lat_u, io_u_lat_m);
}
log_info(";%f%%;%f%%;%lu;%lu;%lu", usr_cpu, sys_cpu, ts->ctx, ts->majf,
ts->minf);
- stat_calc_dist(ts, io_u_dist);
+ stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
stat_calc_lat_u(ts, io_u_lat_u);
stat_calc_lat_m(ts, io_u_lat_m);
for (k = 0; k < FIO_IO_U_MAP_NR; k++)
ts->io_u_map[k] += td->ts.io_u_map[k];
+ for (k = 0; k < FIO_IO_U_MAP_NR; k++)
+ ts->io_u_submit[k] += td->ts.io_u_submit[k];
+ for (k = 0; k < FIO_IO_U_MAP_NR; k++)
+ ts->io_u_complete[k] += td->ts.io_u_complete[k];
for (k = 0; k < FIO_IO_U_LAT_U_NR; k++)
ts->io_u_lat_u[k] += td->ts.io_u_lat_u[k];
for (k = 0; k < FIO_IO_U_LAT_M_NR; k++)
}
ts->total_run_time += td->ts.total_run_time;
+ ts->total_submit += td->ts.total_submit;
+ ts->total_complete += td->ts.total_complete;
}
for (i = 0; i < nr_ts; i++) {