Add logging for queue submit and complet counts
authorJens Axboe <jens.axboe@oracle.com>
Thu, 22 May 2008 11:08:23 +0000 (13:08 +0200)
committerJens Axboe <jens.axboe@oracle.com>
Thu, 22 May 2008 11:08:23 +0000 (13:08 +0200)
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
HOWTO
engines/guasi.c
engines/libaio.c
engines/null.c
engines/sync.c
fio.c
fio.h
io_u.c
ioengines.c
stat.c

diff --git a/HOWTO b/HOWTO
index e4fccb5..15ee367 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -824,6 +824,8 @@ Client1 (g=0): err= 0:
     bw (KiB/s) : min=    0, max= 1196, per=51.00%, avg=664.02, stdev=681.68
   cpu        : usr=1.49%, sys=0.25%, ctx=7969, majf=0, minf=17
   IO depths    : 1=0.1%, 2=0.3%, 4=0.5%, 8=99.0%, 16=0.0%, 32=0.0%, >32=0.0%
+     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
+     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
      issued r/w: total=0/32768, short=0/0
      lat (msec): 2=1.6%, 4=0.0%, 10=3.2%, 20=12.8%, 50=38.4%, 100=24.8%,
      lat (msec): 250=15.2%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2048=0.0%
@@ -861,6 +863,11 @@ IO depths= The distribution of io depths over the job life time. The
                16= entries includes depths up to that value but higher
                than the previous entry. In other words, it covers the
                range from 16 to 31.
+IO submit=     How many pieces of IO were submitting in a single submit
+               call. Each entry denotes that amount and below, until
+               the previous entry - eg, 8=100% mean that we submitted
+               anywhere in between 5-8 ios per submit call.
+IO complete=   Like the above submit number, but for completions instead.
 IO issued=     The number of read/write requests issued, and how many
                of them were short.
 IO latencies=  The distribution of IO completion latencies. This is the
index e0bcaf4..b65061e 100644 (file)
@@ -141,6 +141,7 @@ static void fio_guasi_queued(struct thread_data *td, struct io_u **io_us, int nr
        struct io_u *io_u;
        struct timeval now;
 
+       io_u_mark_submit(td, nr);
        fio_gettime(&now, NULL);
        for (i = 0; i < nr; i++) {
                io_u = io_us[i];
index 7ef3ff8..7e69c23 100644 (file)
@@ -144,13 +144,16 @@ static int fio_libaio_commit(struct thread_data *td)
                ret = io_submit(ld->aio_ctx, ld->iocbs_nr, iocbs);
                if (ret > 0) {
                        fio_libaio_queued(td, io_us, ret);
+                       io_u_mark_submit(td, ret);
                        ld->iocbs_nr -= ret;
                        io_us += ret;
                        iocbs += ret;
                        ret = 0;
-               } else if (!ret || ret == -EAGAIN || ret == -EINTR)
+               } else if (!ret || ret == -EAGAIN || ret == -EINTR) {
+                       if (!ret)
+                               io_u_mark_submit(td, ret);
                        continue;
-               else
+               else
                        break;
        } while (ld->iocbs_nr);
 
index 85d6847..eb80501 100644 (file)
@@ -46,6 +46,7 @@ static int fio_null_commit(struct thread_data *td)
        struct null_data *nd = td->io_ops->data;
 
        if (!nd->events) {
+               io_u_mark_submit(td, nd->queued);
                nd->events = nd->queued;
                nd->queued = 0;
        }
index 46cb864..e966846 100644 (file)
@@ -230,6 +230,7 @@ static int fio_vsyncio_commit(struct thread_data *td)
        if (!sd->queued)
                return 0;
 
+       io_u_mark_submit(td, sd->queued);
        f = sd->last_file;
 
        if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
diff --git a/fio.c b/fio.c
index c79fad8..90bad97 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -412,10 +412,10 @@ sync_done:
                 */
                min_events = 0;
                if (queue_full(td) || ret == FIO_Q_BUSY) {
-                       min_events = 1;
-
-                       if (td->cur_depth > td->o.iodepth_low)
+                       if (td->cur_depth >= td->o.iodepth_low)
                                min_events = td->cur_depth - td->o.iodepth_low;
+                       if (!min_events)
+                               min_events = 1;
                }
 
                /*
@@ -545,16 +545,12 @@ sync_done:
                /*
                 * See if we need to complete some commands
                 */
-               if (ret == FIO_Q_QUEUED || ret == FIO_Q_BUSY) {
+               if (queue_full(td) || ret == FIO_Q_BUSY) {
                        min_evts = 0;
-                       if (queue_full(td) || ret == FIO_Q_BUSY) {
+                       if (td->cur_depth >= td->o.iodepth_low)
+                               min_evts = td->cur_depth - td->o.iodepth_low;
+                       if (!min_evts)
                                min_evts = 1;
-
-                               if (td->cur_depth > td->o.iodepth_low)
-                                       min_evts = td->cur_depth
-                                                       - td->o.iodepth_low;
-                       }
-
                        fio_gettime(&comp_time, NULL);
                        bytes_done = io_u_queued_complete(td, min_evts);
                        if (bytes_done < 0)
diff --git a/fio.h b/fio.h
index 25d05bd..472fe93 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -378,10 +378,14 @@ struct thread_stat {
         * IO depth and latency stats
         */
        unsigned int io_u_map[FIO_IO_U_MAP_NR];
+       unsigned int io_u_submit[FIO_IO_U_MAP_NR];
+       unsigned int io_u_complete[FIO_IO_U_MAP_NR];
        unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR];
        unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR];
        unsigned long total_io_u[2];
        unsigned long short_io_u[2];
+       unsigned long total_submit;
+       unsigned long total_complete;
 
        unsigned long long io_bytes[2];
        unsigned long runtime[2];
@@ -909,6 +913,8 @@ extern void io_u_init_timeout(void);
 extern void io_u_set_timeout(struct thread_data *);
 extern void io_u_mark_depth(struct thread_data *, unsigned int);
 extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int);
+void io_u_mark_complete(struct thread_data *, unsigned int);
+void io_u_mark_submit(struct thread_data *, unsigned int);
 
 /*
  * io engine entry points
diff --git a/io_u.c b/io_u.c
index 06fb3ce..ce08c59 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -422,6 +422,47 @@ out:
        return 0;
 }
 
+static void __io_u_mark_map(unsigned int *map, unsigned int nr)
+{
+       int index = 0;
+
+       switch (nr) {
+       default:
+               index = 6;
+               break;
+       case 33 ... 64:
+               index = 5;
+               break;
+       case 17 ... 32:
+               index = 4;
+               break;
+       case 9 ... 16:
+               index = 3;
+               break;
+       case 5 ... 8:
+               index = 2;
+               break;
+       case 1 ... 4:
+               index = 1;
+       case 0:
+               break;
+       }
+
+       map[index]++;
+}
+
+void io_u_mark_submit(struct thread_data *td, unsigned int nr)
+{
+       __io_u_mark_map(td->ts.io_u_submit, nr);
+       td->ts.total_submit++;
+}
+
+void io_u_mark_complete(struct thread_data *td, unsigned int nr)
+{
+       __io_u_mark_map(td->ts.io_u_complete, nr);
+       td->ts.total_complete++;
+}
+
 void io_u_mark_depth(struct thread_data *td, unsigned int nr)
 {
        int index = 0;
index 9e1c556..fb68763 100644 (file)
@@ -198,6 +198,8 @@ int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max,
        if (td->io_ops->getevents)
                r = td->io_ops->getevents(td, min, max, t);
 out:
+       if (r >= 0)
+               io_u_mark_complete(td, r);
        dprint(FD_IO, "getevents: %d\n", r);
        return r;
 }
@@ -237,6 +239,11 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u)
 
        unlock_file(td, io_u->file);
 
+       if (!td->io_ops->commit) {
+               io_u_mark_submit(td, 1);
+               io_u_mark_complete(td, 1);
+       }
+
        if (ret == FIO_Q_COMPLETED) {
                if (io_u->ddir != DDIR_SYNC) {
                        io_u_mark_depth(td, 1);
diff --git a/stat.c b/stat.c
index e3e084e..fe6acb8 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -124,7 +124,8 @@ static void show_group_stats(struct group_run_stats *rs, int id)
 #define ts_total_io_u(ts)      \
        ((ts)->total_io_u[0] + (ts)->total_io_u[1])
 
-static void stat_calc_dist(struct thread_stat *ts, double *io_u_dist)
+static void stat_calc_dist(unsigned int *map, unsigned long total,
+                          double *io_u_dist)
 {
        int i;
 
@@ -132,27 +133,33 @@ static void stat_calc_dist(struct thread_stat *ts, double *io_u_dist)
         * Do depth distribution calculations
         */
        for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
-               io_u_dist[i] = (double) ts->io_u_map[i]
-                                       / (double) ts_total_io_u(ts);
-               io_u_dist[i] *= 100.0;
-               if (io_u_dist[i] < 0.1 && ts->io_u_map[i])
-                       io_u_dist[i] = 0.1;
+               if (total) {
+                       io_u_dist[i] = (double) map[i] / (double) total;
+                       io_u_dist[i] *= 100.0;
+                       if (io_u_dist[i] < 0.1 && map[i])
+                               io_u_dist[i] = 0.1;
+               } else
+                       io_u_dist[i] = 0.0;
        }
 }
 
 static void stat_calc_lat(struct thread_stat *ts, double *dst,
                          unsigned int *src, int nr)
 {
+       unsigned long total = ts_total_io_u(ts);
        int i;
 
        /*
         * Do latency distribution calculations
         */
        for (i = 0; i < nr; i++) {
-               dst[i] = (double) src[i] / (double) ts_total_io_u(ts);
-               dst[i] *= 100.0;
-               if (dst[i] < 0.01 && src[i])
-                       dst[i] = 0.01;
+               if (total) {
+                       dst[i] = (double) src[i] / (double) total;
+                       dst[i] *= 100.0;
+                       if (dst[i] < 0.01 && src[i])
+                               dst[i] = 0.01;
+               } else
+                       dst[i] = 0.0;
        }
 }
 
@@ -342,19 +349,30 @@ static void show_thread_status(struct thread_stat *ts,
        log_info("  cpu          : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu, majf=%lu,"
                 " minf=%lu\n", usr_cpu, sys_cpu, ts->ctx, ts->majf, ts->minf);
 
-       stat_calc_dist(ts, io_u_dist);
-       stat_calc_lat_u(ts, io_u_lat_u);
-       stat_calc_lat_m(ts, io_u_lat_m);
-
+       stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
        log_info("  IO depths    : 1=%3.1f%%, 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%,"
                 " 16=%3.1f%%, 32=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
                                        io_u_dist[1], io_u_dist[2],
                                        io_u_dist[3], io_u_dist[4],
                                        io_u_dist[5], io_u_dist[6]);
+
+       stat_calc_dist(ts->io_u_submit, ts->total_submit, io_u_dist);
+       log_info("     submit    : 0=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%,"
+                " 32=%3.1f%%, 64=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
+                                       io_u_dist[1], io_u_dist[2],
+                                       io_u_dist[3], io_u_dist[4],
+                                       io_u_dist[5], io_u_dist[6]);
+       stat_calc_dist(ts->io_u_complete, ts->total_complete, io_u_dist);
+       log_info("     complete  : 0=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%,"
+                " 32=%3.1f%%, 64=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
+                                       io_u_dist[1], io_u_dist[2],
+                                       io_u_dist[3], io_u_dist[4],
+                                       io_u_dist[5], io_u_dist[6]);
        log_info("     issued r/w: total=%lu/%lu, short=%lu/%lu\n",
                                        ts->total_io_u[0], ts->total_io_u[1],
                                        ts->short_io_u[0], ts->short_io_u[1]);
-
+       stat_calc_lat_u(ts, io_u_lat_u);
+       stat_calc_lat_m(ts, io_u_lat_m);
        show_latencies(io_u_lat_u, io_u_lat_m);
 }
 
@@ -419,7 +437,7 @@ static void show_thread_status_terse(struct thread_stat *ts,
        log_info(";%f%%;%f%%;%lu;%lu;%lu", usr_cpu, sys_cpu, ts->ctx, ts->majf,
                                                                ts->minf);
 
-       stat_calc_dist(ts, io_u_dist);
+       stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
        stat_calc_lat_u(ts, io_u_lat_u);
        stat_calc_lat_m(ts, io_u_lat_m);
 
@@ -569,6 +587,10 @@ void show_run_stats(void)
 
                for (k = 0; k < FIO_IO_U_MAP_NR; k++)
                        ts->io_u_map[k] += td->ts.io_u_map[k];
+               for (k = 0; k < FIO_IO_U_MAP_NR; k++)
+                       ts->io_u_submit[k] += td->ts.io_u_submit[k];
+               for (k = 0; k < FIO_IO_U_MAP_NR; k++)
+                       ts->io_u_complete[k] += td->ts.io_u_complete[k];
                for (k = 0; k < FIO_IO_U_LAT_U_NR; k++)
                        ts->io_u_lat_u[k] += td->ts.io_u_lat_u[k];
                for (k = 0; k < FIO_IO_U_LAT_M_NR; k++)
@@ -581,6 +603,8 @@ void show_run_stats(void)
                }
 
                ts->total_run_time += td->ts.total_run_time;
+               ts->total_submit += td->ts.total_submit;
+               ts->total_complete += td->ts.total_complete;
        }
 
        for (i = 0; i < nr_ts; i++) {