[PATCH] Added iostat-style statistics to btt
authorAlan David Brunelle <Alan.Brunelle@hp.com>
Thu, 21 Sep 2006 07:16:27 +0000 (09:16 +0200)
committerJens Axboe <axboe@kernel.dk>
Thu, 21 Sep 2006 07:16:27 +0000 (09:16 +0200)
Added iostat-style statistics: rrqm/s, wrqm/s, r/s, w/s, rsec/s, wsec/s,
rkB/s, wkB/s, avgrq-sz, avgqu-sz, await, svctm, %util.

Signed-off-by: Alan D. Brunelle <Alan.Brunelle@hp.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
btt/Makefile
btt/args.c
btt/bt_timeline.c
btt/devs.c
btt/globals.h
btt/iostat.c [new file with mode: 0644]
btt/trace.c

index 86d70f060aaf58795f8cfe5d960336842ec2a066..b86e8282221eb4ddfb8934b66e8c48ed5d7f1dfb 100644 (file)
@@ -8,7 +8,7 @@ PROGS   = btt
 #PLIBS = -lpthread
 LIBS   = $(PLIBS) $(ELIBS)
 OBJS   = bt_timeline.o args.o output.o proc.o trace.o misc.o devs.o \
-         traverse.o iofree.o devmap.o cylist.o seek.o
+         traverse.o iofree.o devmap.o cylist.o seek.o iostat.o
 
 all: depend $(PROGS)
 
index bed39c8c7ba7bc5dfb5c80ebd0e144fee8b6226e..a198da26fc9b3b9dc2f46fceffc5230522233603 100644 (file)
@@ -28,7 +28,7 @@
 
 #include "globals.h"
 
-#define S_OPTS "d:D:e:hlmM:i:o:s:Vv"
+#define S_OPTS "d:D:e:hi:I:lmM:o:s:S:Vv"
 static struct option l_opts[] = {
        {
                .name = "range-delta",
@@ -54,6 +54,18 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 'h'
        },
+       {
+               .name = "input-file",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'i'
+       },
+       {
+               .name = "iostat",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'I'
+       },
        {
                .name = "lvm",
                .has_arg = no_argument,
@@ -72,12 +84,6 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 'M'
        },
-       {
-               .name = "input-file",
-               .has_arg = required_argument,
-               .flag = NULL,
-               .val = 'i'
-       },
        {
                .name = "output-file",
                .has_arg = required_argument,
@@ -90,6 +96,12 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 's'
        },
+       {
+               .name = "iostat-interval",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'S'
+       },
        {
                .name = "version",
                .has_arg = no_argument,
@@ -112,9 +124,12 @@ static char usage_str[] = \
        "[ -e <exe,...>     | --exes=<exe,...>  ]\n" \
        "[ -h               | --help ]\n" \
        "[ -i <input name>  | --input-file=<input name> ]\n" \
+       "[ -I <output name> | --iostat=<output name> ]\n" \
        "(-l | -m)          | (--lvm | -md)\n" \
+       "[ -M <dev map>     | --dev-maps=<dev map>\n" \
        "[ -o <output name> | --output-file=<output name> ]\n" \
        "[ -s <output name> | --seeks=<output name> ]\n" \
+       "[ -S <interval>    | --iostat-interval=<interval> ]\n" \
        "[ -V               | --version ]\n" \
        "[ -v               | --verbose ]\n\n";
 
@@ -146,6 +161,9 @@ void handle_args(int argc, char *argv[])
                case 'i':
                        input_name = optarg;
                        break;
+               case 'I':
+                       iostat_name = optarg;
+                       break;
                case 'l':
                        is_lvm = 1;
                        break;
@@ -161,6 +179,12 @@ void handle_args(int argc, char *argv[])
                case 's':
                        seek_name = optarg;
                        break;
+               case 'S': {
+                       unsigned int interval;
+                       sscanf(optarg, "%u", &interval);
+                       iostat_interval = (__u64)interval * (1000 * 1000 * 1000);
+                       break;
+               }
                case 'v':
                        verbose = 1;
                        break;
@@ -212,4 +236,12 @@ void handle_args(int argc, char *argv[])
 
                free(fname);
        }
+
+       if (iostat_name != NULL) {
+               iostat_ofp = fopen(iostat_name, "w");
+               if (iostat_ofp == NULL) {
+                       perror(iostat_name);
+                       exit(1);
+               }
+       }
 }
index 1c3216db66101e50f3dc37bc968f5153a5280c0e..ce5662e230ae8a1cc588a54e567551fca29528a5 100644 (file)
@@ -65,6 +65,7 @@ int main(int argc, char *argv[])
        handle_args(argc, argv);
 
        cy_init();
+       iostat_init();
        if (process() || output_avgs(avgs_ofp) || output_ranges(ranges_ofp))
                return 1;
 
@@ -92,6 +93,11 @@ int process(void)
        }
        IO_FREE(iop);
 
+       if (iostat_ofp) {
+               fprintf(iostat_ofp, "\n");
+               iostat_dump_stats(iostat_last_stamp, 1);
+       }
+
        if (verbose)
                printf("\n%10lu traces, %10lu mallocs %1lu frees\n",
                                       n_traces, n_io_allocs, n_io_frees);
index 90d44fa988772d6542d591be004e01e8d0a9298e..2801804f550aaeed03651ee9e00c670c83c53fec 100644 (file)
@@ -69,6 +69,8 @@ struct d_info *dip_add(__u32 device, struct io *iop)
                init_region(&dip->regions);
                dip->map = dev_map_find(device);
                dip->seek_handle = seeki_init(device);
+               memset(&dip->stats, 0, sizeof(dip->stats));
+               memset(&dip->all_stats, 0, sizeof(dip->all_stats));
 
                if (dev_heads == NULL) init_dev_heads();
                list_add_tail(&dip->hash_head, &dev_heads[DEV_HASH(device)]);
index 7bc89ef918da7ffc7c998f131c030e48f303518e..dcb25a66ddd05430684e545471ecbfb322e3c3be 100644 (file)
@@ -32,6 +32,7 @@
 #define BIT_START(iop) ((iop)->t.sector)
 #define BIT_END(iop)   ((iop)->t.sector + ((iop)->t.bytes >> 9))
 #define IOP_READ(iop)  ((iop)->t.action & BLK_TC_ACT(BLK_TC_READ))
+#define IOP_RW(iop)    (IOP_READ(iop) ? 1 : 0)
 
 #if defined(DEBUG)
 #define ASSERT(truth)   do {                                           \
@@ -135,6 +136,17 @@ struct devmap {
        char devno[32];
 };
 
+struct stats {
+       __u64 rqm[2];
+       __u64 ios[2];
+       __u64 sec[2];
+       __u64 wait;
+       __u64 svctm;
+
+       int cur_qusz, cur_dev;
+       double last_qu_change, last_dev_change, tot_qusz, idle_time;
+};
+
 struct d_info {
        struct list_head head, hash_head;
        struct list_head iop_heads[N_IOP_TYPES];
@@ -145,6 +157,7 @@ struct d_info {
        __u64 n_ds;
        struct devmap *map;
        void *seek_handle;
+       struct stats stats, all_stats;
 };
 
 struct io {
@@ -191,9 +204,9 @@ struct io {
 };
 
 extern char bt_timeline_version[], *devices, *exes, *input_name, *output_name;
-extern char *seek_name;
+extern char *seek_name, *iostat_name;
 extern double range_delta;
-extern FILE *ranges_ofp, *avgs_ofp;
+extern FILE *ranges_ofp, *avgs_ofp, *iostat_ofp;
 extern int is_lvm, verbose, ifd;
 extern unsigned int n_devs;
 extern unsigned long n_traces, n_io_allocs, n_io_frees;
@@ -204,9 +217,11 @@ extern struct region_info all_regions;
 extern struct my_mem *free_ios, *free_bits;
 extern char iop_map[];
 extern unsigned int pending_xs;
+extern __u64 iostat_interval, iostat_last_stamp;
 
 void add_trace(struct io *iop);
 int in_devices(struct blk_io_trace *t);
+char *make_dev_hdr(char *pad, size_t len, struct d_info *dip);
 int output_avgs(FILE *ofp);
 int output_ranges(FILE *ofp);
 unsigned int do_read(int ifd, void *buf, int len);
@@ -231,4 +246,12 @@ long long seeki_nseeks(void *handle);
 long long seeki_median(void *handle);
 int seeki_mode(void *handle, long long **modes_p, int *nseeks_p);
 
+void iostat_init(void);
+void iostat_insert(struct io *iop);
+void iostat_merge(struct io *iop);
+void iostat_issue(struct io *iop);
+void iostat_complete(struct io *iop);
+void iostat_check_time(__u64 stamp);
+void iostat_dump_stats(__u64 stamp, int all);
+
 #include "inlines.h"
diff --git a/btt/iostat.c b/btt/iostat.c
new file mode 100644 (file)
index 0000000..4f375dc
--- /dev/null
@@ -0,0 +1,247 @@
+/*
+ * blktrace output analysis: generate a timeline & gather statistics
+ *
+ * Copyright (C) 2006 Alan D. Brunelle <Alan.Brunelle@hp.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include "globals.h"
+
+#define TO_SEC(nanosec)                ((double)(nanosec) / 1.0e9)
+#define TO_MSEC(nanosec)       (1000.0 * TO_SEC(nanosec))
+
+#define INC_STAT(dip, fld)                                             \
+       do {                                                            \
+               (dip)->stats. fld ++;                                   \
+               (dip)->all_stats. fld ++;                               \
+       } while (0)
+
+#define DEC_STAT(dip, fld)                                             \
+       do {                                                            \
+               (dip)->stats. fld --;                                   \
+               (dip)->all_stats. fld --;                               \
+       } while (0)
+
+#define ADD_STAT(dip, fld, val)                                        \
+        do {                                                           \
+                __u64 __v = (val);                                     \
+               (dip)->stats. fld += __v;                               \
+               (dip)->all_stats. fld += __v;                           \
+       } while (0)
+
+#define SUB_STAT(dip, fld, val)                                        \
+        do {                                                           \
+                __u64 __v = (val);                                     \
+               (dip)->stats. fld -= __v;                               \
+               (dip)->all_stats. fld -= __v;                           \
+       } while (0)
+
+__u64 last_start, iostat_last_stamp;
+__u64 iostat_interval = 1;
+char *iostat_name = NULL;
+FILE *iostat_ofp = NULL;
+
+void calc_waits(struct io *d_iop, __u64 c_time)
+{
+       __u64 waits = 0;
+       struct list_head *p;
+       struct io_list *iolp;
+
+       __list_for_each(p, &d_iop->u.d.d_im_head) {
+               iolp = list_entry(p, struct io_list, head);
+               waits += (c_time - iolp->iop->t.time);
+       }
+       ADD_STAT(d_iop->dip, wait, waits);
+}
+
+void dump_hdr(void)
+{
+       fprintf(iostat_ofp, "Device:       rrqm/s   wrqm/s     r/s     w/s    "
+                           "rsec/s    wsec/s     rkB/s     wkB/s "
+                           "avgrq-sz avgqu-sz   await   svctm  %%util   Stamp\n");
+}
+
+void iostat_init(void)
+{
+       last_start = (__u64)-1;
+       if (iostat_ofp)
+               dump_hdr();
+}
+
+void update_tot_qusz(struct d_info *dip, double now)
+{
+       dip->stats.tot_qusz += ((now - dip->stats.last_qu_change) *
+                                               dip->stats.cur_qusz);
+       dip->all_stats.tot_qusz += ((now - dip->all_stats.last_qu_change) *
+                                               dip->all_stats.cur_qusz);
+
+       dip->stats.last_qu_change = dip->all_stats.last_qu_change = now;
+}
+
+void update_idle_time(struct d_info *dip, double now)
+{
+       if (dip->stats.cur_dev == 0) {
+               dip->stats.idle_time += (now - dip->stats.last_dev_change);
+               dip->all_stats.idle_time += 
+                                      (now - dip->all_stats.last_dev_change);
+       }
+       dip->stats.last_dev_change = dip->all_stats.last_dev_change = now;
+}
+
+void __dump_stats(__u64 stamp, int all, struct d_info *dip)
+{
+       char hdr[16];
+       struct stats *sp;
+       double dt, nios, avgrq_sz, p_util, nrqm;
+       double now = TO_SEC(stamp);
+
+       if (all) {
+               dt = (double)stamp / 1.0e9;
+               sp = &dip->all_stats;
+       }
+       else {
+               dt = (double)(stamp-last_start) / 1.0e9;
+               sp = &dip->stats;
+       }
+
+       nios = (double)(sp->ios[0] + sp->ios[1]);
+       nrqm = (double)(sp->rqm[0] + sp->rqm[1]);
+       if (nios > 0.0) {
+               update_idle_time(dip, now);
+               update_tot_qusz(dip, now);
+               avgrq_sz = (double)(sp->sec[0] + sp->sec[1]) / nios;
+               p_util = 100.0 * (1.0 - (sp->idle_time / dt));
+
+               /*
+                * For AWAIT: nios should be the same as number of inserts
+                * and we add in nrqm (number of merges), which should give
+                * us the total number of IOs sent to the block IO layer.
+                */
+               fprintf(iostat_ofp, "%-11s ", make_dev_hdr(hdr, 11, dip));
+               fprintf(iostat_ofp, "%8.2lf ", (double)sp->rqm[1] / dt);
+               fprintf(iostat_ofp, "%8.2lf ", (double)sp->rqm[0] / dt);
+               fprintf(iostat_ofp, "%7.2lf ", (double)sp->ios[1] / dt);
+               fprintf(iostat_ofp, "%7.2lf ", (double)sp->ios[0] / dt);
+               fprintf(iostat_ofp, "%9.2lf ", (double)sp->sec[1] / dt);
+               fprintf(iostat_ofp, "%9.2lf ", (double)sp->sec[0] / dt);
+               fprintf(iostat_ofp, "%9.2lf ", (double)(sp->sec[1] / 2) / dt);
+               fprintf(iostat_ofp, "%9.2lf ", (double)(sp->sec[0] / 2) / dt);
+               fprintf(iostat_ofp, "%8.2lf ", avgrq_sz);
+               fprintf(iostat_ofp, "%8.2lf ", (double)sp->tot_qusz / dt);
+               fprintf(iostat_ofp, "%7.2lf ", TO_MSEC(sp->wait) / (nios+nrqm));
+               fprintf(iostat_ofp, "%7.2lf ", TO_MSEC(sp->svctm) / nios);
+               fprintf(iostat_ofp, "%6.2lf", p_util);
+               if (all)
+                       fprintf(iostat_ofp, "%8s\n", "TOTAL");
+               else {
+                       fprintf(iostat_ofp, "%8.2lf\n", TO_SEC(stamp));
+                       sp->rqm[0] = sp->rqm[1] = 0;
+                       sp->ios[0] = sp->ios[1] = 0;
+                       sp->sec[0] = sp->sec[1] = 0;
+                       sp->wait = sp->svctm = 0;
+
+                       sp->tot_qusz = sp->idle_time = 0.0;
+               }
+       }
+}
+
+void iostat_dump_stats(__u64 stamp, int all)
+{
+       struct d_info *dip;
+
+       if (all)
+               dump_hdr();
+       if (devices == NULL) {
+               struct list_head *p;
+
+               __list_for_each(p, &all_devs) {
+                       dip = list_entry(p, struct d_info, head);
+                       __dump_stats(stamp, all, dip);
+               }
+       }
+       else {
+               int i;
+               unsigned int mjr, mnr;
+               char *p = devices;
+
+               while (p && ((i = sscanf(p, "%u,%u", &mjr, &mnr)) == 2)) {
+                       dip = __dip_find((__u32)((mjr << MINORBITS) | mnr));
+                       __dump_stats(stamp, all, dip);
+
+                       p = strchr(p, ';');
+                       if (p) p++;
+               }
+       }
+}
+
+void iostat_check_time(__u64 stamp)
+{
+       if (iostat_ofp) {
+               if (last_start == (__u64)-1)
+                       last_start = stamp;
+               else if ((stamp - last_start) >= iostat_interval) {
+                       iostat_dump_stats(stamp, 0);
+                       last_start = stamp;
+               }
+
+               iostat_last_stamp = stamp;
+       }
+}
+
+void iostat_insert(struct io *iop)
+{
+       update_tot_qusz(iop->dip, TO_SEC(iop->t.time));
+       INC_STAT(iop->dip, cur_qusz);
+}
+
+void iostat_merge(struct io *iop)
+{
+       INC_STAT(iop->dip, rqm[IOP_RW(iop)]);
+}
+
+void iostat_issue(struct io *iop)
+{
+       int rw = IOP_RW(iop);
+       struct d_info *dip = iop->dip;
+       double now = TO_SEC(iop->t.time);
+
+       INC_STAT(dip, ios[rw]);
+       ADD_STAT(dip, sec[rw], iop->t.bytes >> 9);
+
+       update_idle_time(dip, now);
+       INC_STAT(dip, cur_dev);
+}
+
+void iostat_complete(struct io *iop)
+{
+       struct io *d_iop;
+       struct d_info *dip = iop->dip;
+
+       if ((d_iop = iop->u.c.c_d) != NULL) {
+               double now = TO_SEC(iop->t.time);
+               calc_waits(d_iop, iop->t.time);
+
+               update_tot_qusz(dip, now);
+               DEC_STAT(dip, cur_qusz);
+
+               update_idle_time(dip, now);
+               DEC_STAT(dip, cur_dev);
+
+               ADD_STAT(dip, svctm, iop->t.time - d_iop->t.time);
+       }
+}
index 076100cd32ff45b80d2b92b4fdcc71002bee0f9f..5250232aab9390f5d764f96e6bdce42bd42ba891 100644 (file)
@@ -165,6 +165,8 @@ void handle_merge(struct io *iop)
        q_iop = dip_find_exact(dip_get_head(iop->dip, IOP_Q), iop);
        if (q_iop)
                io_link(&iop->u.m.m_q, q_iop);
+
+       iostat_merge(iop);
 }
 
 void handle_insert(struct io *iop)
@@ -174,6 +176,8 @@ void handle_insert(struct io *iop)
        io_setup(iop, IOP_I);
        INIT_LIST_HEAD(&iop->u.i.i_qs_head);
        dip_add_qs(dip_get_head(iop->dip, IOP_Q), iop);
+
+       iostat_insert(iop);
 }
 
 void handle_complete(struct io *iop)
@@ -190,7 +194,7 @@ void handle_complete(struct io *iop)
        d_iop = dip_find_exact(dip_get_head(iop->dip, IOP_D), iop);
        if (d_iop) {
                io_link(&iop->u.c.c_d, d_iop);
-
+               iostat_complete(iop);
                add_cy(iop);
        }
        else
@@ -214,6 +218,8 @@ void handle_issue(struct io *iop)
 
        dip_add_ms(dip_get_head(iop->dip, IOP_M), iop);
        seeki_add(iop->dip->seek_handle, iop);
+
+       iostat_issue(iop);
 }
 
 void handle_split(struct io *iop)
@@ -310,6 +316,7 @@ void __add_trace(struct io *iop)
 {
        n_traces++;
 
+       iostat_check_time(iop->t.time);
        if (verbose && (n_traces % 10000) == 0) {
                printf("%10lu t, %10lu m %1lu f\r",
                       n_traces, n_io_allocs, n_io_frees);