iowatcher: Convert start_mpstat to run_program
[blktrace.git] / iowatcher / blkparse.c
index 7d32f40b54729fa5ae9282b104970ea5b14c3178..f837f39de9727c0add5f292d340432c338b01308 100644 (file)
@@ -12,7 +12,7 @@
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  *  Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
  */
@@ -30,6 +30,7 @@
 #include <sys/mman.h>
 #include <time.h>
 #include <math.h>
+#include <dirent.h>
 
 #include "plot.h"
 #include "blkparse.h"
@@ -158,6 +159,9 @@ struct pending_io {
        /* sector offset of this IO */
        u64 sector;
 
+       /* dev_t for this IO */
+       u32 device;
+
        /* time this IO was dispatched */
        u64 dispatch_time;
        /* time this IO was finished */
@@ -174,12 +178,10 @@ struct pid_map {
        char name[0];
 };
 
-#define MINORBITS 20
-#define MINORMASK ((1 << MINORBITS) - 1)
-#define SECONDS(x)              ((unsigned long long)(x) / 1000000000)
-#define NANO_SECONDS(x)         ((unsigned long long)(x) % 1000000000)
-#define DOUBLE_TO_NANO_ULL(d)   ((unsigned long long)((d) * 1000000000))
-#define CHECK_MAGIC(t)          (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
+/*
+ * Return the 'time' field of the record the trace currently points at
+ * (trace->io).
+ */
+u64 get_record_time(struct trace *trace)
+{
+       struct blk_io_trace *cur = trace->io;
+
+       return cur->time;
+}
 
 void init_io_hash_table(void)
 {
@@ -219,20 +221,21 @@ static inline u64 hash_sector(u64 val)
+/*
+ * Add ins_pio to the pending-IO hash table.  The bucket is picked from the
+ * sector alone (hash_sector()); entries inside a bucket are told apart by
+ * sector and device.
+ * Returns 0 after appending ins_pio to its bucket, or -EEXIST (nothing is
+ * inserted) when the bucket already holds an entry with the same sector
+ * and device.
+ */
 static int io_hash_table_insert(struct pending_io *ins_pio)
 {
        u64 sector = ins_pio->sector;
+       u32 dev = ins_pio->device;
        int slot = hash_sector(sector);
        struct list_head *head;
        struct pending_io *pio;
 
        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
-               if (pio->sector == sector)
+               if (pio->sector == sector && pio->device == dev)
                        return -EEXIST;
        }
        list_add_tail(&ins_pio->hash_list, head);
        return 0;
 }
 
-static struct pending_io *io_hash_table_search(u64 sector)
+static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
 {
        int slot = hash_sector(sector);
        struct list_head *head;
@@ -240,48 +243,50 @@ static struct pending_io *io_hash_table_search(u64 sector)
 
        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
-               if (pio->sector == sector)
+               if (pio->sector == sector && pio->device == dev)
                        return pio;
        }
        return NULL;
 }
 
+/*
+ * Allocate a pending_io for 'io' (sector, device and pid are copied from
+ * the record) and insert it into the pending-IO hash table.
+ *
+ * Returns the new entry, or NULL when the allocation fails or when an
+ * entry with the same sector and device is already hashed; nothing is
+ * leaked in either case.
+ */
-static int hash_queued_io(struct blk_io_trace *io)
+static struct pending_io *hash_queued_io(struct blk_io_trace *io)
 {
        struct pending_io *pio;
        int ret;
 
        pio = calloc(1, sizeof(*pio));
+       /* out of memory: report it the same way as "could not hash" */
+       if (!pio)
+               return NULL;
        pio->sector = io->sector;
+       pio->device = io->device;
        pio->pid = io->pid;
 
        ret = io_hash_table_insert(pio);
        if (ret < 0) {
                /* crud, the IO is there already */
                free(pio);
-               return ret;
+               return NULL;
        }
-       return 0;
+       return pio;
 }
 
+/*
+ * Stamp the dispatch time on the pending_io that matches 'io'.  The entry
+ * is looked up by sector and device; when none is hashed yet, one is
+ * created through hash_queued_io() instead of failing.
+ * Returns the entry with dispatch_time set to io->time, or NULL when no
+ * entry was found and none could be created.
+ */
-static int hash_dispatched_io(struct blk_io_trace *io)
+static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
 {
        struct pending_io *pio;
 
-       pio = io_hash_table_search(io->sector);
+       pio = io_hash_table_search(io->sector, io->device);
        if (!pio) {
-               /* crud, the IO isn't here */
-               return -EEXIST;
+               pio = hash_queued_io(io);
+               if (!pio)
+                       return NULL;
        }
        pio->dispatch_time = io->time;
-       return 0;
+       return pio;
 }
 
 static struct pending_io *hash_completed_io(struct blk_io_trace *io)
 {
        struct pending_io *pio;
 
-       pio = io_hash_table_search(io->sector);
+       pio = io_hash_table_search(io->sector, io->device);
 
        if (!pio)
                return NULL;
@@ -512,6 +517,66 @@ out:
        return -1;
 }
 
+/*
+ * Return the dev_info slot in trace->devices whose 'device' matches
+ * io->device.  When the device has not been seen before, the next unused
+ * slot is claimed for it (num_devices is bumped and the slot's 'device' is
+ * filled in).  Prints a message and exits with status 1 once the claimed
+ * index reaches MAX_DEVICES_PER_TRACE.
+ */
+static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
+{
+       u32 wanted = io->device;
+       struct dev_info *slot;
+       int idx = 0;
+
+       while (idx < trace->num_devices) {
+               slot = &trace->devices[idx];
+               if (slot->device == wanted)
+                       return slot;
+               idx++;
+       }
+
+       /* unknown device: take the next slot, if there is one left */
+       idx = trace->num_devices++;
+       if (idx >= MAX_DEVICES_PER_TRACE) {
+               fprintf(stderr, "Trace contains too many devices (%d)\n", idx);
+               exit(1);
+       }
+       slot = &trace->devices[idx];
+       slot->device = wanted;
+       return slot;
+}
+
+/*
+ * Walk every record of the trace to learn, per device, the lowest start
+ * offset ('min') and highest end offset ('max') in bytes (sector << 9,
+ * plus the IO's byte count for the end).  Notify records are skipped.
+ * Afterwards each device gets a 'map' base equal to the summed
+ * (max - min) spans of the devices before it in trace->devices.
+ * first_record() is called again once the walk is over.
+ */
+static void map_devices(struct trace *trace)
+{
+       struct dev_info *dev;
+       u64 offset;
+       u64 next_base = 0;
+       int idx;
+
+       first_record(trace);
+       do {
+               if (trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+                       continue;
+
+               dev = lookup_dev(trace, trace->io);
+
+               /* start of the IO */
+               offset = trace->io->sector << 9;
+               if (offset < dev->min)
+                       dev->min = offset;
+
+               /* end of the IO */
+               offset += trace->io->bytes;
+               if (dev->max < offset)
+                       dev->max = offset;
+       } while (!next_record(trace));
+
+       first_record(trace);
+       for (idx = 0; idx < trace->num_devices; idx++) {
+               dev = &trace->devices[idx];
+               dev->map = next_base;
+               next_base += dev->max - dev->min;
+       }
+}
+
+/*
+ * Translate the start of 'io' into the combined offset space: the 'map'
+ * base of the IO's device plus the IO's byte offset (sector << 9)
+ * relative to that device's 'min'.
+ *
+ * The device is looked up (and, if unknown, registered) via lookup_dev().
+ */
+u64 map_io(struct trace *trace, struct blk_io_trace *io)
+{
+       struct dev_info *di = lookup_dev(trace, io);
+       /* use the record we were handed, not whatever trace->io points at */
+       u64 val = io->sector << 9;
+
+       return di->map + val - di->min;
+}
+
 void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
                          u64 *max_offset_ret)
 {
@@ -520,10 +585,13 @@ void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *
        u64 max_bank = 0;
        u64 max_bank_offset = 0;
        u64 num_banks = 0;
+
+       map_devices(trace);
+
        first_record(trace);
        while (1) {
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-                       found = trace->io->sector << 9;
+                       found = map_io(trace, trace->io);
                        if (found < min)
                                min = found;
 
@@ -551,6 +619,27 @@ void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *
        *max_offset_ret = max_bank_offset;
 }
 
+/*
+ * Flag on the trace which kind of action the current record carries:
+ * found_completion, found_issue or found_queue is set to 1 for a
+ * COMPLETE, ISSUE or QUEUE action respectively.  Notify records and all
+ * other actions leave the flags untouched.
+ */
+static void check_io_types(struct trace *trace)
+{
+       struct blk_io_trace *rec = trace->io;
+       int what;
+
+       if (rec->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+               return;
+
+       what = rec->action & BLK_TA_MASK;
+       if (what == __BLK_TA_COMPLETE)
+               trace->found_completion = 1;
+       else if (what == __BLK_TA_ISSUE)
+               trace->found_issue = 1;
+       else if (what == __BLK_TA_QUEUE)
+               trace->found_queue = 1;
+}
+
+
 int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
                    u64 *yzoom_min, u64 *yzoom_max)
 {
@@ -566,16 +655,17 @@ int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
        memset(min_per_bucket, 0xff, sizeof(u64) * 11);
        first_record(trace);
        while (1) {
+               check_io_types(trace);
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
                    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
-                       u64 off = (trace->io->sector << 9) - min_offset;
+                       u64 off = map_io(trace, trace->io) - min_offset;
 
                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off < min_per_bucket[slot])
                                min_per_bucket[slot] = off;
 
-                       off += trace->io->bytes;
+                       off += trace->io->bytes;
                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off > max_per_bucket[slot])
@@ -613,41 +703,197 @@ int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
        return 0;
 }
 
+static char footer[] = ".blktrace.0";
+static int footer_len = sizeof(footer) - 1;
+
+/*
+ * Check whether 'name' ends in ".blktrace.0" with at least one character
+ * in front of it.
+ * Returns 1 on a match and, when 'len' is non-NULL, stores strlen(name)
+ * in *len.  Returns 0 otherwise and leaves *len alone.
+ */
+static int match_trace(char *name, int *len)
+{
+       int name_len = strlen(name);
+       char *tail;
+
+       /* too short to hold a non-empty prefix plus the footer */
+       if (name_len <= footer_len)
+               return 0;
+
+       tail = name + (name_len - footer_len);
+       if (strcmp(tail, footer))
+               return 0;
+
+       if (len)
+               *len = name_len;
+       return 1;
+}
+
+/* One node of a singly linked list of trace file names */
+struct tracelist {
+       /* next node; NULL terminates the list */
+       struct tracelist *next;
+       /* file name passed to blkparse after "-i" */
+       char *name;
+};
+
+/*
+ * Build a list of the entries in 'dir_name' whose names match_trace()
+ * accepts (they end in ".blktrace.0").  Each node stores the full
+ * "dir_name/entry" path.
+ *
+ * Node and path live in one allocation (the path sits right behind the
+ * struct), so a single free() per node releases both.  New nodes are
+ * pushed on the front, so the list is in reverse readdir() order.
+ *
+ * Returns the list head, or NULL when the directory cannot be opened, an
+ * allocation fails, or nothing matched.  When 'len' is non-NULL it gets
+ * the number of nodes, but only if the directory was read to the end; on
+ * the two error paths *len is left untouched.
+ */
+static struct tracelist *traces_list(char *dir_name, int *len)
+{
+       int count = 0;
+       struct tracelist *traces = NULL;
+       int dlen = strlen(dir_name);
+       DIR *dir = opendir(dir_name);
+       if (!dir)
+               return NULL;
+
+       while (1) {
+               int n = 0;
+               struct tracelist *tl;
+               struct dirent *d = readdir(dir);
+               if (!d)
+                       break;
+
+               if (!match_trace(d->d_name, &n))
+                       continue;
+
+               n += dlen + 1; /* dir + '/' + file */
+               /* Allocate space for tracelist + filename + NUL */
+               tl = calloc(1, sizeof(struct tracelist) + (sizeof(char) * (n + 1)));
+               if (!tl) {
+                       /* don't leak the partial list or the DIR handle */
+                       while (traces) {
+                               tl = traces;
+                               traces = traces->next;
+                               free(tl);
+                       }
+                       closedir(dir);
+                       return NULL;
+               }
+               tl->next = traces;
+               tl->name = (char *)(tl + 1);
+               /*
+                * snprintf's size includes the terminating NUL, so pass
+                * n + 1; passing n would cut the last character of the path.
+                */
+               snprintf(tl->name, n + 1, "%s/%s", dir_name, d->d_name);
+               traces = tl;
+               count++;
+       }
+
+       closedir(dir);
+
+       if (len)
+               *len = count;
+
+       return traces;
+}
+
+/*
+ * Release every node of the list starting at 'traces'.  Each node is
+ * handed to free() once; a NULL list is a no-op.
+ */
+static void traces_free(struct tracelist *traces)
+{
+       struct tracelist *next;
+
+       for (; traces; traces = next) {
+               /* grab the link before the node goes away */
+               next = traces->next;
+               free(traces);
+       }
+}
+
+/*
+ * Run "blkparse -O -d <dumpfile> -i <name> [-i <name> ...]" with one
+ * "-i <name>" pair per node of 'traces'.  'count' must be the number of
+ * nodes in the list: the argument vector is sized from it.
+ *
+ * Returns -errno when the argument vector cannot be allocated, otherwise
+ * whatever run_program() returned (a non-zero value is also reported on
+ * stderr).
+ */
+static int dump_traces(struct tracelist *traces, int count, char *dumpfile)
+{
+       /* four fixed arguments, then two per trace */
+       int argc = 4 + count * 2;
+       char **argv = calloc(argc + 1, sizeof(char *));
+       struct tracelist *cur = traces;
+       char **arg;
+       int err;
+
+       if (!argv)
+               return -errno;
+
+       arg = argv;
+       *arg++ = "blkparse";
+       *arg++ = "-O";
+       *arg++ = "-d";
+       *arg++ = dumpfile;
+       while (cur != NULL) {
+               *arg++ = "-i";
+               *arg++ = cur->name;
+               cur = cur->next;
+       }
+
+       /* NOTE(review): meaning of the trailing 1, NULL, NULL is defined by run_program() - confirm there */
+       err = run_program(argc, argv, 1, NULL, NULL);
+       if (err != 0)
+               fprintf(stderr, "%s exited with %d, expected 0\n", argv[0], err);
+       free(argv);
+       return err;
+}
+
+/*
+ * Work out which dump file to read for 'filename', generating it with
+ * blkparse when needed.  Tried in order:
+ *   1. 'filename' itself, when it is a regular file;
+ *   2. "<filename>.dump", when it already exists;
+ *   3. when 'filename' is a directory, a "<filename>.dump" built from
+ *      every *.blktrace.0 file in it;
+ *   4. unless 'filename' ends in ".dump", a "<base>.dump" built from
+ *      "<base>.blktrace.0", where <base> is 'filename' minus its last
+ *      extension.
+ *
+ * Returns a heap-allocated path (strdup/asprintf; presumably the caller's
+ * to free) or NULL when nothing worked.  Trailing '/'s of a directory
+ * name are stripped from 'filename' in place.
+ */
 static char *find_trace_file(char *filename)
 {
        int ret;
        struct stat st;
-       char line[1024];
        char *dot;
-       char *try;
-
+       int found_dir = 0;
+       char *dumpfile;
+       int len = strlen(filename);
+
+       /* look for an exact match of whatever they pass in.
+        * If it is a file, assume it is the dump file.
+        * If a directory, remember that it existed so we
+        * can combine traces in that directory later
+        */
        ret = stat(filename, &st);
-       if (ret == 0)
-               return strdup(filename);
+       if (ret == 0) {
+               if (S_ISREG(st.st_mode))
+                       return strdup(filename);
+
+               if (S_ISDIR(st.st_mode))
+                       found_dir = 1;
+       }
 
-       snprintf(line, 1024, "%s.%s", filename, "dump");
-       ret = stat(line, &st);
+       if (found_dir) {
+               /*
+                * Eat up trailing '/'s.  Stop at the first byte so "/"
+                * stays intact and we never index before the buffer;
+                * keep len in step with the shortened string.
+                */
+               while (len > 1 && filename[len - 1] == '/')
+                       filename[--len] = '\0';
+       }
+
+       /*
+        * try tacking .dump onto the end and see if that already
+        * has been generated
+        */
+       ret = asprintf(&dumpfile, "%s.dump", filename);
+       if (ret == -1) {
+               perror("Error building dump file name");
+               return NULL;
+       }
+       ret = stat(dumpfile, &st);
        if (ret == 0)
-               return strdup(line);
+               return dumpfile;
+
+       /*
+        * try to generate the .dump from all the traces in
+        * a single dir.
+        */
+       if (found_dir) {
+               int count;
+               struct tracelist *traces = traces_list(filename, &count);
+               if (traces) {
+                       ret = dump_traces(traces, count, dumpfile);
+                       traces_free(traces);
+                       if (ret == 0)
+                               return dumpfile;
+               }
+       }
+       free(dumpfile);
 
-       try = strdup(filename);
-       dot = strrchr(try, '.');
+       /*
+        * try to generate the .dump from all the blktrace
+        * files for a named trace
+        */
+       dot = strrchr(filename, '.');
        if (!dot || strcmp(".dump", dot) != 0) {
-               if (dot && dot != try)
-                       *dot = '\0';
-               snprintf(line, 1024, "%s%s", try, ".blktrace.0");
-               ret = stat(line, &st);
+               struct tracelist trace = {0};
+               /*
+                * Only a '.' in the last path component is an extension;
+                * one that is followed by a '/' belongs to a directory
+                * name ("../trace") and must not be cut at.
+                */
+               if (dot && dot != filename && !strchr(dot, '/'))
+                       len = dot - filename;
+
+               /* "%.*s" truncates to len bytes; "%*s" would only pad */
+               ret = asprintf(&trace.name, "%.*s.blktrace.0", len, filename);
+               if (ret == -1)
+                       return NULL;
+               ret = asprintf(&dumpfile, "%.*s.dump", len, filename);
+               if (ret == -1) {
+                       free(trace.name);
+                       return NULL;
+               }
+
+               ret = dump_traces(&trace, 1, dumpfile);
                if (ret == 0) {
-                       blktrace_to_dump(try);
-                       snprintf(line, 1024, "%s.%s", try, "dump");
-                       ret = stat(line, &st);
-                       if (ret == 0) {
-                               free(try);
-                               return strdup(line);
-                       }
+                       free(trace.name);
+                       return dumpfile;
                }
+               free(trace.name);
+               free(dumpfile);
        }
-       free(try);
        return NULL;
 }
 struct trace *open_trace(char *filename)
@@ -739,9 +985,11 @@ static inline int io_event(struct trace *trace)
        return __BLK_TA_COMPLETE;
 }
 
-void add_tput(struct trace *trace, struct graph_line_data *gld)
+void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
+             struct graph_line_data *reads_gld)
 {
        struct blk_io_trace *io = trace->io;
+       struct graph_line_data *gld;
        int action = io->action & BLK_TA_MASK;
        int seconds;
 
@@ -751,11 +999,14 @@ void add_tput(struct trace *trace, struct graph_line_data *gld)
        if (action != tput_event(trace))
                return;
 
-       seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds)
-               return;
+       if (BLK_DATADIR(io->action) & BLK_TC_READ)
+               gld = reads_gld;
+       else
+               gld = writes_gld;
 
+       seconds = SECONDS(io->time);
        gld->data[seconds].sum += io->bytes;
+
        gld->data[seconds].count = 1;
        if (gld->data[seconds].sum > gld->max)
                gld->max = gld->data[seconds].sum;
@@ -811,7 +1062,7 @@ void add_io(struct trace *trace, struct trace_file *tf)
        if (action != io_event(trace))
                return;
 
-       offset = io->sector << 9;
+       offset = map_io(trace, io);
 
        pm = get_pid_map(tf, io->pid);
        if (!pm) {
@@ -834,32 +1085,40 @@ void add_io(struct trace *trace, struct trace_file *tf)
 
 void add_pending_io(struct trace *trace, struct graph_line_data *gld)
 {
-       int ret;
-       int seconds;
+       unsigned int seconds;
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        double avg;
+       struct pending_io *pio;
 
        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;
 
        if (action == __BLK_TA_QUEUE) {
-               hash_queued_io(trace->io);
+               if (trace->found_issue || trace->found_completion)
+                       hash_queued_io(trace->io);
+               return;
+       }
+       if (action == __BLK_TA_REQUEUE) {
+               if (ios_in_flight > 0)
+                       ios_in_flight--;
                return;
        }
        if (action != __BLK_TA_ISSUE)
                return;
 
-       seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds)
+       pio = hash_dispatched_io(trace->io);
+       if (!pio)
                return;
 
-       ret = hash_dispatched_io(trace->io);
-       if (ret)
-               return;
+       if (!trace->found_completion) {
+               list_del(&pio->hash_list);
+               free(pio);
+       }
 
        ios_in_flight++;
 
+       seconds = SECONDS(io->time);
        gld->data[seconds].sum += ios_in_flight;
        gld->data[seconds].count++;
 
@@ -923,9 +1182,6 @@ void add_iop(struct trace *trace, struct graph_line_data *gld)
                return;
 
        seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds)
-               return;
-
        gld->data[seconds].sum += 1;
        gld->data[seconds].count = 1;
        if (gld->data[seconds].sum > gld->max)
@@ -934,21 +1190,5 @@ void add_iop(struct trace *trace, struct graph_line_data *gld)
 
+/*
+ * Per-record hook: passes the trace to handle_notify().  The
+ * found_completion / found_issue / found_queue bookkeeping that used to
+ * sit here is removed by this change; check_io_types() carries the same
+ * switch.
+ */
 void check_record(struct trace *trace)
 {
-       struct blk_io_trace *io = trace->io;
-       int action = io->action & BLK_TA_MASK;
-
-       if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-               switch (action) {
-               case __BLK_TA_COMPLETE:
-                       trace->found_completion = 1;
-                       break;
-               case __BLK_TA_ISSUE:
-                       trace->found_issue = 1;
-                       break;
-               case __BLK_TA_QUEUE:
-                       trace->found_queue = 1;
-                       break;
-               };
-       }
        handle_notify(trace);
 }