iowatcher: Add bounds checking in find_step
[blktrace.git] / iowatcher / blkparse.c
index 675a7716babb5df57b0e728b0c75b4a2cad44413..d27b54779a017e6d66c2003fe8d5a80c7458db15 100644 (file)
@@ -12,7 +12,7 @@
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  *  Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
  */
@@ -30,6 +30,7 @@
 #include <sys/mman.h>
 #include <time.h>
 #include <math.h>
+#include <dirent.h>
 
 #include "plot.h"
 #include "blkparse.h"
 static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
 static u64 ios_in_flight = 0;
 
+#define PROCESS_HASH_TABLE_BITS 7
+#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
+static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];
+
+extern int plot_io_action;
+extern int io_per_process;
+
+static const int line_len = 1024;
+static char line[1024];
 
 /*
  * Trace categories
@@ -152,11 +162,23 @@ struct pending_io {
        /* sector offset of this IO */
        u64 sector;
 
+       /* dev_t for this IO */
+       u32 device;
+
        /* time this IO was dispatched */
        u64 dispatch_time;
        /* time this IO was finished */
        u64 completion_time;
        struct list_head hash_list;
+       /* process which queued this IO */
+       u32 pid;
+};
+
+/*
+ * Maps a pid to a plot slot and a display name.  Entries are chained on
+ * process_hash_table and are allocated with room for the name right
+ * behind the struct (sizeof(struct pid_map) + strlen(name) + 1).
+ */
+struct pid_map {
+       /* linkage on the process_hash_table chain */
+       struct list_head hash_list;
+       u32 pid;
+       /* slot used to index the per-process plot arrays; 0 until assigned */
+       int index;
+       /* NUL-terminated name, stored inline (C99 flexible array member) */
+       char name[];
 };
 
 #define MINORBITS 20
@@ -201,23 +223,24 @@ static inline u64 hash_sector(u64 val)
        return hash >> (64 - IO_HASH_TABLE_BITS);
 }
 
-static int hash_table_insert(struct pending_io *ins_pio)
+/*
+ * Add ins_pio to the io_hash_table chain selected by hashing its sector.
+ * Returns 0 on success, or -EEXIST (nothing inserted) when an entry with
+ * the same sector and device is already on that chain.
+ */
+static int io_hash_table_insert(struct pending_io *ins_pio)
 {
        u64 sector = ins_pio->sector;
+       u32 dev = ins_pio->device;
        int slot = hash_sector(sector);
        struct list_head *head;
        struct pending_io *pio;
 
        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
-               if (pio->sector == sector)
+               if (pio->sector == sector && pio->device == dev)
                        return -EEXIST;
        }
        list_add_tail(&ins_pio->hash_list, head);
        return 0;
 }
 
-static struct pending_io *hash_table_search(u64 sector)
+static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
 {
        int slot = hash_sector(sector);
        struct list_head *head;
@@ -225,46 +248,132 @@ static struct pending_io *hash_table_search(u64 sector)
 
        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
-               if (pio->sector == sector)
+               if (pio->sector == sector && pio->device == dev)
                        return pio;
        }
        return NULL;
 }
 
-static int hash_dispatched_io(struct blk_io_trace *io)
+/*
+ * Allocate a pending_io for this trace record (sector, device and pid are
+ * copied from it) and add it to io_hash_table.
+ *
+ * Returns the new entry, or NULL when the allocation fails or an entry
+ * for the same sector and device is already hashed.
+ */
+static struct pending_io *hash_queued_io(struct blk_io_trace *io)
 {
        struct pending_io *pio;
        int ret;
 
        pio = calloc(1, sizeof(*pio));
+       if (!pio)
+               return NULL;
        pio->sector = io->sector;
-       pio->dispatch_time = io->time;
+       pio->device = io->device;
+       pio->pid = io->pid;
 
-       ret = hash_table_insert(pio);
-       if (ret == -EEXIST) {
-               /* crud, the IO isn't here */
+       ret = io_hash_table_insert(pio);
+       if (ret < 0) {
+               /* crud, the IO is there already */
                free(pio);
+               return NULL;
+       }
+       return pio;
+}
+
+/*
+ * Record the dispatch time of an IO.  Looks up the hashed entry for this
+ * sector/device and, when there is none, creates one via hash_queued_io().
+ * Returns the entry with dispatch_time set to io->time, or NULL if no
+ * entry was found and none could be created.
+ */
+static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
+{
+       struct pending_io *pio;
+
+       pio = io_hash_table_search(io->sector, io->device);
+       if (!pio) {
+               pio = hash_queued_io(io);
+               if (!pio)
+                       return NULL;
        }
-       return ret;
+       pio->dispatch_time = io->time;
+       return pio;
 }
 
+/*
+ * Find the pending IO that matches a completion record.  Returns the
+ * hashed entry for io->sector on io->device, or NULL when there is none.
+ * The entry is not removed from the hash table here.
+ */
 static struct pending_io *hash_completed_io(struct blk_io_trace *io)
 {
        struct pending_io *pio;
 
-       pio = hash_table_search(io->sector);
+       pio = io_hash_table_search(io->sector, io->device);
 
        if (!pio)
                return NULL;
        return pio;
 }
 
+/* Turn every bucket of process_hash_table into an empty list. */
+void init_process_hash_table(void)
+{
+       struct list_head *bucket = process_hash_table;
+       struct list_head *end = process_hash_table + PROCESS_HASH_TABLE_SIZE;
+
+       while (bucket != end) {
+               INIT_LIST_HEAD(bucket);
+               bucket++;
+       }
+}
+
+/*
+ * Fold a pid into a bucket index in [0, PROCESS_HASH_TABLE_SIZE).
+ *
+ * The pid is XORed with copies of itself shifted right by 4 and by 6
+ * bits.  A (pid >> 3) term XORed in twice cancels itself out, so it is
+ * not spelled out here; the bucket chosen is the same for every pid.
+ */
+static u32 hash_pid(u32 pid)
+{
+       u32 folded = pid ^ (pid >> 4) ^ (pid >> 6);
+
+       return folded & (PROCESS_HASH_TABLE_SIZE - 1);
+}
+
+/* Return the pid_map hashed for pid, or NULL when the pid is unknown. */
+static struct pid_map *process_hash_search(u32 pid)
+{
+       struct list_head *bucket = process_hash_table + hash_pid(pid);
+       struct pid_map *entry;
+
+       list_for_each_entry(entry, bucket, hash_list) {
+               if (entry->pid != pid)
+                       continue;
+               return entry;
+       }
+       return NULL;
+}
+
+/*
+ * Make sure pid has an entry in process_hash_table and return it.
+ *
+ * name may be NULL: an existing entry is then returned untouched, and a
+ * new entry is named "[<pid>]".  When name differs from the stored one
+ * the entry is replaced by a freshly allocated one that keeps the old
+ * plot index.  New entries start with index 0.
+ *
+ * Never returns NULL; the program exits if memory cannot be allocated.
+ */
+static struct pid_map *process_hash_insert(u32 pid, char *name)
+{
+       int slot = hash_pid(pid);
+       struct pid_map *pm;
+       int old_index = 0;
+       char buf[16];
+
+       pm = process_hash_search(pid);
+       if (pm) {
+               /* Entry exists and name shouldn't be changed? */
+               if (!name || !strcmp(name, pm->name))
+                       return pm;
+               /* renamed: the name is stored inline, so rebuild the entry */
+               list_del(&pm->hash_list);
+               old_index = pm->index;
+               free(pm);
+       }
+       if (!name) {
+               /* '[' + at most 10 digits + ']' + NUL fits in buf */
+               snprintf(buf, sizeof(buf), "[%u]", pid);
+               name = buf;
+       }
+       pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
+       if (!pm) {
+               perror("memory allocation failed");
+               exit(1);
+       }
+       pm->pid = pid;
+       pm->index = old_index;
+       strcpy(pm->name, name);
+       list_add_tail(&pm->hash_list, process_hash_table + slot);
+
+       return pm;
+}
+
 static void handle_notify(struct trace *trace)
 {
        struct blk_io_trace *io = trace->io;
        void *payload = (char *)io + sizeof(*io);
        u32 two32[2];
 
+       if (io->action == BLK_TN_PROCESS) {
+               if (io_per_process)
+                       process_hash_insert(io->pid, payload);
+               return;
+       }
 
        if (io->action != BLK_TN_TIMESTAMP)
                return;
@@ -413,6 +522,66 @@ out:
        return -1;
 }
 
+/*
+ * Return the dev_info slot for the device of this trace record.
+ * A device seen for the first time is given the next slot of
+ * trace->devices; the program exits once the trace refers to more than
+ * MAX_DEVICES_PER_TRACE devices.
+ */
+static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
+{
+       struct dev_info *di;
+       u32 dev = io->device;
+       int i;
+
+       for (i = 0; i < trace->num_devices; i++) {
+               di = trace->devices + i;
+               if (di->device == dev)
+                       return di;
+       }
+
+       /* unknown device: claim the next slot */
+       i = trace->num_devices++;
+       if (i >= MAX_DEVICES_PER_TRACE) {
+               fprintf(stderr, "Trace contains too many devices (%d)\n", i);
+               exit(1);
+       }
+       di = trace->devices + i;
+       di->device = dev;
+       return di;
+}
+
+/*
+ * Walk the whole trace once to collect, per device, the lowest and
+ * highest byte offset touched, then give each device a start offset
+ * (di->map) so that the devices are laid out back to back.
+ * Leaves the trace rewound to its first record.
+ *
+ * NOTE(review): di->min is only ever lowered here and lookup_dev() does
+ * not set it for a new device, so it keeps whatever value the dev_info
+ * started with unless that is larger than the offsets seen -- confirm
+ * where it is initialised.
+ */
+static void map_devices(struct trace *trace)
+{
+       struct dev_info *di;
+       u64 offset;
+       u64 next_map = 0;
+       int i;
+
+       first_record(trace);
+       do {
+               /* notify records carry no IO offsets */
+               if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
+                       di = lookup_dev(trace, trace->io);
+
+                       /* start of the IO: sector number to bytes */
+                       offset = trace->io->sector << 9;
+                       if (offset < di->min)
+                               di->min = offset;
+
+                       /* end of the IO */
+                       offset += trace->io->bytes;
+                       if (di->max < offset)
+                               di->max = offset;
+               }
+       } while (!next_record(trace));
+       first_record(trace);
+
+       for (i = 0; i < trace->num_devices; i++) {
+               di = trace->devices + i;
+               di->map = next_map;
+               next_map += di->max - di->min;
+       }
+}
+
+/*
+ * Translate the byte offset of io on its own device into an offset in
+ * the combined address space built by map_devices(): the device's map
+ * start plus the IO's distance from the device's minimum offset.
+ *
+ * The offset is taken from the io argument itself, so the result stays
+ * consistent with the device looked up for that same record.
+ */
+u64 map_io(struct trace *trace, struct blk_io_trace *io)
+{
+       struct dev_info *di = lookup_dev(trace, io);
+       u64 val = io->sector << 9;      /* sectors to bytes */
+
+       return di->map + val - di->min;
+}
+
 void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
                          u64 *max_offset_ret)
 {
@@ -421,10 +590,13 @@ void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *
        u64 max_bank = 0;
        u64 max_bank_offset = 0;
        u64 num_banks = 0;
+
+       map_devices(trace);
+
        first_record(trace);
        while (1) {
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-                       found = trace->io->sector << 9;
+                       found = map_io(trace, trace->io);
                        if (found < min)
                                min = found;
 
@@ -452,6 +624,27 @@ void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *
        *max_offset_ret = max_bank_offset;
 }
 
+/*
+ * Note on the trace which kinds of IO events (queue, issue, completion)
+ * it contains, based on the current record.  Notify records are ignored.
+ */
+static void check_io_types(struct trace *trace)
+{
+       struct blk_io_trace *io = trace->io;
+       int kind;
+
+       if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+               return;
+
+       kind = io->action & BLK_TA_MASK;
+       switch (kind) {
+       case __BLK_TA_QUEUE:
+               trace->found_queue = 1;
+               break;
+       case __BLK_TA_ISSUE:
+               trace->found_issue = 1;
+               break;
+       case __BLK_TA_COMPLETE:
+               trace->found_completion = 1;
+               break;
+       default:
+               break;
+       }
+}
+
+
 int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
                    u64 *yzoom_min, u64 *yzoom_max)
 {
@@ -467,16 +660,17 @@ int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
        memset(min_per_bucket, 0xff, sizeof(u64) * 11);
        first_record(trace);
        while (1) {
+               check_io_types(trace);
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
                    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
-                       u64 off = (trace->io->sector << 9) - min_offset;
+                       u64 off = map_io(trace, trace->io) - min_offset;
 
                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off < min_per_bucket[slot])
                                min_per_bucket[slot] = off;
 
-                       off += trace->io->bytes;
+                       off += trace->io->bytes;
                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off > max_per_bucket[slot])
@@ -514,33 +708,139 @@ int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
        return 0;
 }
 
+static char footer[] = ".blktrace.0";
+/* length of the suffix itself, not counting the terminating NUL */
+static int footer_len = sizeof(footer) - 1;
+
+/*
+ * If name looks like "<base>.blktrace.0" (with a non-empty base), append
+ * " -i '<base>'" to the option string in *traces.  *traces may be NULL on
+ * entry; it is replaced by a newly allocated string and the old one is
+ * freed.  Other names leave *traces untouched.
+ *
+ * Exits when memory runs out or the option string no longer fits in the
+ * shared line buffer.
+ */
+static void match_trace(char *name, char **traces)
+{
+       int match_len;
+       char *match;
+       int footer_start;
+       int n;
+
+       match_len = strlen(name);
+       /* need at least one character of base name in front of the suffix */
+       if (match_len <= footer_len)
+               return;
+
+       footer_start = match_len - footer_len;
+       if (strcmp(name + footer_start, footer) != 0)
+               return;
+
+       /*
+        * The base name is pasted between single quotes on a shell command
+        * line; a quote inside it would end the quoting early.
+        */
+       if (strchr(name, '\'')) {
+               fprintf(stderr, "skipping trace with unsupported name %s\n", name);
+               return;
+       }
+
+       match = strdup(name);
+       if (!match)
+               goto enomem;
+
+       /* cut the suffix off, leaving only the base name */
+       match[footer_start] = '\0';
+       n = snprintf(line, line_len, "%s -i '%s'", *traces ? *traces : "", match);
+       free(match);
+       if (n < 0 || n >= line_len) {
+               /* a truncated list would silently drop traces */
+               fprintf(stderr, "too many traces to combine\n");
+               exit(1);
+       }
+
+       match = strdup(line);
+       if (!match)
+               goto enomem;
+
+       free(*traces);
+       *traces = match;
+       return;
+
+enomem:
+       perror("memory allocation failed");
+       exit(1);
+}
+
+/*
+ * Combine every "<name>.blktrace.0" trace found in dir_name into a single
+ * dump by running blkparse; the dump is written to "<dir_name>.dump".
+ *
+ * Returns a newly allocated copy of the dump file name (the caller frees
+ * it).  Returns NULL when the directory cannot be opened, holds no
+ * matching trace, or the final strdup() fails.  Exits when the command
+ * does not fit in the line buffer or blkparse reports a failure.
+ */
+static char *combine_blktrace_devs(char *dir_name)
+{
+       DIR *dir;
+       char *traces = NULL;
+       struct dirent *d;
+       int n;
+       int ret;
+
+       dir = opendir(dir_name);
+       if (!dir)
+               return NULL;
+
+       /* match_trace() ignores entries that are not <name>.blktrace.0 */
+       while ((d = readdir(dir)) != NULL)
+               match_trace(d->d_name, &traces);
+
+       closedir(dir);
+
+       if (!traces)
+               return NULL;
+
+       /* quote the directory so a path with spaces stays one argument */
+       n = snprintf(line, line_len, "blkparse -O %s -D '%s' -d '%s.%s'",
+                    traces, dir_name, dir_name, "dump");
+       /* the list now lives in the command line; don't leak it */
+       free(traces);
+       if (n < 0 || n >= line_len) {
+               fprintf(stderr, "blkparse command too long for %s\n", dir_name);
+               exit(1);
+       }
+
+       ret = system(line);
+       if (ret) {
+               fprintf(stderr, "blkparse failure %s\n", line);
+               exit(1);
+       }
+       snprintf(line, line_len, "%s.%s", dir_name, "dump");
+       return strdup(line);
+}
+
 static char *find_trace_file(char *filename)
 {
        int ret;
        struct stat st;
-       char line[1024];
        char *dot;
        char *try;
+       int found_dir = 0;
 
+       /* look for an exact match of whatever they pass in.
+        * If it is a file, assume it is the dump file.
+        * If a directory, remember that it existed so we
+        * can combine traces in that directory later
+        */
        ret = stat(filename, &st);
-       if (ret == 0)
-               return strdup(filename);
+       if (ret == 0) {
+               if (S_ISREG(st.st_mode))
+                       return strdup(filename);
+
+               if (S_ISDIR(st.st_mode))
+                       found_dir = 1;
+       }
 
-       snprintf(line, 1024, "%s.%s", filename, "dump");
+       /*
+        * try tacking .dump onto the end and see if that already
+        * has been generated
+        */
+       snprintf(line, line_len, "%s.%s", filename, "dump");
        ret = stat(line, &st);
        if (ret == 0)
                return strdup(line);
 
+       /*
+        * try to generate the .dump from all the traces in
+        * a single dir.
+        */
+       if (found_dir) {
+               try = combine_blktrace_devs(filename);
+               if (try)
+                       return try;
+       }
+
+       /*
+        * try to generate the .dump from all the blktrace
+        * files for a named trace
+        */
        try = strdup(filename);
        dot = strrchr(try, '.');
        if (!dot || strcmp(".dump", dot) != 0) {
-               if (dot)
+               if (dot && dot != try)
                        *dot = '\0';
-               snprintf(line, 1024, "%s%s", try, ".blktrace.0");
+               snprintf(line, line_len, "%s%s", try, ".blktrace.0");
                ret = stat(line, &st);
                if (ret == 0) {
                        blktrace_to_dump(try);
-                       snprintf(line, 1024, "%s.%s", try, "dump");
+                       snprintf(line, line_len, "%s.%s", try, "dump");
                        ret = stat(line, &st);
                        if (ret == 0) {
                                free(try);
@@ -613,8 +913,23 @@ static inline int tput_event(struct trace *trace)
        return __BLK_TA_COMPLETE;
 }
 
+/*
+ * Convert an action letter into the matching trace action value:
+ * 'Q' is queue, 'D' is issue, 'C' is complete.  Any other character
+ * yields -1.
+ */
+int action_char_to_num(char action)
+{
+       if (action == 'Q')
+               return __BLK_TA_QUEUE;
+       if (action == 'D')
+               return __BLK_TA_ISSUE;
+       if (action == 'C')
+               return __BLK_TA_COMPLETE;
+       return -1;
+}
+
 static inline int io_event(struct trace *trace)
 {
+       if (plot_io_action)
+               return plot_io_action;
        if (trace->found_queue)
                return __BLK_TA_QUEUE;
        if (trace->found_issue)
@@ -625,9 +940,11 @@ static inline int io_event(struct trace *trace)
        return __BLK_TA_COMPLETE;
 }
 
-void add_tput(struct trace *trace, struct graph_line_data *gld)
+void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
+             struct graph_line_data *reads_gld)
 {
        struct blk_io_trace *io = trace->io;
+       struct graph_line_data *gld;
        int action = io->action & BLK_TA_MASK;
        int seconds;
 
@@ -637,24 +954,65 @@ void add_tput(struct trace *trace, struct graph_line_data *gld)
        if (action != tput_event(trace))
                return;
 
+       if (BLK_DATADIR(io->action) & BLK_TC_READ)
+               gld = reads_gld;
+       else
+               gld = writes_gld;
+
        seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds) {
-               fprintf(stderr, "Bad record %d %d %d\n", seconds, gld->max_seconds, action);
-               abort();
-       }
+       if (seconds > gld->max_seconds)
+               return;
 
        gld->data[seconds].sum += io->bytes;
+
        gld->data[seconds].count = 1;
        if (gld->data[seconds].sum > gld->max)
                gld->max = gld->data[seconds].sum;
 }
 
-void add_io(struct trace *trace, struct graph_dot_data *gdd_writes,
-           struct graph_dot_data *gdd_reads)
+#define GDD_PTR_ALLOC_STEP 16
+
+/*
+ * Return the pid_map used to plot IO from pid in trace file tf.
+ *
+ * Without per-process plotting this returns NULL after making sure
+ * tf->io_plots is at least 1.  Otherwise the pid's entry is looked up or
+ * created; an entry whose index is still 0 is given the next plot slot,
+ * growing tf->gdd_reads and tf->gdd_writes by GDD_PTR_ALLOC_STEP zeroed
+ * pointers whenever all allocated slots are in use.  Aborts if the
+ * arrays cannot be grown.
+ *
+ * NOTE(review): index 0 doubles as the "no slot yet" marker, so a pid
+ * that is handed slot 0 would be treated as new on every call -- confirm
+ * tf->io_plots cannot be 0 here in per-process mode.
+ */
+static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
+{
+       const size_t step_bytes = GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *);
+       struct pid_map *pm;
+
+       if (!io_per_process) {
+               if (!tf->io_plots)
+                       tf->io_plots = 1;
+               return NULL;
+       }
+
+       pm = process_hash_insert(pid, NULL);
+       if (pm->index)
+               return pm;
+
+       if (tf->io_plots == tf->io_plots_allocated) {
+               tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
+
+               tf->gdd_reads = realloc(tf->gdd_reads,
+                                       tf->io_plots_allocated * sizeof(struct graph_dot_data *));
+               if (!tf->gdd_reads)
+                       abort();
+               tf->gdd_writes = realloc(tf->gdd_writes,
+                                        tf->io_plots_allocated * sizeof(struct graph_dot_data *));
+               if (!tf->gdd_writes)
+                       abort();
+
+               /* clear the pointers in the freshly added tail of each array */
+               memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
+                      0, step_bytes);
+               memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
+                      0, step_bytes);
+       }
+       pm->index = tf->io_plots++;
+       return pm;
+}
+
+void add_io(struct trace *trace, struct trace_file *tf)
 {
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        u64 offset;
+       int index;
+       char *label;
+       struct pid_map *pm;
 
        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;
@@ -662,38 +1020,59 @@ void add_io(struct trace *trace, struct graph_dot_data *gdd_writes,
        if (action != io_event(trace))
                return;
 
-       offset = io->sector << 9;
+       offset = map_io(trace, io);
 
-       if (BLK_DATADIR(io->action) & BLK_TC_READ)
-               set_gdd_bit(gdd_reads, offset, io->bytes, io->time);
-       else if (BLK_DATADIR(io->action) & BLK_TC_WRITE)
-               set_gdd_bit(gdd_writes, offset, io->bytes, io->time);
+       pm = get_pid_map(tf, io->pid);
+       if (!pm) {
+               index = 0;
+               label = "";
+       } else {
+               index = pm->index;
+               label = pm->name;
+       }
+       if (BLK_DATADIR(io->action) & BLK_TC_READ) {
+               if (!tf->gdd_reads[index])
+                       tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
+               set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
+       } else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
+               if (!tf->gdd_writes[index])
+                       tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
+               set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
+       }
 }
 
 void add_pending_io(struct trace *trace, struct graph_line_data *gld)
 {
-       int ret;
        int seconds;
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        double avg;
+       struct pending_io *pio;
 
        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;
 
+       if (action == __BLK_TA_QUEUE) {
+               if (trace->found_issue || trace->found_completion)
+                       hash_queued_io(trace->io);
+               return;
+       }
        if (action != __BLK_TA_ISSUE)
                return;
 
        seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds) {
-               fprintf(stderr, "Bad record %d %d\n", seconds, gld->max_seconds);
-               abort();
-       }
+       if (seconds > gld->max_seconds)
+               return;
 
-       ret = hash_dispatched_io(trace->io);
-       if (ret)
+       pio = hash_dispatched_io(trace->io);
+       if (!pio)
                return;
 
+       if (!trace->found_completion) {
+               list_del(&pio->hash_list);
+               free(pio);
+       }
+
        ios_in_flight++;
 
        gld->data[seconds].sum += ios_in_flight;
@@ -759,10 +1138,8 @@ void add_iop(struct trace *trace, struct graph_line_data *gld)
                return;
 
        seconds = SECONDS(io->time);
-       if (seconds > gld->max_seconds) {
-               fprintf(stderr, "Bad record %d %d\n", seconds, gld->max_seconds);
-               abort();
-       }
+       if (seconds > gld->max_seconds)
+               return;
 
        gld->data[seconds].sum += 1;
        gld->data[seconds].count = 1;
@@ -772,21 +1149,5 @@ void add_iop(struct trace *trace, struct graph_line_data *gld)
 
+/*
+ * Per-record hook: hands the current record to handle_notify().
+ * The IO-type bookkeeping removed below is done by check_io_types().
+ */
 void check_record(struct trace *trace)
 {
-       struct blk_io_trace *io = trace->io;
-       int action = io->action & BLK_TA_MASK;
-
-       if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-               switch (action) {
-               case __BLK_TA_COMPLETE:
-                       trace->found_completion = 1;
-                       break;
-               case __BLK_TA_ISSUE:
-                       trace->found_issue = 1;
-                       break;
-               case __BLK_TA_QUEUE:
-                       trace->found_queue = 1;
-                       break;
-               };
-       }
        handle_notify(trace);
 }