Don't link btrecord against libaio and librt, as it doesn't use any of their symbols
[blktrace.git] / blkparse.c
index f02934120817d7df7a00320af37973df4d621d6d..7b47df09e6e9c04e379c1905245c872d4da77dc3 100644 (file)
@@ -36,7 +36,7 @@
 #include "rbtree.h"
 #include "jhash.h"
 
-static char blkparse_version[] = "0.99.1";
+static char blkparse_version[] = "0.99.3";
 
 struct skip_info {
        unsigned long start, end;
@@ -282,12 +282,16 @@ static unsigned int bit_alloc_cache;
 static unsigned int rb_batch = RB_BATCH_DEFAULT;
 
 static int pipeline;
+static char *pipename;
 
 static int text_output = 1;
 
 #define is_done()      (*(volatile int *)(&done))
 static volatile int done;
 
+struct timespec                abs_start_time;
+static unsigned long long start_timestamp;
+
 #define JHASH_RANDOM   (0x3af5f2ee)
 
 #define CPUS_PER_LONG  (8 * sizeof(unsigned long))
@@ -556,6 +560,40 @@ static void add_ppm_hash(pid_t pid, const char *name)
        }
 }
 
+/* Handle one BLK_TC_NOTIFY record: a pid/name mapping or the start timestamp */
+static void handle_notify(struct blk_io_trace *bit)
+{
+       void    *pdu = (caddr_t) bit + sizeof(*bit);
+       __u32   words[2];
+
+       if (bit->action == BLK_TN_PROCESS) {
+               /* payload carries the name to record for this pid */
+               add_ppm_hash(bit->pid, pdu);
+               return;
+       }
+
+       /* Ignore unknown notify events */
+       if (bit->action != BLK_TN_TIMESTAMP)
+               return;
+
+       /* payload must be exactly two 32-bit words: seconds, nanoseconds */
+       if (bit->pdu_len != sizeof(words))
+               return;
+
+       memcpy(words, pdu, sizeof(words));
+       if (!data_is_native) {
+               words[0] = be32_to_cpu(words[0]);
+               words[1] = be32_to_cpu(words[1]);
+       }
+       start_timestamp = bit->time;
+       abs_start_time.tv_sec  = words[0];
+       abs_start_time.tv_nsec = words[1];
+       if (abs_start_time.tv_nsec < 0) {
+               abs_start_time.tv_sec--;
+               abs_start_time.tv_nsec += 1000000000;
+       }
+}
+
 char *find_process_name(pid_t pid)
 {
        struct process_pid_map *ppm = find_ppm(pid);
@@ -1111,10 +1149,10 @@ static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
 {
        if (rw) {
                ios->mwrites++;
-               ios->qwrite_kb += t_kb(t);
+               ios->mwrite_kb += t_kb(t);
        } else {
                ios->mreads++;
-               ios->qread_kb += t_kb(t);
+               ios->mread_kb += t_kb(t);
        }
 }
 
@@ -1468,15 +1506,14 @@ static void dump_io_stats(struct per_dev_info *pdi, struct io_stats *ios,
 
        fprintf(ofp, " Reads Queued:    %s, %siB\t", size_cnv(x, ios->qreads, 0), size_cnv(y, ios->qread_kb, 1));
        fprintf(ofp, " Writes Queued:    %s, %siB\n", size_cnv(x, ios->qwrites, 0), size_cnv(y, ios->qwrite_kb, 1));
-
        fprintf(ofp, " Read Dispatches: %s, %siB\t", size_cnv(x, ios->ireads, 0), size_cnv(y, ios->iread_kb, 1));
        fprintf(ofp, " Write Dispatches: %s, %siB\n", size_cnv(x, ios->iwrites, 0), size_cnv(y, ios->iwrite_kb, 1));
        fprintf(ofp, " Reads Requeued:  %s\t\t", size_cnv(x, ios->rrqueue, 0));
        fprintf(ofp, " Writes Requeued:  %s\n", size_cnv(x, ios->wrqueue, 0));
        fprintf(ofp, " Reads Completed: %s, %siB\t", size_cnv(x, ios->creads, 0), size_cnv(y, ios->cread_kb, 1));
        fprintf(ofp, " Writes Completed: %s, %siB\n", size_cnv(x, ios->cwrites, 0), size_cnv(y, ios->cwrite_kb, 1));
-       fprintf(ofp, " Read Merges:     %'8lu%8c\t", ios->mreads, ' ');
-       fprintf(ofp, " Write Merges:     %'8lu\n", ios->mwrites);
+       fprintf(ofp, " Read Merges:     %s, %siB\t", size_cnv(x, ios->mreads, 0), size_cnv(y, ios->mread_kb, 1));
+       fprintf(ofp, " Write Merges:     %s, %siB\n", size_cnv(x, ios->mwrites, 0), size_cnv(y, ios->mwrite_kb, 1));
        if (pdi) {
                fprintf(ofp, " Read depth:      %'8u%8c\t", pdi->max_depth[0], ' ');
                fprintf(ofp, " Write depth:      %'8u\n", pdi->max_depth[1]);
@@ -1577,6 +1614,7 @@ static void show_device_and_cpu_stats(void)
        int i, j, pci_events;
        char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
        char name[32];
+       double ratio;
 
        for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
 
@@ -1607,6 +1645,8 @@ static void show_device_and_cpu_stats(void)
                        total.cwrite_kb += ios->cwrite_kb;
                        total.iread_kb += ios->iread_kb;
                        total.iwrite_kb += ios->iwrite_kb;
+                       total.mread_kb += ios->mread_kb;
+                       total.mwrite_kb += ios->mwrite_kb;
                        total.timer_unplugs += ios->timer_unplugs;
                        total.io_unplugs += ios->io_unplugs;
 
@@ -1636,10 +1676,13 @@ static void show_device_and_cpu_stats(void)
                        get_dev_name(pdi, line, sizeof(line)), pdi->events);
 
                collect_pdi_skips(pdi);
+               if (!pdi->skips && !pdi->events)
+                       ratio = 0.0;
+               else
+                       ratio = 100.0 * ((double)pdi->seq_skips /
+                                       (double)(pdi->events + pdi->seq_skips));
                fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%)\n",
-                       pdi->skips,pdi->seq_skips,
-                       100.0 * ((double)pdi->seq_skips /
-                               (double)(pdi->events + pdi->seq_skips)));
+                       pdi->skips, pdi->seq_skips, ratio);
        }
 }
 
@@ -1654,6 +1697,25 @@ static void find_genesis(void)
 
                t = t->next;
        }
+
+       /* The time stamp record will usually be the first record in the
+        * trace, but not always: shift the absolute start time by the gap
+        * between it and genesis_time, then renormalise tv_nsec. */
+       if (start_timestamp
+        && start_timestamp != genesis_time) {
+               long long delta = genesis_time - start_timestamp;
+
+               abs_start_time.tv_sec  += SECONDS(delta);
+               abs_start_time.tv_nsec += NANO_SECONDS(delta);
+               if (abs_start_time.tv_nsec < 0) {
+                       abs_start_time.tv_nsec += 1000000000;
+                       abs_start_time.tv_sec -= 1;
+               } else
+               if (abs_start_time.tv_nsec >= 1000000000) {
+                       abs_start_time.tv_nsec -= 1000000000;
+                       abs_start_time.tv_sec += 1;
+               }
+       }
 }
 
 static inline int check_stopwatch(struct blk_io_trace *bit)
@@ -1975,7 +2037,7 @@ static int read_events(int fd, int always_block, int *fdblock)
                 * not a real trace, so grab and handle it here
                 */
                if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
-                       add_ppm_hash(bit->pid, (char *) bit + sizeof(*bit));
+                       handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        continue;
                }
@@ -2106,6 +2168,7 @@ static int ms_prime(struct ms_stream *msp)
                                                             1, &pci->fdblock);
                        if (ret) {
                                free(ptr);
+                               bit = NULL;
                                goto err;
                        }
 
@@ -2117,7 +2180,7 @@ static int ms_prime(struct ms_stream *msp)
                        goto err;
 
                if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
-                       add_ppm_hash(bit->pid, (char *) bit + sizeof(*bit));
+                       handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        bit_free(bit);
 
@@ -2193,8 +2256,10 @@ static int setup_file(struct per_dev_info *pdi, int cpu)
 
        snprintf(pci->fname + len, sizeof(pci->fname)-1-len,
                 "%s.blktrace.%d", pdi->name, pci->cpu);
-       if (stat(pci->fname, &st) < 0 || !st.st_size)
+       if (stat(pci->fname, &st) < 0)
                return 0;
+       if (!st.st_size)
+               return 1;
 
        pci->fd = open(pci->fname, O_RDONLY);
        if (pci->fd < 0) {
@@ -2244,9 +2309,27 @@ static int handle(struct ms_stream *msp)
        return 1;
 }
 
+/*
+ * Reduce a trace file name to its base name: 'foo' is left untouched, while
+ * 'foo.blktrace.X' is cut back in place to 'foo' so no files are missed.
+ */
+static int name_fixup(char *name)
+{
+       char *suffix;
+
+       if (name == NULL)
+               return 1;
+
+       suffix = strstr(name, ".blktrace.");
+       if (suffix != NULL)
+               *suffix = '\0';
+
+       return 0;
+}
+
 static int do_file(void)
 {
-       int i, cpu;
+       int i, cpu, ret;
        struct per_dev_info *pdi;
 
        /*
@@ -2254,6 +2337,10 @@ static int do_file(void)
         */
        for (i = 0; i < ndevices; i++) {
                pdi = &devices[i];
+               ret = name_fixup(pdi->name);
+               if (ret)
+                       return ret;
+
                for (cpu = 0; setup_file(pdi, cpu); cpu++)
                        ;
        }
@@ -2273,18 +2360,12 @@ static int do_file(void)
        return 0;
 }
 
-static int do_stdin(void)
+static void do_pipe(int fd)
 {
        unsigned long long youngest;
-       int fd, events, fdblock;
+       int events, fdblock;
 
        last_allowed_time = -1ULL;
-       fd = dup(STDIN_FILENO);
-       if (fd == -1) {
-               perror("dup stdin");
-               return -1;
-       }
-
        fdblock = -1;
        while ((events = read_events(fd, 0, &fdblock)) > 0) {
                read_sequence++;
@@ -2304,7 +2385,23 @@ static int do_stdin(void)
 
        if (rb_sort_entries)
                show_entries_rb(1);
+}
+
+/*
+ * Read traces from stdin ("-") or from the FIFO named by pipename.
+ * Returns 0 on success, -1 if the input descriptor could not be obtained.
+ */
+static int do_fifo(void)
+{
+       int from_stdin = !strcmp(pipename, "-");
+       int fd = from_stdin ? dup(STDIN_FILENO) : open(pipename, O_RDONLY);
 
+       if (fd == -1) {
+               perror(from_stdin ? "dup stdin" : pipename); /* report the call that failed */
+               return -1;
+       }
+
+       do_pipe(fd);
        close(fd);
        return 0;
 }
@@ -2370,6 +2467,18 @@ static int find_stopwatch_interval(char *string)
        return 0;
 }
 
+static int is_pipe(const char *str)
+{
+       struct stat sb;
+
+       /* "-" always counts as a pipe, without consulting the filesystem */
+       if (strcmp(str, "-") == 0)
+               return 1;
+
+       /* otherwise the path must stat successfully and be a FIFO */
+       return stat(str, &sb) == 0 && S_ISFIFO(sb.st_mode);
+}
+
 #define S_OPTS  "a:A:b:D:d:f:F:hi:o:Oqstw:vV"
 static char usage_str[] =    "\n\n" \
        "-i <file>           | --input=<file>\n" \
@@ -2443,9 +2552,10 @@ int main(int argc, char *argv[])
                        act_mask_tmp = i;
                        break;
                case 'i':
-                       if (!strcmp(optarg, "-") && !pipeline)
+                       if (is_pipe(optarg) && !pipeline) {
                                pipeline = 1;
-                       else if (resize_devices(optarg) != 0)
+                               pipename = strdup(optarg);
+                       } else if (resize_devices(optarg) != 0)
                                return 1;
                        break;
                case 'D':
@@ -2501,9 +2611,10 @@ int main(int argc, char *argv[])
        }
 
        while (optind < argc) {
-               if (!strcmp(argv[optind], "-") && !pipeline)
+               if (is_pipe(argv[optind]) && !pipeline) {
                        pipeline = 1;
-               else if (resize_devices(argv[optind]) != 0)
+                       pipename = strdup(argv[optind]);
+               } else if (resize_devices(argv[optind]) != 0)
                        return 1;
                optind++;
        }
@@ -2563,11 +2674,13 @@ int main(int argc, char *argv[])
        }
 
        if (pipeline)
-               ret = do_stdin();
+               ret = do_fifo();
        else
                ret = do_file();
 
-       show_stats();
+       if (!ret)
+               show_stats();
+
        if (ofp_buffer) {
                fflush(ofp);
                free(ofp_buffer);