blkparse: split off the timestamp correction code in to a separate function
[blktrace.git] / blkparse.c
index 0d2ea1281fcbfa1c61d8c3fd065dab12b4b64aa0..cf7a87b262fed7c4e11a84eb848ac34c24963e81 100644 (file)
@@ -36,7 +36,7 @@
 #include "rbtree.h"
 #include "jhash.h"
 
-static char blkparse_version[] = "0.99.2";
+static char blkparse_version[] = "1.2.0";
 
 struct skip_info {
        unsigned long start, end;
@@ -100,11 +100,23 @@ struct per_process_info {
 #define PPI_HASH_SHIFT (8)
 #define PPI_HASH_SIZE  (1 << PPI_HASH_SHIFT)
 #define PPI_HASH_MASK  (PPI_HASH_SIZE - 1)
+
+enum {
+       SORT_PROG_EVENT_N,   /* Program Name */
+       SORT_PROG_EVENT_QKB, /* KB: Queued read and write */
+       SORT_PROG_EVENT_RKB, /* KB: Queued Read */
+       SORT_PROG_EVENT_WKB, /* KB: Queued Write */
+       SORT_PROG_EVENT_CKB, /* KB: Complete */
+       SORT_PROG_EVENT_QIO, /* IO: Queued read and write */
+       SORT_PROG_EVENT_RIO, /* IO: Queued Read */
+       SORT_PROG_EVENT_WIO, /* IO: Queued Write */
+       SORT_PROG_EVENT_CIO, /* IO: Complete */
+};
+
 static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
 static struct per_process_info *ppi_list;
 static int ppi_list_entries;
 
-#define S_OPTS  "a:A:b:D:d:f:F:hi:o:Oqstw:vV"
 static struct option l_opts[] = {
        {
                .name = "act-mask",
@@ -160,6 +172,12 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 'i'
        },
+       {
+               .name = "no-msgs",
+               .has_arg = no_argument,
+               .flag = NULL,
+               .val = 'M'
+       },
        {
                .name = "output",
                .has_arg = required_argument,
@@ -184,6 +202,12 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 's'
        },
+       {
+               .name = "sort-program-stats",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'S'
+       },
        {
                .name = "track-ios",
                .has_arg = no_argument,
@@ -264,12 +288,14 @@ static unsigned long long stopwatch_end = -1ULL;  /* "infinity" */
 static unsigned long read_sequence;
 
 static int per_process_stats;
+static int per_process_stats_event = SORT_PROG_EVENT_N;
 static int per_device_and_cpu_stats = 1;
 static int track_ios;
 static int ppi_hash_by_pid = 1;
 static int verbose;
 static unsigned int act_mask = -1U;
 static int stats_printed;
+static int bin_output_msgs = 1;
 int data_is_native = -1;
 
 static FILE *dump_fp;
@@ -282,6 +308,7 @@ static unsigned int bit_alloc_cache;
 static unsigned int rb_batch = RB_BATCH_DEFAULT;
 
 static int pipeline;
+static char *pipename;
 
 static int text_output = 1;
 
@@ -291,6 +318,8 @@ static volatile int done;
 struct timespec                abs_start_time;
 static unsigned long long start_timestamp;
 
+static int have_drv_data = 0;
+
 #define JHASH_RANDOM   (0x3af5f2ee)
 
 #define CPUS_PER_LONG  (8 * sizeof(unsigned long))
@@ -543,7 +572,7 @@ static struct process_pid_map *find_ppm(pid_t pid)
        return NULL;
 }
 
-static void add_ppm_hash(pid_t pid, const char *name)
+static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name)
 {
        const int hash_idx = ppm_hash_pid(pid);
        struct process_pid_map *ppm;
@@ -553,10 +582,14 @@ static void add_ppm_hash(pid_t pid, const char *name)
                ppm = malloc(sizeof(*ppm));
                memset(ppm, 0, sizeof(*ppm));
                ppm->pid = pid;
-               strcpy(ppm->comm, name);
+               memset(ppm->comm, 0, sizeof(ppm->comm));
+               strncpy(ppm->comm, name, sizeof(ppm->comm));
+               ppm->comm[sizeof(ppm->comm) - 1] = '\0';
                ppm->hash_next = ppm_hash_table[hash_idx];
                ppm_hash_table[hash_idx] = ppm;
        }
+
+       return ppm;
 }
 
 static void handle_notify(struct blk_io_trace *bit)
@@ -587,6 +620,22 @@ static void handle_notify(struct blk_io_trace *bit)
 
                break;
 
+       case BLK_TN_MESSAGE:
+               if (bit->pdu_len > 0) {
+                       char msg[bit->pdu_len+1];
+
+                       memcpy(msg, (char *)payload, bit->pdu_len);
+                       msg[bit->pdu_len] = '\0';
+
+                       fprintf(ofp,
+                               "%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+                               MAJOR(bit->device), MINOR(bit->device),
+                               bit->cpu, "0", (int) SECONDS(bit->time),
+                               (unsigned long) NANO_SECONDS(bit->time),
+                               0, "m", "N", msg);
+               }
+               break;
+
        default:
                /* Ignore unknown notify events */
                ;
@@ -936,6 +985,8 @@ static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid,
        if (!iot) {
                iot = malloc(sizeof(*iot));
                iot->ppm = find_ppm(pid);
+               if (!iot->ppm)
+                       iot->ppm = add_ppm_hash(pid, "unknown");
                iot->sector = sector;
                track_rb_insert(pdi, iot);
        }
@@ -1118,6 +1169,8 @@ static struct io_stats *find_process_io_stats(pid_t pid)
                ppi = malloc(sizeof(*ppi));
                memset(ppi, 0, sizeof(*ppi));
                ppi->ppm = find_ppm(pid);
+               if (!ppi->ppm)
+                       ppi->ppm = add_ppm_hash(pid, "unknown");
                add_ppi_to_hash(ppi);
                add_ppi_to_list(ppi);
        }
@@ -1148,10 +1201,12 @@ static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
 {
        if (rw) {
                ios->mwrites++;
-               ios->qwrite_kb += t_kb(t);
+               ios->mwrite_kb += t_kb(t);
+               ios->mwrite_b += t_b(t);
        } else {
                ios->mreads++;
-               ios->qread_kb += t_kb(t);
+               ios->mread_kb += t_kb(t);
+               ios->mread_b += t_b(t);
        }
 }
 
@@ -1167,15 +1222,115 @@ static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
        }
 }
 
+static inline void __account_pc_queue(struct io_stats *ios,
+                                     struct blk_io_trace *t, int rw)
+{
+       if (rw) {
+               ios->qwrites_pc++;
+               ios->qwrite_kb_pc += t_kb(t);
+               ios->qwrite_b_pc += t_b(t);
+       } else {
+               ios->qreads_pc++;
+               ios->qread_kb += t_kb(t);
+               ios->qread_b_pc += t_b(t);
+       }
+}
+
+static inline void account_pc_queue(struct blk_io_trace *t,
+                                   struct per_cpu_info *pci, int rw)
+{
+       __account_pc_queue(&pci->io_stats, t, rw);
+
+       if (per_process_stats) {
+               struct io_stats *ios = find_process_io_stats(t->pid);
+
+               __account_pc_queue(ios, t, rw);
+       }
+}
+
+static inline void __account_pc_issue(struct io_stats *ios, int rw,
+                                     unsigned int bytes)
+{
+       if (rw) {
+               ios->iwrites_pc++;
+               ios->iwrite_kb_pc += bytes >> 10;
+               ios->iwrite_b_pc += bytes & 1023;
+       } else {
+               ios->ireads_pc++;
+               ios->iread_kb_pc += bytes >> 10;
+               ios->iread_b_pc += bytes & 1023;
+       }
+}
+
+static inline void account_pc_issue(struct blk_io_trace *t,
+                                   struct per_cpu_info *pci, int rw)
+{
+       __account_pc_issue(&pci->io_stats, rw, t->bytes);
+
+       if (per_process_stats) {
+               struct io_stats *ios = find_process_io_stats(t->pid);
+
+               __account_pc_issue(ios, rw, t->bytes);
+       }
+}
+
+static inline void __account_pc_requeue(struct io_stats *ios,
+                                       struct blk_io_trace *t, int rw)
+{
+       if (rw) {
+               ios->wrqueue_pc++;
+               ios->iwrite_kb_pc -= t_kb(t);
+               ios->iwrite_b_pc -= t_b(t);
+       } else {
+               ios->rrqueue_pc++;
+               ios->iread_kb_pc -= t_kb(t);
+               ios->iread_b_pc -= t_b(t);
+       }
+}
+
+static inline void account_pc_requeue(struct blk_io_trace *t,
+                                     struct per_cpu_info *pci, int rw)
+{
+       __account_pc_requeue(&pci->io_stats, t, rw);
+
+       if (per_process_stats) {
+               struct io_stats *ios = find_process_io_stats(t->pid);
+
+               __account_pc_requeue(ios, t, rw);
+       }
+}
+
+static inline void __account_pc_c(struct io_stats *ios, int rw)
+{
+       if (rw)
+               ios->cwrites_pc++;
+       else
+               ios->creads_pc++;
+}
+
+static inline void account_pc_c(struct blk_io_trace *t,
+                               struct per_cpu_info *pci, int rw)
+{
+       __account_pc_c(&pci->io_stats, rw);
+
+       if (per_process_stats) {
+               struct io_stats *ios = find_process_io_stats(t->pid);
+
+               __account_pc_c(ios, rw);
+       }
+}
+
 static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
                                   int rw)
 {
        if (rw) {
                ios->qwrites++;
                ios->qwrite_kb += t_kb(t);
+               ios->qwrite_b += t_b(t);
        } else {
                ios->qreads++;
                ios->qread_kb += t_kb(t);
+               ios->qread_b += t_b(t);
        }
 }
 
@@ -1196,9 +1351,11 @@ static inline void __account_c(struct io_stats *ios, int rw, int bytes)
        if (rw) {
                ios->cwrites++;
                ios->cwrite_kb += bytes >> 10;
+               ios->cwrite_b += bytes & 1023;
        } else {
                ios->creads++;
                ios->cread_kb += bytes >> 10;
+               ios->cread_b += bytes & 1023;
        }
 }
 
@@ -1220,9 +1377,11 @@ static inline void __account_issue(struct io_stats *ios, int rw,
        if (rw) {
                ios->iwrites++;
                ios->iwrite_kb += bytes >> 10;
+               ios->iwrite_b  += bytes & 1023;
        } else {
                ios->ireads++;
                ios->iread_kb += bytes >> 10;
+               ios->iread_b  += bytes & 1023;
        }
 }
 
@@ -1264,9 +1423,11 @@ static inline void __account_requeue(struct io_stats *ios,
        if (rw) {
                ios->wrqueue++;
                ios->iwrite_kb -= t_kb(t);
+               ios->iwrite_b -= t_b(t);
        } else {
                ios->rrqueue++;
                ios->iread_kb -= t_kb(t);
+               ios->iread_b -= t_b(t);
        }
 }
 
@@ -1346,13 +1507,16 @@ static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
        process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
 }
 
-static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
+static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
+                         struct per_cpu_info *pci)
 {
+       int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
        int act = t->action & 0xffff;
 
        switch (act) {
                case __BLK_TA_QUEUE:
                        log_generic(pci, t, "Q");
+                       account_pc_queue(t, pci, w);
                        break;
                case __BLK_TA_GETRQ:
                        log_generic(pci, t, "G");
@@ -1361,13 +1525,27 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
                        log_generic(pci, t, "S");
                        break;
                case __BLK_TA_REQUEUE:
+                       /*
+                        * can happen if we miss traces, don't let it go
+                        * below zero
+                        */
+                       if (pdi->cur_depth[w])
+                               pdi->cur_depth[w]--;
+                       account_pc_requeue(t, pci, w);
                        log_generic(pci, t, "R");
                        break;
                case __BLK_TA_ISSUE:
+                       account_pc_issue(t, pci, w);
+                       pdi->cur_depth[w]++;
+                       if (pdi->cur_depth[w] > pdi->max_depth[w])
+                               pdi->max_depth[w] = pdi->cur_depth[w];
                        log_pc(pci, t, "D");
                        break;
                case __BLK_TA_COMPLETE:
+                       if (pdi->cur_depth[w])
+                               pdi->cur_depth[w]--;
                        log_pc(pci, t, "C");
+                       account_pc_c(t, pci, w);
                        break;
                case __BLK_TA_INSERT:
                        log_pc(pci, t, "I");
@@ -1451,6 +1629,10 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
                case __BLK_TA_REMAP:
                        log_generic(pci, t, "A");
                        break;
+               case __BLK_TA_DRV_DATA:
+                       have_drv_data = 1;
+                       /* dump to binary file only */
+                       break;
                default:
                        fprintf(stderr, "Bad fs action %x\n", t->action);
                        break;
@@ -1461,8 +1643,10 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
                       struct per_dev_info *pdi)
 {
        if (text_output) {
-               if (t->action & BLK_TC_ACT(BLK_TC_PC))
-                       dump_trace_pc(t, pci);
+               if (t->action == BLK_TN_MESSAGE)
+                       handle_notify(t);
+               else if (t->action & BLK_TC_ACT(BLK_TC_PC))
+                       dump_trace_pc(t, pdi, pci);
                else
                        dump_trace_fs(t, pdi, pci);
        }
@@ -1472,7 +1656,10 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
 
        pdi->events++;
 
-       output_binary(t, sizeof(*t) + t->pdu_len);
+       if (bin_output_msgs ||
+                           !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
+                             t->action == BLK_TN_MESSAGE))
+               output_binary(t, sizeof(*t) + t->pdu_len);
 }
 
 /*
@@ -1503,21 +1690,60 @@ static void dump_io_stats(struct per_dev_info *pdi, struct io_stats *ios,
 
        fprintf(ofp, "%s\n", msg);
 
-       fprintf(ofp, " Reads Queued:    %s, %siB\t", size_cnv(x, ios->qreads, 0), size_cnv(y, ios->qread_kb, 1));
-       fprintf(ofp, " Writes Queued:    %s, %siB\n", size_cnv(x, ios->qwrites, 0), size_cnv(y, ios->qwrite_kb, 1));
-
-       fprintf(ofp, " Read Dispatches: %s, %siB\t", size_cnv(x, ios->ireads, 0), size_cnv(y, ios->iread_kb, 1));
-       fprintf(ofp, " Write Dispatches: %s, %siB\n", size_cnv(x, ios->iwrites, 0), size_cnv(y, ios->iwrite_kb, 1));
+       fprintf(ofp, " Reads Queued:    %s, %siB\t",
+                       size_cnv(x, ios->qreads, 0),
+                       size_cnv(y, ios->qread_kb + (ios->qread_b>>10), 1));
+       fprintf(ofp, " Writes Queued:    %s, %siB\n",
+                       size_cnv(x, ios->qwrites, 0),
+                       size_cnv(y, ios->qwrite_kb + (ios->qwrite_b>>10), 1));
+       fprintf(ofp, " Read Dispatches: %s, %siB\t",
+                       size_cnv(x, ios->ireads, 0),
+                       size_cnv(y, ios->iread_kb + (ios->iread_b>>10), 1));
+       fprintf(ofp, " Write Dispatches: %s, %siB\n",
+                       size_cnv(x, ios->iwrites, 0),
+                       size_cnv(y, ios->iwrite_kb + (ios->iwrite_b>>10), 1));
        fprintf(ofp, " Reads Requeued:  %s\t\t", size_cnv(x, ios->rrqueue, 0));
        fprintf(ofp, " Writes Requeued:  %s\n", size_cnv(x, ios->wrqueue, 0));
-       fprintf(ofp, " Reads Completed: %s, %siB\t", size_cnv(x, ios->creads, 0), size_cnv(y, ios->cread_kb, 1));
-       fprintf(ofp, " Writes Completed: %s, %siB\n", size_cnv(x, ios->cwrites, 0), size_cnv(y, ios->cwrite_kb, 1));
-       fprintf(ofp, " Read Merges:     %'8lu%8c\t", ios->mreads, ' ');
-       fprintf(ofp, " Write Merges:     %'8lu\n", ios->mwrites);
+       fprintf(ofp, " Reads Completed: %s, %siB\t",
+                       size_cnv(x, ios->creads, 0),
+                       size_cnv(y, ios->cread_kb + (ios->cread_b>>10), 1));
+       fprintf(ofp, " Writes Completed: %s, %siB\n",
+                       size_cnv(x, ios->cwrites, 0),
+                       size_cnv(y, ios->cwrite_kb + (ios->cwrite_b>>10), 1));
+       fprintf(ofp, " Read Merges:     %s, %siB\t",
+                       size_cnv(x, ios->mreads, 0),
+                       size_cnv(y, ios->mread_kb + (ios->mread_b>>10), 1));
+       fprintf(ofp, " Write Merges:     %s, %siB\n",
+                       size_cnv(x, ios->mwrites, 0),
+                       size_cnv(y, ios->mwrite_kb + (ios->mwrite_b>>10), 1));
        if (pdi) {
                fprintf(ofp, " Read depth:      %'8u%8c\t", pdi->max_depth[0], ' ');
                fprintf(ofp, " Write depth:      %'8u\n", pdi->max_depth[1]);
        }
+       if (ios->qreads_pc || ios->qwrites_pc || ios->ireads_pc || ios->iwrites_pc ||
+           ios->rrqueue_pc || ios->wrqueue_pc || ios->creads_pc || ios->cwrites_pc) {
+               fprintf(ofp, " PC Reads Queued: %s, %siB\t",
+                       size_cnv(x, ios->qreads_pc, 0),
+                       size_cnv(y,
+                               ios->qread_kb_pc + (ios->qread_b_pc>>10), 1));
+               fprintf(ofp, " PC Writes Queued: %s, %siB\n",
+                       size_cnv(x, ios->qwrites_pc, 0),
+                       size_cnv(y,
+                               ios->qwrite_kb_pc + (ios->qwrite_b_pc>>10), 1));
+               fprintf(ofp, " PC Read Disp.:   %s, %siB\t",
+                       size_cnv(x, ios->ireads_pc, 0),
+                       size_cnv(y,
+                               ios->iread_kb_pc + (ios->iread_b_pc>>10), 1));
+               fprintf(ofp, " PC Write Disp.:   %s, %siB\n",
+                       size_cnv(x, ios->iwrites_pc, 0),
+                       size_cnv(y,
+                               ios->iwrite_kb_pc + (ios->iwrite_b_pc>>10),
+                               1));
+               fprintf(ofp, " PC Reads Req.:   %s\t\t", size_cnv(x, ios->rrqueue_pc, 0));
+               fprintf(ofp, " PC Writes Req.:   %s\n", size_cnv(x, ios->wrqueue_pc, 0));
+               fprintf(ofp, " PC Reads Compl.: %s\t\t", size_cnv(x, ios->creads_pc, 0));
+               fprintf(ofp, " PC Writes Compl.: %s\n", size_cnv(x, ios->cwrites_pc, 0));
+       }
        fprintf(ofp, " IO unplugs:      %'8lu%8c\t", ios->io_unplugs, ' ');
        fprintf(ofp, " Timer unplugs:    %'8lu\n", ios->timer_unplugs);
 }
@@ -1552,6 +1778,88 @@ static int ppi_name_compare(const void *p1, const void *p2)
        return res;
 }
 
+static int ppi_event_compare(const void *p1, const void *p2)
+{
+       struct per_process_info *ppi1 = *((struct per_process_info **) p1);
+       struct per_process_info *ppi2 = *((struct per_process_info **) p2);
+       struct io_stats *ios1 = &ppi1->io_stats;
+       struct io_stats *ios2 = &ppi2->io_stats;
+       unsigned long io1, io2;
+       unsigned long long kb1,kb2;
+       int sort_by_kb = 1;
+
+       io1 = io2 = 0;
+       kb1 = kb2 = 0;
+
+       switch (per_process_stats_event) {
+       case SORT_PROG_EVENT_QKB: /* KB: Queued read and write */
+               kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10) +
+                       ios1->qread_kb + (ios1->qread_b>>10);
+               kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10) +
+                       ios2->qread_kb + (ios2->qread_b>>10);
+               break;
+       case SORT_PROG_EVENT_RKB: /* KB: Queued Read */
+               kb1 = ios1->qread_kb + (ios1->qread_b>>10);
+               kb2 = ios2->qread_kb + (ios2->qread_b>>10);
+               break;
+       case SORT_PROG_EVENT_WKB: /* KB: Queued Write */
+               kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10);
+               kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10);
+               break;
+       case SORT_PROG_EVENT_CKB: /* KB: Complete */
+               kb1 = ios1->cwrite_kb + (ios1->cwrite_b>>10) +
+                       ios1->cread_kb + (ios1->cread_b>>10);
+               kb2 = ios2->cwrite_kb + (ios2->cwrite_b>>10) +
+                       ios2->cread_kb + (ios2->cread_b>>10);
+               break;
+       case SORT_PROG_EVENT_QIO: /* IO: Queued read and write */
+               sort_by_kb = 0;
+               io1 = ios1->qreads + ios1->qwrites;
+               io2 = ios2->qreads + ios2->qwrites;
+               break;
+       case SORT_PROG_EVENT_RIO: /* IO: Queued Read */
+               sort_by_kb = 0;
+               io1 = ios1->qreads;
+               io2 = ios2->qreads;
+               break;
+       case SORT_PROG_EVENT_WIO: /* IO: Queued Write */
+               sort_by_kb = 0;
+               io1 = ios1->qwrites;
+               io2 = ios2->qwrites;
+               break;
+       case SORT_PROG_EVENT_CIO: /* IO: Complete */
+               sort_by_kb = 0;
+               io1 = ios1->creads + ios1->cwrites;
+               io2 = ios2->creads + ios2->cwrites;
+               break;
+       }
+
+
+       /* compare kb */
+       if (sort_by_kb) {
+               if (kb1 > kb2)
+                       return 1;
+               else if (kb1 == kb2)
+                       return 0;
+               return -1;
+       }
+
+       /* compare io */
+       if (io1 > io2)
+               return 1;
+       else if (io1 == io2)
+               return 0;
+       return -1;
+}
+
+static int ppi_compare(const void *p1, const void *p2)
+{
+       if (per_process_stats_event == SORT_PROG_EVENT_N)
+               return ppi_name_compare(p1, p2);
+
+       return ppi_event_compare(p1, p2);
+}
+
 static void sort_process_list(void)
 {
        struct per_process_info **ppis;
@@ -1566,7 +1874,7 @@ static void sort_process_list(void)
                ppi = ppi->list_next;
        }
 
-       qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
+       qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_compare);
 
        i = ppi_list_entries - 1;
        ppi_list = NULL;
@@ -1614,6 +1922,7 @@ static void show_device_and_cpu_stats(void)
        int i, j, pci_events;
        char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
        char name[32];
+       double ratio;
 
        for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
 
@@ -1644,6 +1953,34 @@ static void show_device_and_cpu_stats(void)
                        total.cwrite_kb += ios->cwrite_kb;
                        total.iread_kb += ios->iread_kb;
                        total.iwrite_kb += ios->iwrite_kb;
+                       total.mread_kb += ios->mread_kb;
+                       total.mwrite_kb += ios->mwrite_kb;
+                       total.qread_b += ios->qread_b;
+                       total.qwrite_b += ios->qwrite_b;
+                       total.cread_b += ios->cread_b;
+                       total.cwrite_b += ios->cwrite_b;
+                       total.iread_b += ios->iread_b;
+                       total.iwrite_b += ios->iwrite_b;
+                       total.mread_b += ios->mread_b;
+                       total.mwrite_b += ios->mwrite_b;
+
+                       total.qreads_pc += ios->qreads_pc;
+                       total.qwrites_pc += ios->qwrites_pc;
+                       total.creads_pc += ios->creads_pc;
+                       total.cwrites_pc += ios->cwrites_pc;
+                       total.ireads_pc += ios->ireads_pc;
+                       total.iwrites_pc += ios->iwrites_pc;
+                       total.rrqueue_pc += ios->rrqueue_pc;
+                       total.wrqueue_pc += ios->wrqueue_pc;
+                       total.qread_kb_pc += ios->qread_kb_pc;
+                       total.qwrite_kb_pc += ios->qwrite_kb_pc;
+                       total.iread_kb_pc += ios->iread_kb_pc;
+                       total.iwrite_kb_pc += ios->iwrite_kb_pc;
+                       total.qread_b_pc += ios->qread_b_pc;
+                       total.qwrite_b_pc += ios->qwrite_b_pc;
+                       total.iread_b_pc += ios->iread_b_pc;
+                       total.iwrite_b_pc += ios->iwrite_b_pc;
+
                        total.timer_unplugs += ios->timer_unplugs;
                        total.io_unplugs += ios->io_unplugs;
 
@@ -1663,8 +2000,10 @@ static void show_device_and_cpu_stats(void)
                wrate = rrate = 0;
                msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000;
                if (msec) {
-                       rrate = 1000 * total.cread_kb / msec;
-                       wrate = 1000 * total.cwrite_kb / msec;
+                       rrate = ((1000 * total.cread_kb) + total.cread_b) /
+                                                                       msec;
+                       wrate = ((1000 * total.cwrite_kb) + total.cwrite_b) /
+                                                                       msec;
                }
 
                fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n",
@@ -1673,10 +2012,29 @@ static void show_device_and_cpu_stats(void)
                        get_dev_name(pdi, line, sizeof(line)), pdi->events);
 
                collect_pdi_skips(pdi);
+               if (!pdi->skips && !pdi->events)
+                       ratio = 0.0;
+               else
+                       ratio = 100.0 * ((double)pdi->seq_skips /
+                                       (double)(pdi->events + pdi->seq_skips));
                fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%)\n",
-                       pdi->skips,pdi->seq_skips,
-                       100.0 * ((double)pdi->seq_skips /
-                               (double)(pdi->events + pdi->seq_skips)));
+                       pdi->skips, pdi->seq_skips, ratio);
+       }
+}
+
+static void correct_abs_start_time(void)
+{
+       long delta = genesis_time - start_timestamp;
+
+       abs_start_time.tv_sec  += SECONDS(delta);
+       abs_start_time.tv_nsec += NANO_SECONDS(delta);
+       if (abs_start_time.tv_nsec < 0) {
+               abs_start_time.tv_nsec += 1000000000;
+               abs_start_time.tv_sec -= 1;
+       } else
+       if (abs_start_time.tv_nsec > 1000000000) {
+               abs_start_time.tv_nsec -= 1000000000;
+               abs_start_time.tv_sec += 1;
        }
 }
 
@@ -1697,18 +2055,7 @@ static void find_genesis(void)
         */
        if (start_timestamp
         && start_timestamp != genesis_time) {
-               long delta = genesis_time - start_timestamp;
-
-               abs_start_time.tv_sec  += SECONDS(delta);
-               abs_start_time.tv_nsec += NANO_SECONDS(delta);
-               if (abs_start_time.tv_nsec < 0) {
-                       abs_start_time.tv_nsec += 1000000000;
-                       abs_start_time.tv_sec -= 1;
-               } else
-               if (abs_start_time.tv_nsec > 1000000000) {
-                       abs_start_time.tv_nsec -= 1000000000;
-                       abs_start_time.tv_sec += 1;
-               }
+               correct_abs_start_time();
        }
 }
 
@@ -1783,6 +2130,7 @@ static int check_cpu_map(struct per_dev_info *pdi)
         * create a map of the cpus we have traces for
         */
        cpu_map = malloc(pdi->cpu_map_max / sizeof(long));
+       memset(cpu_map, 0, sizeof(*cpu_map));
        n = rb_first(&rb_sort_root);
        while (n) {
                __t = rb_entry(n, struct trace, rb_node);
@@ -1886,7 +2234,8 @@ static void show_entries_rb(int force)
                        break;
                }
 
-               if (check_sequence(pdi, t, force))
+               if (!(bit->action == BLK_TN_MESSAGE) &&
+                   check_sequence(pdi, t, force))
                        break;
 
                if (!force && bit->time > last_allowed_time)
@@ -1897,7 +2246,8 @@ static void show_entries_rb(int force)
                if (!pci || pci->cpu != bit->cpu)
                        pci = get_cpu_info(pdi, bit->cpu);
 
-               pci->last_sequence = bit->sequence;
+               if (!(bit->action == BLK_TN_MESSAGE))
+                       pci->last_sequence = bit->sequence;
 
                pci->nelems++;
 
@@ -2030,7 +2380,7 @@ static int read_events(int fd, int always_block, int *fdblock)
                /*
                 * not a real trace, so grab and handle it here
                 */
-               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
+               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
                        handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        continue;
@@ -2162,6 +2512,7 @@ static int ms_prime(struct ms_stream *msp)
                                                             1, &pci->fdblock);
                        if (ret) {
                                free(ptr);
+                               bit = NULL;
                                goto err;
                        }
 
@@ -2172,7 +2523,13 @@ static int ms_prime(struct ms_stream *msp)
                if (verify_trace(bit))
                        goto err;
 
-               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
+               if (bit->cpu != pci->cpu) {
+                       fprintf(stderr, "cpu %d trace info has error cpu %d\n",
+                               pci->cpu, bit->cpu);
+                       continue;
+               }
+
+               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
                        handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        bit_free(bit);
@@ -2277,17 +2634,19 @@ static int handle(struct ms_stream *msp)
        struct blk_io_trace *bit;
 
        t = ms_peek(msp);
-       if (t->bit->time > stopwatch_end)
-               return 0;
 
        bit = t->bit;
        pdi = msp->pdi;
        pci = get_cpu_info(pdi, msp->cpu);
        pci->nelems++;
-
        bit->time -= genesis_time;
+
+       if (t->bit->time > stopwatch_end)
+               return 0;
+
        pdi->last_reported_time = bit->time;
-       if (bit->action & (act_mask << BLK_TC_SHIFT))
+       if ((bit->action & (act_mask << BLK_TC_SHIFT))&&
+           t->bit->time >= stopwatch_start)
                dump_trace(bit, pci, pdi);
 
        ms_deq(msp);
@@ -2302,9 +2661,27 @@ static int handle(struct ms_stream *msp)
        return 1;
 }
 
+/*
+ * Check if we need to sanitize the name. We allow 'foo', or if foo.blktrace.X
+ * is given, then strip back down to 'foo' to avoid missing files.
+ */
+static int name_fixup(char *name)
+{
+       char *b;
+
+       if (!name)
+               return 1;
+
+       b = strstr(name, ".blktrace.");
+       if (b)
+               *b = '\0';
+
+       return 0;
+}
+
 static int do_file(void)
 {
-       int i, cpu;
+       int i, cpu, ret;
        struct per_dev_info *pdi;
 
        /*
@@ -2312,8 +2689,18 @@ static int do_file(void)
         */
        for (i = 0; i < ndevices; i++) {
                pdi = &devices[i];
+               ret = name_fixup(pdi->name);
+               if (ret)
+                       return ret;
+
                for (cpu = 0; setup_file(pdi, cpu); cpu++)
                        ;
+
+               if (!cpu) {
+                       fprintf(stderr,"No input files found for %s\n",
+                               pdi->name);
+                       return 1;
+               }
        }
 
        /*
@@ -2331,18 +2718,12 @@ static int do_file(void)
        return 0;
 }
 
-static int do_stdin(void)
+static void do_pipe(int fd)
 {
        unsigned long long youngest;
-       int fd, events, fdblock;
+       int events, fdblock;
 
        last_allowed_time = -1ULL;
-       fd = dup(STDIN_FILENO);
-       if (fd == -1) {
-               perror("dup stdin");
-               return -1;
-       }
-
        fdblock = -1;
        while ((events = read_events(fd, 0, &fdblock)) > 0) {
                read_sequence++;
@@ -2362,7 +2743,23 @@ static int do_stdin(void)
 
        if (rb_sort_entries)
                show_entries_rb(1);
+}
+
+static int do_fifo(void)
+{
+       int fd;
+
+       if (!strcmp(pipename, "-"))
+               fd = dup(STDIN_FILENO);
+       else
+               fd = open(pipename, O_RDONLY);
+
+       if (fd == -1) {
+               perror("dup stdin");
+               return -1;
+       }
 
+       do_pipe(fd);
        close(fd);
        return 0;
 }
@@ -2428,7 +2825,58 @@ static int find_stopwatch_interval(char *string)
        return 0;
 }
 
-#define S_OPTS  "a:A:b:D:d:f:F:hi:o:Oqstw:vV"
+static int is_pipe(const char *str)
+{
+       struct stat st;
+
+       if (!strcmp(str, "-"))
+               return 1;
+       if (!stat(str, &st) && S_ISFIFO(st.st_mode))
+               return 1;
+
+       return 0;
+}
+
+static int get_program_sort_event(const char *str)
+{
+       char evt = str[0];
+
+       switch (evt) {
+       case 'N':
+               per_process_stats_event = SORT_PROG_EVENT_N;
+               break;
+       case 'Q':
+               per_process_stats_event = SORT_PROG_EVENT_QKB;
+               break;
+       case 'q':
+               per_process_stats_event = SORT_PROG_EVENT_QIO;
+               break;
+       case 'R':
+               per_process_stats_event = SORT_PROG_EVENT_RKB;
+               break;
+       case 'r':
+               per_process_stats_event = SORT_PROG_EVENT_RIO;
+               break;
+       case 'W':
+               per_process_stats_event = SORT_PROG_EVENT_WKB;
+               break;
+       case 'w':
+               per_process_stats_event = SORT_PROG_EVENT_WIO;
+               break;
+       case 'C':
+               per_process_stats_event = SORT_PROG_EVENT_CKB;
+               break;
+       case 'c':
+               per_process_stats_event = SORT_PROG_EVENT_CIO;
+               break;
+       default:
+               return 1;
+       }
+
+       return 0;
+}
+
+#define S_OPTS  "a:A:b:D:d:f:F:hi:o:OqsS:tw:vVM"
 static char usage_str[] =    "\n\n" \
        "-i <file>           | --input=<file>\n" \
        "[ -a <action field> | --act-mask=<action field> ]\n" \
@@ -2443,10 +2891,14 @@ static char usage_str[] =    "\n\n" \
        "[ -O                | --no-text-output ]\n" \
        "[ -q                | --quiet ]\n" \
        "[ -s                | --per-program-stats ]\n" \
+       "[ -S <event>        | --sort-program-stats=<event> ]\n" \
        "[ -t                | --track-ios ]\n" \
        "[ -w <time>         | --stopwatch=<time> ]\n" \
+       "[ -M                | --no-msgs\n" \
        "[ -v                | --verbose ]\n" \
        "[ -V                | --version ]\n\n" \
+       "\t-a Only trace specified actions. See documentation\n" \
+       "\t-A Give trace mask as a single value. See documentation\n" \
        "\t-b stdin read batching\n" \
        "\t-d Output file. If specified, binary data is written to file\n" \
        "\t-D Directory to prepend to input file names\n" \
@@ -2459,16 +2911,22 @@ static char usage_str[] =    "\n\n" \
        "\t-O Do NOT output text data\n" \
        "\t-q Quiet. Don't display any stats at the end of the trace\n" \
        "\t-s Show per-program io statistics\n" \
+       "\t-S Show per-program io statistics sorted by N/Q/q/R/r/W/w/C/c\n" \
+       "\t   N:Name, Q/q:Queued(read & write), R/r:Queued Read, W/w:Queued Write, C/c:Complete.\n" \
+       "\t   Sort programs by how much data(KB): Q,R,W,C.\n" \
+       "\t   Sort programs by how many IO operations: q,r,w,c.\n" \
+       "\t   if -S was used, the -s parameter will be ignored.\n" \
        "\t-t Track individual ios. Will tell you the time a request took\n" \
        "\t   to get queued, to get dispatched, and to get completed\n" \
        "\t-w Only parse data between the given time interval in seconds.\n" \
        "\t   If 'start' isn't given, blkparse defaults the start time to 0\n" \
+       "\t-M Do not output messages to binary file\n" \
        "\t-v More verbose for marginal errors\n" \
        "\t-V Print program version info\n\n";
 
 static void usage(char *prog)
 {
-       fprintf(stderr, "Usage: %s %s %s", prog, blkparse_version, usage_str);
+       fprintf(stderr, "Usage: %s %s", prog, usage_str);
 }
 
 int main(int argc, char *argv[])
@@ -2501,9 +2959,10 @@ int main(int argc, char *argv[])
                        act_mask_tmp = i;
                        break;
                case 'i':
-                       if (!strcmp(optarg, "-") && !pipeline)
+                       if (is_pipe(optarg) && !pipeline) {
                                pipeline = 1;
-                       else if (resize_devices(optarg) != 0)
+                               pipename = strdup(optarg);
+                       } else if (resize_devices(optarg) != 0)
                                return 1;
                        break;
                case 'D':
@@ -2523,6 +2982,11 @@ int main(int argc, char *argv[])
                case 's':
                        per_process_stats = 1;
                        break;
+               case 'S':
+                       per_process_stats = 1;
+                       if (get_program_sort_event(optarg))
+                               return 1;
+                       break;
                case 't':
                        track_ios = 1;
                        break;
@@ -2552,6 +3016,9 @@ int main(int argc, char *argv[])
                case 'd':
                        dump_binary = optarg;
                        break;
+               case 'M':
+                       bin_output_msgs = 0;
+                       break;
                default:
                        usage(argv[0]);
                        return 1;
@@ -2559,9 +3026,10 @@ int main(int argc, char *argv[])
        }
 
        while (optind < argc) {
-               if (!strcmp(argv[optind], "-") && !pipeline)
+               if (is_pipe(argv[optind]) && !pipeline) {
                        pipeline = 1;
-               else if (resize_devices(argv[optind]) != 0)
+                       pipename = strdup(argv[optind]);
+               } else if (resize_devices(argv[optind]) != 0)
                        return 1;
                optind++;
        }
@@ -2587,7 +3055,7 @@ int main(int argc, char *argv[])
                        ofp = fdopen(STDOUT_FILENO, "w");
                        mode = _IOLBF;
                } else {
-                       char ofname[128];
+                       char ofname[PATH_MAX];
 
                        snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
                        ofp = fopen(ofname, "w");
@@ -2607,11 +3075,15 @@ int main(int argc, char *argv[])
        }
 
        if (dump_binary) {
-               dump_fp = fopen(dump_binary, "w");
-               if (!dump_fp) {
-                       perror(dump_binary);
-                       dump_binary = NULL;
-                       return 1;
+               if (!strcmp(dump_binary, "-"))
+                       dump_fp = stdout;
+               else {
+                       dump_fp = fopen(dump_binary, "w");
+                       if (!dump_fp) {
+                               perror(dump_binary);
+                               dump_binary = NULL;
+                               return 1;
+                       }
                }
                bin_ofp_buffer = malloc(128 * 1024);
                if (setvbuf(dump_fp, bin_ofp_buffer, _IOFBF, 128 * 1024)) {
@@ -2621,11 +3093,17 @@ int main(int argc, char *argv[])
        }
 
        if (pipeline)
-               ret = do_stdin();
+               ret = do_fifo();
        else
                ret = do_file();
 
-       show_stats();
+       if (!ret)
+               show_stats();
+
+       if (have_drv_data && !dump_binary)
+               printf("\ndiscarded traces containing low-level device driver "
+                      "specific data (only available in binary output)\n");
+
        if (ofp_buffer) {
                fflush(ofp);
                free(ofp_buffer);