[PATCH] blkparse: Add option to print missing entries
[blktrace.git] / blkparse.c
index 93559261ec9b6f2760461de64e9c2a3f450a8cc1..a1ebcd2a54b3985abf33fc926b058592dfde0e93 100644 (file)
@@ -38,7 +38,7 @@
 static char blkparse_version[] = "0.90";
 
 struct per_dev_info {
-       dev_t id;
+       dev_t dev;
        char *name;
 
        int backwards;
@@ -49,6 +49,11 @@ struct per_dev_info {
        unsigned long last_sequence;
        unsigned long skips;
 
+       struct rb_root rb_last;
+       unsigned long rb_last_entries;
+
+       struct rb_root rb_track;
+
        int nfiles;
        int ncpus;
        struct per_cpu_info *cpus;
@@ -76,7 +81,7 @@ static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
 static struct per_process_info *ppi_list;
 static int ppi_list_entries;
 
-#define S_OPTS "i:o:b:stqw:f:F:vn"
+#define S_OPTS "i:o:b:stqw:f:F:vnm"
 static struct option l_opts[] = {
        {
                .name = "input",
@@ -138,6 +143,12 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 'n'
        },
+       {
+               .name = "missing",
+               .has_arg = no_argument,
+               .flag = NULL,
+               .val = 'm'
+       },
        {
                .name = "version",
                .has_arg = no_argument,
@@ -153,14 +164,11 @@ struct trace {
        struct blk_io_trace *bit;
        struct rb_node rb_node;
        struct trace *next;
-       int skipped;
 };
 
 static struct rb_root rb_sort_root;
 static unsigned long rb_sort_entries;
 
-static struct rb_root rb_track_root;
-
 static struct trace *trace_list;
 
 /*
@@ -175,7 +183,6 @@ static struct trace *t_alloc_list;
 struct io_track {
        struct rb_node rb_node;
 
-       dev_t device;
        __u64 sector;
        __u32 pid;
        char comm[16];
@@ -194,15 +201,20 @@ static char *output_name;
 
 static unsigned long long genesis_time;
 static unsigned long long last_allowed_time;
+static unsigned int smallest_seq_read;
 static unsigned long long stopwatch_start;     /* start from zero by default */
 static unsigned long long stopwatch_end = ULONG_LONG_MAX;      /* "infinity" */
 
 static int per_process_stats;
 static int track_ios;
 static int ppi_hash_by_pid = 1;
+static int print_missing;
+
+static unsigned int t_alloc_cache;
+static unsigned int bit_alloc_cache;
 
 #define RB_BATCH_DEFAULT       (512)
-static int rb_batch = RB_BATCH_DEFAULT;
+static unsigned int rb_batch = RB_BATCH_DEFAULT;
 
 static int pipeline;
 
@@ -290,21 +302,28 @@ static struct per_process_info *find_process(__u32 pid, char *name)
        return ppi;
 }
 
-static inline int trace_rb_insert(struct trace *t)
+static inline int trace_rb_insert(struct trace *t, struct rb_root *root,
+                                 int check_time)
 {
-       struct rb_node **p = &rb_sort_root.rb_node;
+       struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct trace *__t;
 
        while (*p) {
                parent = *p;
+
                __t = rb_entry(parent, struct trace, rb_node);
 
-               if (t->bit->time < __t->bit->time)
-                       p = &(*p)->rb_left;
-               else if (t->bit->time > __t->bit->time)
-                       p = &(*p)->rb_right;
-               else if (t->bit->device < __t->bit->device)
+               if (check_time) {
+                       if (t->bit->time < __t->bit->time) {
+                               p = &(*p)->rb_left;
+                               continue;
+                       } else if (t->bit->time > __t->bit->time) {
+                               p = &(*p)->rb_right;
+                               continue;
+                       }
+               }
+               if (t->bit->device < __t->bit->device)
                        p = &(*p)->rb_left;
                else if (t->bit->device > __t->bit->device)
                        p = &(*p)->rb_right;
@@ -321,53 +340,100 @@ static inline int trace_rb_insert(struct trace *t)
                }
        }
 
-       rb_sort_entries++;
        rb_link_node(&t->rb_node, parent, p);
-       rb_insert_color(&t->rb_node, &rb_sort_root);
+       rb_insert_color(&t->rb_node, root);
        return 0;
 }
 
-static struct trace *trace_rb_find(dev_t device, unsigned long sequence)
+static inline int trace_rb_insert_sort(struct trace *t)
 {
-       struct rb_node **p = &rb_sort_root.rb_node;
-       struct rb_node *parent = NULL;
+       if (!trace_rb_insert(t, &rb_sort_root, 1)) {
+               rb_sort_entries++;
+               return 0;
+       }
+
+       return 1;
+}
+
+static inline int trace_rb_insert_last(struct per_dev_info *pdi,struct trace *t)
+{
+       if (!trace_rb_insert(t, &pdi->rb_last, 1)) {
+               pdi->rb_last_entries++;
+               return 0;
+       }
+
+       return 1;
+}
+
+static struct trace *trace_rb_find(dev_t device, unsigned long sequence,
+                                  struct rb_root *root, int order)
+{
+       struct rb_node *n = root->rb_node;
+       struct rb_node *prev = NULL;
        struct trace *__t;
 
-       while (*p) {
-               parent = *p;
-               __t = rb_entry(parent, struct trace, rb_node);
+       while (n) {
+               __t = rb_entry(n, struct trace, rb_node);
+               prev = n;
 
                if (device < __t->bit->device)
-                       p = &(*p)->rb_left;
+                       n = n->rb_left;
                else if (device > __t->bit->device)
-                       p = &(*p)->rb_right;
+                       n = n->rb_right;
                else if (sequence < __t->bit->sequence)
-                       p = &(*p)->rb_left;
+                       n = n->rb_left;
                else if (sequence > __t->bit->sequence)
-                       p = &(*p)->rb_right;
+                       n = n->rb_right;
                else
                        return __t;
        }
 
+       /*
+        * hack - the list may not be sequence ordered because some
+        * events don't have sequence and time matched. so we end up
+        * being a little off in the rb lookup here, because we don't
+        * know the time we are looking for. compensate by browsing
+        * a little ahead from the last entry to find the match
+        */
+       if (order && prev) {
+               int max = 5;
+
+               while (((n = rb_next(prev)) != NULL) && max--) {
+                       __t = rb_entry(n, struct trace, rb_node);
+                       
+                       if (__t->bit->device == device &&
+                           __t->bit->sequence == sequence)
+                               return __t;
+
+                       prev = n;
+               }
+       }
+                       
        return NULL;
 }
 
-static inline int track_rb_insert(struct io_track *iot)
+static inline struct trace *trace_rb_find_sort(dev_t dev, unsigned long seq)
 {
-       struct rb_node **p = &rb_track_root.rb_node;
+       return trace_rb_find(dev, seq, &rb_sort_root, 1);
+}
+
+static inline struct trace *trace_rb_find_last(struct per_dev_info *pdi,
+                                              unsigned long seq)
+{
+       return trace_rb_find(pdi->dev, seq, &pdi->rb_last, 0);
+}
+
+static inline int track_rb_insert(struct per_dev_info *pdi,struct io_track *iot)
+{
+       struct rb_node **p = &pdi->rb_track.rb_node;
        struct rb_node *parent = NULL;
        struct io_track *__iot;
 
        while (*p) {
                parent = *p;
-
                __iot = rb_entry(parent, struct io_track, rb_node);
 
-               if (iot->device < __iot->device)
-                       p = &(*p)->rb_left;
-               else if (iot->device > __iot->device)
-                       p = &(*p)->rb_right;
-               else if (iot->sector < __iot->sector)
+               if (iot->sector < __iot->sector)
                        p = &(*p)->rb_left;
                else if (iot->sector > __iot->sector)
                        p = &(*p)->rb_right;
@@ -375,35 +441,28 @@ static inline int track_rb_insert(struct io_track *iot)
                        fprintf(stderr,
                                "sector alias (%Lu) on device %d,%d!\n",
                                (unsigned long long) iot->sector,
-                               MAJOR(iot->device), MINOR(iot->device));
+                               MAJOR(pdi->dev), MINOR(pdi->dev));
                        return 1;
                }
        }
 
        rb_link_node(&iot->rb_node, parent, p);
-       rb_insert_color(&iot->rb_node, &rb_track_root);
+       rb_insert_color(&iot->rb_node, &pdi->rb_track);
        return 0;
 }
 
-static struct io_track *__find_track(dev_t device, __u64 sector)
+static struct io_track *__find_track(struct per_dev_info *pdi, __u64 sector)
 {
-       struct rb_node **p = &rb_track_root.rb_node;
-       struct rb_node *parent = NULL;
+       struct rb_node *n = pdi->rb_track.rb_node;
        struct io_track *__iot;
 
-       while (*p) {
-               parent = *p;
-               
-               __iot = rb_entry(parent, struct io_track, rb_node);
+       while (n) {
+               __iot = rb_entry(n, struct io_track, rb_node);
 
-               if (device < __iot->device)
-                       p = &(*p)->rb_left;
-               else if (device > __iot->device)
-                       p = &(*p)->rb_right;
-               else if (sector < __iot->sector)
-                       p = &(*p)->rb_left;
+               if (sector < __iot->sector)
+                       n = n->rb_left;
                else if (sector > __iot->sector)
-                       p = &(*p)->rb_right;
+                       n = n->rb_right;
                else
                        return __iot;
        }
@@ -411,58 +470,60 @@ static struct io_track *__find_track(dev_t device, __u64 sector)
        return NULL;
 }
 
-static struct io_track *find_track(__u32 pid, char *comm, dev_t device,
-                                  __u64 sector)
+static struct io_track *find_track(struct per_dev_info *pdi, __u32 pid,
+                                  char *comm, __u64 sector)
 {
        struct io_track *iot;
 
-       iot = __find_track(device, sector);
+       iot = __find_track(pdi, sector);
        if (!iot) {
                iot = malloc(sizeof(*iot));
                iot->pid = pid;
                memcpy(iot->comm, comm, sizeof(iot->comm));
-               iot->device = device;
                iot->sector = sector;
-               track_rb_insert(iot);
+               track_rb_insert(pdi, iot);
        }
 
        return iot;
 }
 
-static void log_track_frontmerge(struct blk_io_trace *t)
+static void log_track_frontmerge(struct per_dev_info *pdi,
+                                struct blk_io_trace *t)
 {
        struct io_track *iot;
 
        if (!track_ios)
                return;
 
-       iot = __find_track(t->device, t->sector + (t->bytes >> 9));
+       iot = __find_track(pdi, t->sector + (t->bytes >> 9));
        if (!iot) {
-               fprintf(stderr, "failed to find mergeable event\n");
+               fprintf(stderr, "merge not found for (%d,%d): %llu\n",
+                       MAJOR(pdi->dev), MINOR(pdi->dev),
+                       (unsigned long long) t->sector + (t->bytes >> 9));
                return;
        }
 
-       rb_erase(&iot->rb_node, &rb_track_root);
+       rb_erase(&iot->rb_node, &pdi->rb_track);
        iot->sector -= t->bytes >> 9;
-       track_rb_insert(iot);
+       track_rb_insert(pdi, iot);
 }
 
-static void log_track_getrq(struct blk_io_trace *t)
+static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t)
 {
        struct io_track *iot;
 
        if (!track_ios)
                return;
 
-       iot = find_track(t->pid, t->comm, t->device, t->sector);
+       iot = find_track(pdi, t->pid, t->comm, t->sector);
        iot->allocation_time = t->time;
 }
 
-
 /*
  * return time between rq allocation and insertion
  */
-static unsigned long long log_track_insert(struct blk_io_trace *t)
+static unsigned long long log_track_insert(struct per_dev_info *pdi,
+                                          struct blk_io_trace *t)
 {
        unsigned long long elapsed;
        struct io_track *iot;
@@ -470,8 +531,12 @@ static unsigned long long log_track_insert(struct blk_io_trace *t)
        if (!track_ios)
                return -1;
 
-       iot = find_track(t->pid, t->comm, t->device, t->sector);
+       iot = find_track(pdi, t->pid, t->comm, t->sector);
        iot->queue_time = t->time;
+
+       if (!iot->allocation_time)
+               return -1;
+
        elapsed = iot->queue_time - iot->allocation_time;
 
        if (per_process_stats) {
@@ -488,7 +553,8 @@ static unsigned long long log_track_insert(struct blk_io_trace *t)
 /*
  * return time between queue and issue
  */
-static unsigned long long log_track_issue(struct blk_io_trace *t)
+static unsigned long long log_track_issue(struct per_dev_info *pdi,
+                                         struct blk_io_trace *t)
 {
        unsigned long long elapsed;
        struct io_track *iot;
@@ -498,9 +564,11 @@ static unsigned long long log_track_issue(struct blk_io_trace *t)
        if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
                return -1;
 
-       iot = __find_track(t->device, t->sector);
+       iot = __find_track(pdi, t->sector);
        if (!iot) {
-               fprintf(stderr, "failed to find issue event\n");
+               fprintf(stderr, "issue not found for (%d,%d): %llu\n",
+                       MAJOR(pdi->dev), MINOR(pdi->dev),
+                       (unsigned long long) t->sector);
                return -1;
        }
 
@@ -521,7 +589,8 @@ static unsigned long long log_track_issue(struct blk_io_trace *t)
 /*
  * return time between dispatch and complete
  */
-static unsigned long long log_track_complete(struct blk_io_trace *t)
+static unsigned long long log_track_complete(struct per_dev_info *pdi,
+                                            struct blk_io_trace *t)
 {
        unsigned long long elapsed;
        struct io_track *iot;
@@ -531,9 +600,11 @@ static unsigned long long log_track_complete(struct blk_io_trace *t)
        if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
                return -1;
 
-       iot = __find_track(t->device, t->sector);
+       iot = __find_track(pdi, t->sector);
        if (!iot) {
-               fprintf(stderr, "failed to find complete event\n");
+               fprintf(stderr, "complete not found for (%d,%d): %llu\n",
+                       MAJOR(pdi->dev), MINOR(pdi->dev),
+                       (unsigned long long) t->sector);
                return -1;
        }
 
@@ -551,7 +622,7 @@ static unsigned long long log_track_complete(struct blk_io_trace *t)
        /*
         * kill the trace, we don't need it after completion
         */
-       rb_erase(&iot->rb_node, &rb_track_root);
+       rb_erase(&iot->rb_node, &pdi->rb_track);
        free(iot);
 
        return elapsed;
@@ -627,25 +698,27 @@ static int resize_devices(char *name)
        return 0;
 }
 
-static struct per_dev_info *get_dev_info(dev_t id)
+static struct per_dev_info *get_dev_info(dev_t dev)
 {
        struct per_dev_info *pdi;
        int i;
 
        for (i = 0; i < ndevices; i++) {
-               if (!devices[i].id)
-                       devices[i].id = id;
-               if (devices[i].id == id)
+               if (!devices[i].dev)
+                       devices[i].dev = dev;
+               if (devices[i].dev == dev)
                        return &devices[i];
        }
 
-       if (resize_devices(NULL) != 0)
+       if (resize_devices(NULL))
                return NULL;
 
        pdi = &devices[ndevices - 1];
-       pdi->id = id;
-       pdi->last_sequence = 0;
+       pdi->dev = dev;
+       pdi->last_sequence = -1;
        pdi->last_read_time = 0;
+       memset(&pdi->rb_last, 0, sizeof(pdi->rb_last));
+       pdi->rb_last_entries = 0;
        return pdi;
 }
 
@@ -654,7 +727,7 @@ static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
        if (pdi->name)
                snprintf(buffer, size, "%s", pdi->name);
        else
-               snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
+               snprintf(buffer, size, "%d,%d",MAJOR(pdi->dev),MINOR(pdi->dev));
        return buffer;
 }
 
@@ -782,16 +855,16 @@ static inline void account_unplug(struct blk_io_trace *t,
        }
 }
 
-static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
-                        char *act)
+static void log_complete(struct per_dev_info *pdi, struct per_cpu_info *pci,
+                        struct blk_io_trace *t, char *act)
 {
-       process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
+       process_fmt(act, pci, t, log_track_complete(pdi, t), 0, NULL);
 }
 
-static void log_insert(struct per_cpu_info *pci, struct blk_io_trace *t,
-                     char *act)
+static void log_insert(struct per_dev_info *pdi, struct per_cpu_info *pci,
+                      struct blk_io_trace *t, char *act)
 {
-       process_fmt(act, pci, t, log_track_insert(t), 0, NULL);
+       process_fmt(act, pci, t, log_track_insert(pdi, t), 0, NULL);
 }
 
 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
@@ -800,17 +873,17 @@ static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
        process_fmt(act, pci, t, -1, 0, NULL);
 }
 
-static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
-                     char *act)
+static void log_issue(struct per_dev_info *pdi, struct per_cpu_info *pci,
+                     struct blk_io_trace *t, char *act)
 {
-       process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
+       process_fmt(act, pci, t, log_track_issue(pdi, t), 0, NULL);
 }
 
-static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
-                     char *act)
+static void log_merge(struct per_dev_info *pdi, struct per_cpu_info *pci,
+                     struct blk_io_trace *t, char *act)
 {
        if (act[0] == 'F')
-               log_track_frontmerge(t);
+               log_track_frontmerge(pdi, t);
 
        process_fmt(act, pci, t, -1ULL, 0, NULL);
 }
@@ -878,7 +951,8 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
        }
 }
 
-static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
+static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
+                         struct per_cpu_info *pci)
 {
        int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
        int act = t->action & 0xffff;
@@ -889,18 +963,18 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
                        log_queue(pci, t, "Q");
                        break;
                case __BLK_TA_INSERT:
-                       log_insert(pci, t, "I");
+                       log_insert(pdi, pci, t, "I");
                        break;
                case __BLK_TA_BACKMERGE:
                        account_m(t, pci, w);
-                       log_merge(pci, t, "M");
+                       log_merge(pdi, pci, t, "M");
                        break;
                case __BLK_TA_FRONTMERGE:
                        account_m(t, pci, w);
-                       log_merge(pci, t, "F");
+                       log_merge(pdi, pci, t, "F");
                        break;
                case __BLK_TA_GETRQ:
-                       log_track_getrq(t);
+                       log_track_getrq(pdi, t);
                        log_generic(pci, t, "G");
                        break;
                case __BLK_TA_SLEEPRQ:
@@ -912,11 +986,11 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
                        break;
                case __BLK_TA_ISSUE:
                        account_issue(t, pci, w);
-                       log_issue(pci, t, "D");
+                       log_issue(pdi, pci, t, "D");
                        break;
                case __BLK_TA_COMPLETE:
                        account_c(t, pci, w, t->bytes);
-                       log_complete(pci, t, "C");
+                       log_complete(pdi, pci, t, "C");
                        break;
                case __BLK_TA_PLUG:
                        log_action(pci, t, "P");
@@ -947,7 +1021,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
        if (t->action & BLK_TC_ACT(BLK_TC_PC))
                dump_trace_pc(t, pci);
        else
-               dump_trace_fs(t, pci);
+               dump_trace_fs(t, pdi, pci);
 
        pdi->events++;
 }
@@ -1116,8 +1190,12 @@ static void show_device_and_cpu_stats(void)
  */
 static inline void t_free(struct trace *t)
 {
-       t->next = t_alloc_list;
-       t_alloc_list = t;
+       if (t_alloc_cache < 1024) {
+               t->next = t_alloc_list;
+               t_alloc_list = t;
+               t_alloc_cache++;
+       } else
+               free(t);
 }
 
 static inline struct trace *t_alloc(void)
@@ -1126,6 +1204,7 @@ static inline struct trace *t_alloc(void)
 
        if (t) {
                t_alloc_list = t->next;
+               t_alloc_cache--;
                return t;
        }
 
@@ -1134,11 +1213,15 @@ static inline struct trace *t_alloc(void)
 
 static inline void bit_free(struct blk_io_trace *bit)
 {
-       /*
-        * abuse a 64-bit field for a next pointer for the free item
-        */
-       bit->time = (__u64) (unsigned long) bit_alloc_list;
-       bit_alloc_list = (struct blk_io_trace *) bit;
+       if (bit_alloc_cache < 1024) {
+               /*
+                * abuse a 64-bit field for a next pointer for the free item
+                */
+               bit->time = (__u64) (unsigned long) bit_alloc_list;
+               bit_alloc_list = (struct blk_io_trace *) bit;
+               bit_alloc_cache++;
+       } else
+               free(bit);
 }
 
 static inline struct blk_io_trace *bit_alloc(void)
@@ -1148,6 +1231,7 @@ static inline struct blk_io_trace *bit_alloc(void)
        if (bit) {
                bit_alloc_list = (struct blk_io_trace *) (unsigned long) \
                                 bit->time;
+               bit_alloc_cache--;
                return bit;
        }
 
@@ -1176,37 +1260,105 @@ static inline int check_stopwatch(struct blk_io_trace *bit)
        return 1;
 }
 
-static int sort_entries(void)
+/*
+ * return youngest entry read
+ */
+static int sort_entries(unsigned long long *youngest)
 {
        struct trace *t;
-       int nr = 0;
 
        if (!genesis_time)
                find_genesis();
 
+       *youngest = 0;
        while ((t = trace_list) != NULL) {
                struct blk_io_trace *bit = t->bit;
 
                trace_list = t->next;
 
-               if (verify_trace(bit))
-                       continue;
-
                bit->time -= genesis_time;
 
+               if (bit->time < *youngest || !*youngest)
+                       *youngest = bit->time;
+
                if (check_stopwatch(bit)) {
                        bit_free(bit);
                        t_free(t);
                        continue;
                }
 
-               if (trace_rb_insert(t))
-                       break;
+               if (trace_rb_insert_sort(t))
+                       return -1;
 
-               nr++;
+               if (bit->sequence < smallest_seq_read)
+                       smallest_seq_read = bit->sequence;
        }
 
-       return nr;
+       return 0;
+}
+
+static inline void __put_trace_last(struct per_dev_info *pdi, struct trace *t)
+{
+       rb_erase(&t->rb_node, &pdi->rb_last);
+       pdi->rb_last_entries--;
+
+       bit_free(t->bit);
+       t_free(t);
+}
+
+static void put_trace(struct per_dev_info *pdi, struct trace *t)
+{
+       rb_erase(&t->rb_node, &rb_sort_root);
+       rb_sort_entries--;
+
+       trace_rb_insert_last(pdi, t);
+
+       if (pdi->rb_last_entries > rb_batch * pdi->nfiles) {
+               struct rb_node *n = rb_first(&pdi->rb_last);
+
+               t = rb_entry(n, struct trace, rb_node);
+               __put_trace_last(pdi, t);
+       }
+}
+
+static int check_sequence(struct per_dev_info *pdi, struct blk_io_trace *bit,
+                         int force)
+{
+       unsigned long expected_sequence = pdi->last_sequence + 1;
+       struct trace *t;
+       
+       /*
+        * first entry, always ok
+        */
+       if (!expected_sequence)
+               return 0;
+
+       if (bit->sequence == expected_sequence)
+               return 0;
+
+       /*
+        * we may not have seen that sequence yet. if we are not doing
+        * the final run, break and wait for more entries.
+        */
+       if (expected_sequence < smallest_seq_read) {
+               t = trace_rb_find_last(pdi, expected_sequence);
+               if (!t)
+                       goto skip;
+
+               __put_trace_last(pdi, t);
+               return 0;
+       } else if (!force)
+               return 1;
+       else {
+skip:
+               if (print_missing) {
+                       fprintf(stderr, "(%d,%d): skipping %lu -> %u\n",
+                               MAJOR(pdi->dev), MINOR(pdi->dev),
+                               pdi->last_sequence, bit->sequence);
+               }
+               pdi->skips++;
+               return 0;
+       }
 }
 
 static void show_entries_rb(int force)
@@ -1217,15 +1369,20 @@ static void show_entries_rb(int force)
        struct rb_node *n;
        struct trace *t;
 
-       while ((n = rb_first(&rb_sort_root)) != NULL) {
+       if (force) {
+               n = rb_first(&rb_sort_root);
+               t = rb_entry(n, struct trace, rb_node);
+               fprintf(stderr, "first force %u\n", t->bit->sequence);
+       }
 
+       while ((n = rb_first(&rb_sort_root)) != NULL) {
                if (done)
                        break;
 
                t = rb_entry(n, struct trace, rb_node);
                bit = t->bit;
 
-               if (!pdi || pdi->id != bit->device)
+               if (!pdi || pdi->dev != bit->device)
                        pdi = get_dev_info(bit->device);
 
                if (!pdi) {
@@ -1234,38 +1391,9 @@ static void show_entries_rb(int force)
                        break;
                }
 
-               if (bit->cpu > pdi->ncpus) {
-                       fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
-                               bit->cpu, MAJOR(bit->device),
-                               MINOR(bit->device));
+               if (check_sequence(pdi, bit, force))
                        break;
-               }
-
-               /*
-                * back off displaying more info if we are out of sync
-                * on SMP systems. to prevent stalling on lost events,
-                * only allow an event to skip us a few times
-                */
-               if (bit->sequence > (pdi->last_sequence + 1) && !force) {
-                       struct trace *__t;
-
-                       /*
-                        * the wanted sequence is really there, continue
-                        * because this means that the log time is earlier
-                        * on the trace we have now
-                        */
-                       __t = trace_rb_find(pdi->id, pdi->last_sequence + 1);
-                       if (__t)
-                               goto ok;
-
-                       if (t->skipped < 5) {
-                               t->skipped++;
-                               break;
-                       } else
-                               pdi->skips++;
-               }
 
-ok:
                if (!force && bit->time > last_allowed_time)
                        break;
 
@@ -1278,10 +1406,7 @@ ok:
 
                dump_trace(bit, pci, pdi);
 
-               rb_erase(&t->rb_node, &rb_sort_root);
-               rb_sort_entries--;
-               bit_free(bit);
-               t_free(t);
+               put_trace(pdi, t);
        }
 }
 
@@ -1320,7 +1445,7 @@ static int read_data(int fd, void *buffer, int bytes, int block)
 static int read_events(int fd, int always_block)
 {
        struct per_dev_info *pdi = NULL;
-       int events = 0;
+       unsigned int events = 0;
 
        while (!is_done() && events < rb_batch) {
                struct blk_io_trace *bit;
@@ -1349,16 +1474,21 @@ static int read_events(int fd, int always_block)
                        bit = ptr;
                }
 
+               trace_to_cpu(bit);
+
+               if (verify_trace(bit)) {
+                       bit_free(bit);
+                       continue;
+               }
+
                t = t_alloc();
                memset(t, 0, sizeof(*t));
                t->bit = bit;
 
-               trace_to_cpu(bit);
-
                t->next = trace_list;
                trace_list = t;
 
-               if (!pdi || pdi->id != bit->device)
+               if (!pdi || pdi->dev != bit->device)
                        pdi = get_dev_info(bit->device);
 
                if (bit->time > pdi->last_read_time)
@@ -1382,7 +1512,7 @@ static int do_file(void)
        for (i = 0; i < ndevices; i++) {
                pdi = &devices[i];
                pdi->nfiles = 0;
-               pdi->last_sequence = 0;
+               pdi->last_sequence = -1;
 
                for (j = 0;; j++) {
                        struct stat st;
@@ -1412,8 +1542,11 @@ static int do_file(void)
         * now loop over the files reading in the data
         */
        do {
+               unsigned long long youngest;
+
                events_added = 0;
                last_allowed_time = -1ULL;
+               smallest_seq_read = -1U;
 
                for (i = 0; i < ndevices; i++) {
                        pdi = &devices[i];
@@ -1439,20 +1572,14 @@ static int do_file(void)
                        }
                }
 
-               if (sort_entries() == -1)
+               if (sort_entries(&youngest))
                        break;
 
-               show_entries_rb(0);
-
-               /*
-                * we check this a little late, since we want to make
-                * sure that all files have had a chance to be read
-                * before deciding there are definitely no further
-                * entries before the stop time
-                */
-               if (last_allowed_time > stopwatch_end)
+               if (youngest > stopwatch_end)
                        break;
 
+               show_entries_rb(0);
+
        } while (events_added);
 
        if (rb_sort_entries)
@@ -1463,6 +1590,7 @@ static int do_file(void)
 
 static int do_stdin(void)
 {
+       unsigned long long youngest;
        int fd;
 
        last_allowed_time = -1ULL;
@@ -1474,7 +1602,10 @@ static int do_stdin(void)
                if (!events)
                        break;
        
-               if (sort_entries() == -1)
+               if (sort_entries(&youngest))
+                       break;
+
+               if (youngest > stopwatch_end)
                        break;
 
                show_entries_rb(0);
@@ -1492,7 +1623,7 @@ static void flush_output(void)
        fflush(ofp);
 }
 
-static void handle_sigint(int sig)
+static void handle_sigint(__attribute__((__unused__)) int sig)
 {
        done = 1;
        flush_output();
@@ -1552,6 +1683,7 @@ static char usage_str[] = \
        "\t -f Output format. Customize the output format. The format field\n" \
        "\t    identifies can be found in the documentation\n" \
        "\t-F Format specification. Can be found in the documentation\n" \
+       "\t-m Print missing entries\n" \
        "\t-v Print program version info\n\n";
 
 static void usage(char *prog)
@@ -1604,6 +1736,9 @@ int main(int argc, char *argv[])
                case 'n':
                        ppi_hash_by_pid = 0;
                        break;
+               case 'm':
+                       print_missing = 1;
+                       break;
                case 'v':
                        printf("%s version %s\n", argv[0], blkparse_version);
                        return 0;
@@ -1627,7 +1762,6 @@ int main(int argc, char *argv[])
        }
 
        memset(&rb_sort_root, 0, sizeof(rb_sort_root));
-       memset(&rb_track_root, 0, sizeof(rb_track_root));
 
        signal(SIGINT, handle_sigint);
        signal(SIGHUP, handle_sigint);