unsigned long long events;
unsigned long long last_reported_time;
struct io_stats io_stats;
+ unsigned long last_sequence;
int ncpus;
struct per_cpu_info *cpus;
}
};
-static struct rb_root rb_sort_root;
-static struct rb_root rb_track_root;
-
/*
* for sorting the displayed output
*/
struct trace {
struct blk_io_trace *bit;
struct rb_node rb_node;
+ struct trace *next;
+ int skipped;
};
+static struct rb_root rb_sort_root;
+static struct rb_root rb_track_root;
+
+static struct trace *trace_list;
+
/*
* for tracking individual ios
*/
* this can happen if we lose events, so don't print an error
*/
iot = __find_track(t->device, t->sector - (t->bytes >> 10));
- if (iot) {
- rb_erase(&iot->rb_node, &rb_track_root);
- iot->sector -= t->bytes >> 10;
- track_rb_insert(iot);
+ if (!iot) {
+ fprintf(stderr, "failed to find mergeable event\n");
+ return;
}
+
+ rb_erase(&iot->rb_node, &rb_track_root);
+ iot->sector -= t->bytes >> 10;
+ track_rb_insert(iot);
}
static void log_track_getrq(struct blk_io_trace *t)
* this can happen if we lose events, so don't print an error
*/
iot = __find_track(t->device, t->sector);
- if (!iot)
+ if (!iot) {
+ fprintf(stderr, "failed to find issue event\n");
return -1;
+ }
iot->dispatch_time = t->time;
elapsed = iot->dispatch_time - iot->queue_time;
return -1;
iot = __find_track(t->device, t->sector);
- if (!iot)
+ if (!iot) {
+ fprintf(stderr, "failed to find complete event\n");
return -1;
+ }
iot->completion_time = t->time;
elapsed = iot->completion_time - iot->dispatch_time;
pdi->ncpus = new_count;
pdi->cpus = cpus;
}
-
+
static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
{
+ struct per_cpu_info *pci;
+
if (cpu >= pdi->ncpus)
resize_cpu_info(pdi, cpu);
- return &pdi->cpus[cpu];
+
+ pci = &pdi->cpus[cpu];
+ pci->cpu = cpu;
+ return pci;
}
return 0;
}
-static struct per_dev_info *get_dev_info(dev_t id, int create)
+static struct per_dev_info *get_dev_info(dev_t id)
{
+ struct per_dev_info *pdi;
int i;
for (i = 0; i < ndevices; i++)
if (devices[i].id == id)
return &devices[i];
- if (!create)
- return NULL;
+
if (resize_devices(NULL) != 0)
return NULL;
- return &devices[ndevices-1];
+
+ pdi = &devices[ndevices - 1];
+ pdi->id = id;
+ return pdi;
}
static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
return buffer;
}
-
static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
{
unsigned long long this = bit->time;
pdi->last_reported_time = this;
}
-
static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
int rw)
{
}
}
-static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
+static struct blk_io_trace *find_trace(void *p, unsigned long offset)
{
- unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
+ unsigned long max_offset = offset;
unsigned long off;
struct blk_io_trace *bit;
__u32 magic;
return NULL;
}
-static int sort_entries(void *traces, unsigned long offset, int nr,
- struct per_dev_info *fpdi, struct per_cpu_info *fpci)
+static int sort_entries(void)
{
struct per_dev_info *pdi;
struct per_cpu_info *pci;
struct blk_io_trace *bit;
struct trace *t;
- void *start = traces;
+ int nr = 0;
- while (traces - start <= offset - sizeof(*bit)) {
- if (!nr)
- break;
+ while ((t = trace_list) != NULL) {
- bit = find_trace(traces, offset - (traces - start), nr);
- if (!bit)
- break;
+ trace_list = t->next;
+ bit = t->bit;
- t = malloc(sizeof(*t));
- if (!t) {
- fprintf(stderr, "Out of memory, seq %d on dev %d,%d\n",
- bit->sequence,
- MAJOR(bit->device), MINOR(bit->device));
- return -1;
- }
- t->bit = bit;
memset(&t->rb_node, 0, sizeof(t->rb_node));
trace_to_cpu(bit);
- if (verify_trace(bit)) {
- free(t);
+ if (verify_trace(bit))
break;
- }
+ if (trace_rb_insert(t))
+ return -1;
- pdi = fpdi ? fpdi : get_dev_info(bit->device, 1);
- pdi->id = bit->device;
- pci = fpci ? fpci : get_cpu_info(pdi, bit->cpu);
- pci->cpu = bit->cpu;
+ pdi = get_dev_info(bit->device);
+ pci = get_cpu_info(pdi, bit->cpu);
pci->nelems++;
- if (trace_rb_insert(t)) {
- free(t);
- return -1;
- }
-
- traces += sizeof(*bit) + bit->pdu_len;
- nr--;
+ nr++;
}
- return 0;
-}
-
-static void free_entries_rb(void)
-{
- struct rb_node *n;
-
- while ((n = rb_first(&rb_sort_root)) != NULL) {
- struct trace *t = rb_entry(n, struct trace, rb_node);
-
- rb_erase(&t->rb_node, &rb_sort_root);
- free(t);
- }
+ return nr;
}
static void show_entries_rb(void)
struct trace *t;
int cpu;
- n = rb_first(&rb_sort_root);
- if (!n)
- return;
+ while ((n = rb_first(&rb_sort_root)) != NULL) {
- do {
t = rb_entry(n, struct trace, rb_node);
bit = t->bit;
- pdi = get_dev_info(bit->device, 0);
+ pdi = get_dev_info(bit->device);
if (!pdi) {
fprintf(stderr, "Unknown device ID? (%d,%d)\n",
MAJOR(bit->device), MINOR(bit->device));
break;
}
+ /*
+ * back off displaying more info if we are out of sync
+ * on SMP systems. to prevent stalling on lost events,
+ * only allow an event to skip us once
+ */
+ if (bit->sequence != (pdi->last_sequence + 1)) {
+ if (!t->skipped) {
+ t->skipped = 1;
+ break;
+ }
+ }
+
+ pdi->last_sequence = bit->sequence;
+
bit->time -= genesis_time;
if (bit->time < stopwatch_start)
continue;
if (dump_trace(bit, &pdi->cpus[cpu], pdi))
break;
- } while ((n = rb_next(n)) != NULL);
+ rb_erase(&t->rb_node, &rb_sort_root);
+ free(bit);
+ free(t);
+ }
}
static int read_data(int fd, void *buffer, int bytes, int block)
return 0;
}
+/*
+ * Find the traces in 'tb' and add them to the list for sorting and
+ * displaying
+ */
+static int find_entries(void *tb, unsigned long size)
+{
+ struct blk_io_trace *bit;
+ struct trace *t;
+ void *start = tb;
+
+ while (tb - start <= size - sizeof(*bit)) {
+ bit = find_trace(tb, size - (tb - start));
+ if (!bit)
+ break;
+
+ t = malloc(sizeof(*t));
+ memset(t, 0, sizeof(*t));
+ t->bit = bit;
+
+ t->next = trace_list;
+ trace_list = t;
+
+ tb += sizeof(*bit) + bit->pdu_len;
+ }
+
+ return 0;
+}
+
static int do_file(void)
{
struct per_dev_info *pdi;
- int i, j, nfiles = 0;
+ int i, j, nfiles = 0, nelems;
for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
for (j = 0;; j++, nfiles++) {
continue;
}
- if (sort_entries(tb, st.st_size, ~0U, pdi, pci) == -1) {
+ if (find_entries(tb, st.st_size)) {
+ close(pci->fd);
+ free(tb);
+ }
+
+ nelems = sort_entries();
+ if (nelems == -1) {
close(pci->fd);
free(tb);
continue;
}
printf("Completed %s (CPU%d %d, entries)\n",
- pci->fname, j, pci->nelems);
+ pci->fname, j, nelems);
close(pci->fd);
}
}
return 0;
}
-static void resize_buffer(void **buffer, long *size, long offset)
+static int read_sort_events(int fd)
{
- long old_size = *size;
+ int events = 0;
- if (*size == 0)
- *size = 64 * sizeof(struct blk_io_trace);
-
- *size *= 2;
- *buffer = realloc(*buffer, *size);
-
- if (old_size)
- memset(*buffer + offset, 0, *size - old_size);
-}
-
-static int read_sort_events(int fd, void **buffer, long *max_offset)
-{
- long offset;
- int events;
-
- events = offset = 0;
do {
- struct blk_io_trace *t;
+ struct blk_io_trace *bit;
+ struct trace *t;
int pdu_len;
__u32 magic;
- if (*max_offset - offset < sizeof(*t))
- resize_buffer(buffer, max_offset, offset);
+ bit = malloc(sizeof(*bit));
- if (read_data(fd, *buffer + offset, sizeof(*t), !events))
+ if (read_data(fd, bit, sizeof(*bit), !events))
break;
- t = *buffer + offset;
- offset += sizeof(*t);
-
- magic = be32_to_cpu(t->magic);
+ magic = be32_to_cpu(bit->magic);
if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
fprintf(stderr, "Bad magic %x\n", magic);
break;
}
- pdu_len = be16_to_cpu(t->pdu_len);
+ pdu_len = be16_to_cpu(bit->pdu_len);
if (pdu_len) {
- if (*max_offset - offset <= pdu_len)
- resize_buffer(buffer, max_offset, offset);
+ void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
- if (read_data(fd, *buffer + offset, pdu_len, 1))
+ if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
break;
- offset += pdu_len;
+ bit = ptr;
}
+ t = malloc(sizeof(*t));
+ memset(t, 0, sizeof(*t));
+ t->bit = bit;
+ t->next = trace_list;
+ trace_list = t;
+
events++;
} while (!is_done() && events < rb_batch);
static int do_stdin(void)
{
int fd;
- void *ptr = NULL;
- long max_offset;
fd = dup(STDIN_FILENO);
- max_offset = 0;
do {
int events;
- events = read_sort_events(fd, &ptr, &max_offset);
+ events = read_sort_events(fd);
if (!events)
break;
- if (sort_entries(ptr, ~0UL, events, NULL, NULL) == -1)
+ if (sort_entries() == -1)
break;
show_entries_rb();
- free_entries_rb();
} while (1);
- if (ptr)
- free(ptr);
-
close(fd);
return 0;
}