2 * block queue tracing parse application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <sys/types.h>
36 #define SECONDS(x) ((unsigned long long)(x) / 1000000000)
37 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
40 static unsigned long long genesis_time, last_reported_time;
52 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53 unsigned long ireads, iwrites;
54 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55 unsigned long long iread_kb, iwrite_kb;
58 #define S_OPTS "i:o:b:"
59 static struct option l_opts[] = {
86 static struct rb_root rb_root;
89 struct blk_io_trace *bit;
90 struct rb_node rb_node;
94 static struct per_cpu_info *per_cpu_info;
96 static unsigned long long events;
98 static char *dev, *output_name;
100 #define RB_BATCH_DEFAULT (1024)
101 static int rb_batch = RB_BATCH_DEFAULT;
103 #define is_done() (*(volatile int *)(&done))
104 static volatile int done;
106 static void resize_cpu_info(int cpuid)
108 int new_space, new_max = cpuid + 1;
111 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
113 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
114 new_max, (int) sizeof(*per_cpu_info));
118 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
119 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
120 memset(new_start, 0, new_space);
124 static struct per_cpu_info *get_cpu_info(int cpu)
126 struct per_cpu_info *pci;
129 resize_cpu_info(cpu);
132 * ->cpu might already be set, but just set it unconditionally
134 pci = &per_cpu_info[cpu];
140 static inline void check_time(struct blk_io_trace *bit)
142 unsigned long long this = bit->time;
143 unsigned long long last = last_reported_time;
145 backwards = (this < last) ? 'B' : ' ';
146 last_reported_time = this;
149 static inline void account_m(struct per_cpu_info *pci, int rw,
154 pci->qwrite_kb += bytes >> 10;
157 pci->qread_kb += bytes >> 10;
161 static inline void account_q(struct per_cpu_info *pci, int rw,
166 pci->qwrite_kb += bytes >> 10;
169 pci->qread_kb += bytes >> 10;
173 static inline void account_c(struct per_cpu_info *pci, int rw,
178 pci->cwrite_kb += bytes >> 10;
181 pci->cread_kb += bytes >> 10;
185 static inline void account_i(struct per_cpu_info *pci, int rw,
190 pci->iwrite_kb += bytes >> 10;
193 pci->iread_kb += bytes >> 10;
197 static void output(struct per_cpu_info *pci, char *s)
202 fprintf(pci->ofp, "%s", s);
205 static char hstring[256];
206 static char tstring[256];
208 static inline char *setup_header(struct per_cpu_info *pci,
209 struct blk_io_trace *t, char act)
211 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
212 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
213 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
228 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
230 (unsigned long)t->sequence, SECONDS(t->time),
231 NANO_SECONDS(t->time), t->pid, act, rwbs);
236 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
239 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
240 (unsigned long long)t->sector, t->bytes >> 9, t->error);
241 output(pci, tstring);
244 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
247 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
248 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
249 output(pci, tstring);
252 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
255 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
256 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
257 output(pci, tstring);
260 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
263 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
264 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
265 output(pci, tstring);
268 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
271 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
272 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
273 output(pci, tstring);
276 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
281 sprintf(tstring,"%s ", setup_header(pci, t, act));
282 output(pci, tstring);
284 buf = (unsigned char *) t + sizeof(*t);
285 for (i = 0; i < t->pdu_len; i++) {
286 sprintf(tstring,"%02x ", buf[i]);
287 output(pci, tstring);
291 sprintf(tstring,"[%d]\n", t->error);
292 output(pci, tstring);
294 sprintf(tstring,"[%s]\n", t->comm);
295 output(pci, tstring);
300 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
304 switch (t->action & 0xffff) {
306 log_generic(pci, t, 'Q');
309 log_generic(pci, t, 'G');
311 case __BLK_TA_SLEEPRQ:
312 log_generic(pci, t, 'S');
314 case __BLK_TA_REQUEUE:
315 log_generic(pci, t, 'R');
318 ret = log_pc(pci, t, 'D');
320 case __BLK_TA_COMPLETE:
324 fprintf(stderr, "Bad pc action %x\n", t->action);
332 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
334 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
336 switch (t->action & 0xffff) {
338 account_q(pci, w, t->bytes);
339 log_queue(pci, t, 'Q');
341 case __BLK_TA_BACKMERGE:
342 account_m(pci, w, t->bytes);
343 log_merge(pci, t, 'M');
345 case __BLK_TA_FRONTMERGE:
346 account_m(pci, w, t->bytes);
347 log_merge(pci, t, 'F');
350 log_generic(pci, t, 'G');
352 case __BLK_TA_SLEEPRQ:
353 log_generic(pci, t, 'S');
355 case __BLK_TA_REQUEUE:
356 account_c(pci, w, -t->bytes);
357 log_queue(pci, t, 'R');
360 account_i(pci, w, t->bytes);
361 log_issue(pci, t, 'D');
363 case __BLK_TA_COMPLETE:
364 account_c(pci, w, t->bytes);
365 log_complete(pci, t, 'C');
368 fprintf(stderr, "Bad fs action %x\n", t->action);
373 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
377 if (output_name && !pci->ofp) {
378 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
379 "%s_log.%d", output_name, pci->cpu);
381 pci->ofp = fopen(pci->ofname, "w");
382 if (pci->ofp == NULL) {
388 if (t->action & BLK_TC_ACT(BLK_TC_PC))
389 ret = dump_trace_pc(t, pci);
391 dump_trace_fs(t, pci);
397 static void dump_pci_stats(struct per_cpu_info *pci)
399 printf(" Reads Queued: %'8lu, %'8LuKiB\t", pci->qreads, pci->qread_kb);
400 printf(" Writes Queued: %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
402 printf(" Read Dispatches: %'8lu, %'8LuKiB\t", pci->ireads, pci->iread_kb);
403 printf(" Write Dispatches: %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
404 printf(" Reads Completed: %'8lu, %'8LuKiB\t", pci->creads, pci->cread_kb);
405 printf(" Writes Completed: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
406 printf(" Read Merges: %'8lu%8c\t", pci->mreads, ' ');
408 printf(" Write Merges: %'8lu\n", pci->mwrites);
411 static void show_stats(void)
413 struct per_cpu_info foo, *pci;
414 int i, pci_events = 0;
416 memset(&foo, 0, sizeof(foo));
418 for (i = 0; i < max_cpus; i++) {
419 pci = &per_cpu_info[i];
424 foo.qreads += pci->qreads;
425 foo.qwrites += pci->qwrites;
426 foo.creads += pci->creads;
427 foo.cwrites += pci->cwrites;
428 foo.mreads += pci->mreads;
429 foo.mwrites += pci->mwrites;
430 foo.qread_kb += pci->qread_kb;
431 foo.qwrite_kb += pci->qwrite_kb;
432 foo.cread_kb += pci->cread_kb;
433 foo.cwrite_kb += pci->cwrite_kb;
435 printf("CPU%d:\n", i);
440 if (pci_events > 1) {
442 dump_pci_stats(&foo);
445 printf("Events: %'Lu\n", events);
448 static inline int trace_rb_insert(struct trace *t)
450 struct rb_node **p = &rb_root.rb_node;
451 struct rb_node *parent = NULL;
456 __t = rb_entry(parent, struct trace, rb_node);
458 if (t->bit->sequence < __t->bit->sequence)
460 else if (t->bit->sequence > __t->bit->sequence)
463 fprintf(stderr, "sequence alias!\n");
468 rb_link_node(&t->rb_node, parent, p);
469 rb_insert_color(&t->rb_node, &rb_root);
473 #define min(a, b) ((a) < (b) ? (a) : (b))
475 static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
477 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
479 struct blk_io_trace *bit;
482 for (off = 0; off < max_offset; off++) {
485 magic = be32_to_cpu(bit->magic);
486 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
493 static int sort_entries(void *traces, unsigned long offset, int nr)
495 struct per_cpu_info *pci;
496 struct blk_io_trace *bit;
498 void *start = traces;
501 while (traces - start <= offset - sizeof(*bit)) {
505 bit = find_trace(traces, offset - (traces - start), nr);
509 t = malloc(sizeof(*t));
511 memset(&t->rb_node, 0, sizeof(t->rb_node));
515 if (verify_trace(bit))
518 pci = get_cpu_info(bit->cpu);
521 if (trace_rb_insert(t))
524 traces += sizeof(*bit) + bit->pdu_len;
532 static void free_entries_rb(void)
536 while ((n = rb_first(&rb_root)) != NULL) {
537 struct trace *t = rb_entry(n, struct trace, rb_node);
539 rb_erase(&t->rb_node, &rb_root);
544 static void show_entries_rb(void)
546 struct blk_io_trace *bit;
551 n = rb_first(&rb_root);
556 t = rb_entry(n, struct trace, rb_node);
560 if (cpu > max_cpus) {
561 fprintf(stderr, "CPU number too large (%d)\n", cpu);
565 if (genesis_time == 0)
566 genesis_time = bit->time;
567 bit->time -= genesis_time;
571 if (dump_trace(bit, &per_cpu_info[cpu]))
574 } while ((n = rb_next(n)) != NULL);
577 static int read_data(int fd, void *buffer, int bytes, int block)
579 int ret, bytes_left, fl;
582 fl = fcntl(fd, F_GETFL);
585 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
587 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
591 while (bytes_left > 0) {
592 ret = read(fd, p, bytes_left);
608 static int do_file(void)
612 for (i = 0, nfiles = 0;; i++, nfiles++) {
613 struct per_cpu_info *pci;
617 pci = get_cpu_info(i);
620 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
621 if (stat(pci->fname, &st) < 0)
626 printf("Processing %s\n", pci->fname);
628 tb = malloc(st.st_size);
630 pci->fd = open(pci->fname, O_RDONLY);
636 if (read_data(pci->fd, tb, st.st_size, 1))
639 if (sort_entries(tb, st.st_size, ~0U) == -1)
643 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
648 fprintf(stderr, "No files found\n");
656 static void resize_buffer(void **buffer, long *size, long offset)
658 long old_size = *size;
661 *buffer = realloc(*buffer, *size);
662 memset(*buffer + offset, 0, *size - old_size);
665 static int read_sort_events(int fd, void **buffer)
667 long offset, max_offset;
670 max_offset = 128 * sizeof(struct blk_io_trace);
671 *buffer = malloc(max_offset);
676 struct blk_io_trace *t;
679 if (max_offset - offset < sizeof(*t))
680 resize_buffer(buffer, &max_offset, offset);
682 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
690 t = *buffer + offset;
691 offset += sizeof(*t);
693 pdu_len = be16_to_cpu(t->pdu_len);
695 if (max_offset - offset < pdu_len)
696 resize_buffer(buffer, &max_offset, offset);
698 if (read_data(fd, *buffer + offset, pdu_len, 1))
705 } while (!is_done() && events < rb_batch);
710 static int do_stdin(void)
715 fd = dup(STDIN_FILENO);
719 events = read_sort_events(fd, &ptr);
723 if (sort_entries(ptr, ~0UL, events) == -1)
735 static void flush_output(void)
739 for (i = 0; i < max_cpus; i++) {
740 struct per_cpu_info *pci = &per_cpu_info[i];
750 static void handle_sigint(int sig)
756 static void usage(char *prog)
758 fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
761 int main(int argc, char *argv[])
765 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
768 dev = strdup(optarg);
771 output_name = strdup(optarg);
774 rb_batch = atoi(optarg);
776 rb_batch = RB_BATCH_DEFAULT;
789 memset(&rb_root, 0, sizeof(rb_root));
791 signal(SIGINT, handle_sigint);
792 signal(SIGHUP, handle_sigint);
793 signal(SIGTERM, handle_sigint);
795 setlocale(LC_NUMERIC, "en_US");
797 if (!strcmp(dev, "-"))