2 * block queue tracing parse application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <sys/types.h>
35 #define SECONDS(x) ((unsigned long long)(x) / 1000000000)
36 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
39 static unsigned long long genesis_time, last_reported_time;
51 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
52 unsigned long ireads, iwrites;
53 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
54 unsigned long long iread_kb, iwrite_kb;
58 static struct option l_opts[] = {
79 static struct rb_root rb_root;
82 struct blk_io_trace *bit;
83 struct rb_node rb_node;
87 static struct per_cpu_info *per_cpu_info;
89 static unsigned long long events;
91 static char *dev, *output_name;
93 #define is_done() (*(volatile int *)(&done))
94 static volatile int done;
96 static void resize_cpu_info(int cpuid)
98 int new_space, new_max = cpuid + 1;
101 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
103 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
104 new_max, (int) sizeof(*per_cpu_info));
108 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
109 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
110 memset(new_start, 0, new_space);
114 static struct per_cpu_info *get_cpu_info(int cpu)
116 struct per_cpu_info *pci;
119 resize_cpu_info(cpu);
122 * ->cpu might already be set, but just set it unconditionally
124 pci = &per_cpu_info[cpu];
130 static inline void check_time(struct blk_io_trace *bit)
132 unsigned long long this = bit->time;
133 unsigned long long last = last_reported_time;
135 backwards = (this < last) ? 'B' : ' ';
136 last_reported_time = this;
139 static inline void account_m(struct per_cpu_info *pci, int rw,
144 pci->qwrite_kb += bytes >> 10;
147 pci->qread_kb += bytes >> 10;
151 static inline void account_q(struct per_cpu_info *pci, int rw,
156 pci->qwrite_kb += bytes >> 10;
159 pci->qread_kb += bytes >> 10;
163 static inline void account_c(struct per_cpu_info *pci, int rw,
168 pci->cwrite_kb += bytes >> 10;
171 pci->cread_kb += bytes >> 10;
175 static inline void account_i(struct per_cpu_info *pci, int rw,
180 pci->iwrite_kb += bytes >> 10;
183 pci->iread_kb += bytes >> 10;
187 static void output(struct per_cpu_info *pci, char *s)
192 fprintf(pci->ofp, "%s", s);
195 static char hstring[256];
196 static char tstring[256];
198 static inline char *setup_header(struct per_cpu_info *pci,
199 struct blk_io_trace *t, char act)
201 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
202 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
203 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
218 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
220 (unsigned long)t->sequence, SECONDS(t->time),
221 NANO_SECONDS(t->time), t->pid, act, rwbs);
226 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
229 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
230 (unsigned long long)t->sector, t->bytes >> 9, t->error);
231 output(pci, tstring);
234 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
237 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
238 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
239 output(pci, tstring);
242 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
245 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
246 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
247 output(pci, tstring);
250 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
253 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
254 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
255 output(pci, tstring);
258 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
261 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
262 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
263 output(pci, tstring);
266 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
271 sprintf(tstring,"%s ", setup_header(pci, t, act));
272 output(pci, tstring);
274 buf = (unsigned char *) t + sizeof(*t);
275 for (i = 0; i < t->pdu_len; i++) {
276 sprintf(tstring,"%02x ", buf[i]);
277 output(pci, tstring);
281 sprintf(tstring,"[%d]\n", t->error);
282 output(pci, tstring);
284 sprintf(tstring,"[%s]\n", t->comm);
285 output(pci, tstring);
290 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
294 switch (t->action & 0xffff) {
296 log_generic(pci, t, 'Q');
299 log_generic(pci, t, 'G');
301 case __BLK_TA_SLEEPRQ:
302 log_generic(pci, t, 'S');
304 case __BLK_TA_REQUEUE:
305 log_generic(pci, t, 'R');
308 ret = log_pc(pci, t, 'D');
310 case __BLK_TA_COMPLETE:
314 fprintf(stderr, "Bad pc action %x\n", t->action);
322 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
324 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
326 switch (t->action & 0xffff) {
328 account_q(pci, w, t->bytes);
329 log_queue(pci, t, 'Q');
331 case __BLK_TA_BACKMERGE:
332 account_m(pci, w, t->bytes);
333 log_merge(pci, t, 'M');
335 case __BLK_TA_FRONTMERGE:
336 account_m(pci, w, t->bytes);
337 log_merge(pci, t, 'F');
340 log_generic(pci, t, 'G');
342 case __BLK_TA_SLEEPRQ:
343 log_generic(pci, t, 'S');
345 case __BLK_TA_REQUEUE:
346 account_c(pci, w, -t->bytes);
347 log_queue(pci, t, 'R');
350 account_i(pci, w, t->bytes);
351 log_issue(pci, t, 'D');
353 case __BLK_TA_COMPLETE:
354 account_c(pci, w, t->bytes);
355 log_complete(pci, t, 'C');
358 fprintf(stderr, "Bad fs action %x\n", t->action);
363 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
367 if (output_name && !pci->ofp) {
368 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
369 "%s_log.%d", output_name, pci->cpu);
371 pci->ofp = fopen(pci->ofname, "w");
372 if (pci->ofp == NULL) {
378 if (t->action & BLK_TC_ACT(BLK_TC_PC))
379 ret = dump_trace_pc(t, pci);
381 dump_trace_fs(t, pci);
387 static void dump_pci_stats(struct per_cpu_info *pci)
389 printf("\tReads:\n");
390 printf("\t\tQueued: %'8lu, %'8LuKiB\n", pci->qreads, pci->qread_kb);
391 printf("\t\tDispatched:%'8lu, %'8LuKiB\n", pci->ireads, pci->iread_kb);
392 printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pci->creads, pci->cread_kb);
393 printf("\t\tMerges: %'8lu\n", pci->mreads);
395 printf("\tWrites:\n");
396 printf("\t\tQueued: %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
397 printf("\t\tDispatched:%'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
398 printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
399 printf("\t\tMerges: %'8lu\n", pci->mwrites);
402 static void show_stats(void)
404 struct per_cpu_info foo, *pci;
405 int i, pci_events = 0;
407 memset(&foo, 0, sizeof(foo));
409 for (i = 0; i < max_cpus; i++) {
410 pci = &per_cpu_info[i];
415 foo.qreads += pci->qreads;
416 foo.qwrites += pci->qwrites;
417 foo.creads += pci->creads;
418 foo.cwrites += pci->cwrites;
419 foo.mreads += pci->mreads;
420 foo.mwrites += pci->mwrites;
421 foo.qread_kb += pci->qread_kb;
422 foo.qwrite_kb += pci->qwrite_kb;
423 foo.cread_kb += pci->cread_kb;
424 foo.cwrite_kb += pci->cwrite_kb;
426 printf("CPU%d:\n", i);
431 if (pci_events > 1) {
433 dump_pci_stats(&foo);
436 printf("Events: %'Lu\n", events);
439 static inline int trace_rb_insert(struct trace *t)
441 struct rb_node **p = &rb_root.rb_node;
442 struct rb_node *parent = NULL;
447 __t = rb_entry(parent, struct trace, rb_node);
449 if (t->bit->sequence < __t->bit->sequence)
451 else if (t->bit->sequence > __t->bit->sequence)
454 fprintf(stderr, "sequence alias!\n");
459 rb_link_node(&t->rb_node, parent, p);
460 rb_insert_color(&t->rb_node, &rb_root);
464 static int sort_entries(void *traces, unsigned long offset, int nr)
466 struct per_cpu_info *pci;
467 struct blk_io_trace *bit;
469 void *start = traces;
472 while (traces - start <= offset - sizeof(*bit)) {
478 t = malloc(sizeof(*t));
480 memset(&t->rb_node, 0, sizeof(t->rb_node));
484 if (verify_trace(bit))
487 pci = get_cpu_info(bit->cpu);
490 if (trace_rb_insert(t))
493 traces += sizeof(*bit) + bit->pdu_len;
501 static void free_entries_rb(void)
505 while ((n = rb_first(&rb_root)) != NULL) {
506 struct trace *t = rb_entry(n, struct trace, rb_node);
508 rb_erase(&t->rb_node, &rb_root);
513 static void show_entries_rb(void)
515 struct blk_io_trace *bit;
520 n = rb_first(&rb_root);
525 t = rb_entry(n, struct trace, rb_node);
529 if (cpu > max_cpus) {
530 fprintf(stderr, "CPU number too large (%d)\n", cpu);
534 if (genesis_time == 0)
535 genesis_time = bit->time;
536 bit->time -= genesis_time;
540 if (dump_trace(bit, &per_cpu_info[cpu]))
543 } while ((n = rb_next(n)) != NULL);
546 static int read_data(int fd, void *buffer, int bytes, int block)
548 int ret, bytes_left, fl;
551 fl = fcntl(fd, F_GETFL);
554 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
556 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
560 while (bytes_left > 0) {
561 ret = read(fd, p, bytes_left);
577 static int do_file(void)
581 for (i = 0, nfiles = 0;; i++, nfiles++) {
582 struct per_cpu_info *pci;
586 pci = get_cpu_info(i);
589 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
590 if (stat(pci->fname, &st) < 0)
595 printf("Processing %s\n", pci->fname);
597 tb = malloc(st.st_size);
599 pci->fd = open(pci->fname, O_RDONLY);
605 if (read_data(pci->fd, tb, st.st_size, 1))
608 if (sort_entries(tb, st.st_size, ~0U) == -1)
612 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
617 fprintf(stderr, "No files found\n");
625 static void resize_buffer(void **buffer, long *old_size)
627 long cur_size = *old_size;
631 ptr = malloc(*old_size);
632 memcpy(ptr, *buffer, cur_size);
637 static int read_sort_events(int fd, void **buffer)
639 long offset, max_offset;
642 max_offset = 128 * sizeof(struct blk_io_trace);
643 *buffer = malloc(max_offset);
648 struct blk_io_trace *t;
651 if (max_offset - offset < sizeof(*t))
652 resize_buffer(buffer, &max_offset);
654 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
662 t = *buffer + offset;
663 offset += sizeof(*t);
665 pdu_len = be16_to_cpu(t->pdu_len);
667 if (max_offset - offset < pdu_len)
668 resize_buffer(buffer, &max_offset);
670 if (read_data(fd, *buffer + offset, pdu_len, 1))
675 } while (!is_done());
680 static int do_stdin(void)
685 fd = dup(STDIN_FILENO);
689 events = read_sort_events(fd, &ptr);
693 sort_entries(ptr, ~0UL, events);
703 static void flush_output(void)
707 for (i = 0; i < max_cpus; i++) {
708 struct per_cpu_info *pci = &per_cpu_info[i];
718 static void handle_sigint(int sig)
724 static void usage(char *prog)
726 fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
729 int main(int argc, char *argv[])
733 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
736 dev = strdup(optarg);
739 output_name = strdup(optarg);
752 memset(&rb_root, 0, sizeof(rb_root));
754 signal(SIGINT, handle_sigint);
755 signal(SIGHUP, handle_sigint);
756 signal(SIGTERM, handle_sigint);
758 if (!strcmp(dev, "-"))