2 * block queue tracing parse application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <sys/types.h>
35 #define SECONDS(x) ((unsigned long long)(x) / 1000000000)
36 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
39 static unsigned long long genesis_time, last_reported_time;
51 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
52 unsigned long ireads, iwrites;
53 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
54 unsigned long long iread_kb, iwrite_kb;
57 #define S_OPTS "i:o:b:"
58 static struct option l_opts[] = {
85 static struct rb_root rb_root;
88 struct blk_io_trace *bit;
89 struct rb_node rb_node;
93 static struct per_cpu_info *per_cpu_info;
95 static unsigned long long events;
97 static char *dev, *output_name;
99 #define RB_BATCH_DEFAULT (1024)
100 static int rb_batch = RB_BATCH_DEFAULT;
102 #define is_done() (*(volatile int *)(&done))
103 static volatile int done;
105 static void resize_cpu_info(int cpuid)
107 int new_space, new_max = cpuid + 1;
110 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
112 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
113 new_max, (int) sizeof(*per_cpu_info));
117 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
118 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
119 memset(new_start, 0, new_space);
123 static struct per_cpu_info *get_cpu_info(int cpu)
125 struct per_cpu_info *pci;
128 resize_cpu_info(cpu);
131 * ->cpu might already be set, but just set it unconditionally
133 pci = &per_cpu_info[cpu];
139 static inline void check_time(struct blk_io_trace *bit)
141 unsigned long long this = bit->time;
142 unsigned long long last = last_reported_time;
144 backwards = (this < last) ? 'B' : ' ';
145 last_reported_time = this;
148 static inline void account_m(struct per_cpu_info *pci, int rw,
153 pci->qwrite_kb += bytes >> 10;
156 pci->qread_kb += bytes >> 10;
160 static inline void account_q(struct per_cpu_info *pci, int rw,
165 pci->qwrite_kb += bytes >> 10;
168 pci->qread_kb += bytes >> 10;
172 static inline void account_c(struct per_cpu_info *pci, int rw,
177 pci->cwrite_kb += bytes >> 10;
180 pci->cread_kb += bytes >> 10;
184 static inline void account_i(struct per_cpu_info *pci, int rw,
189 pci->iwrite_kb += bytes >> 10;
192 pci->iread_kb += bytes >> 10;
196 static void output(struct per_cpu_info *pci, char *s)
201 fprintf(pci->ofp, "%s", s);
204 static char hstring[256];
205 static char tstring[256];
207 static inline char *setup_header(struct per_cpu_info *pci,
208 struct blk_io_trace *t, char act)
210 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
211 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
212 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
227 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
229 (unsigned long)t->sequence, SECONDS(t->time),
230 NANO_SECONDS(t->time), t->pid, act, rwbs);
235 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
238 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
239 (unsigned long long)t->sector, t->bytes >> 9, t->error);
240 output(pci, tstring);
243 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
246 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
247 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
248 output(pci, tstring);
251 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
254 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
255 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
256 output(pci, tstring);
259 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
262 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
263 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
264 output(pci, tstring);
267 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
270 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
271 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
272 output(pci, tstring);
275 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
280 sprintf(tstring,"%s ", setup_header(pci, t, act));
281 output(pci, tstring);
283 buf = (unsigned char *) t + sizeof(*t);
284 for (i = 0; i < t->pdu_len; i++) {
285 sprintf(tstring,"%02x ", buf[i]);
286 output(pci, tstring);
290 sprintf(tstring,"[%d]\n", t->error);
291 output(pci, tstring);
293 sprintf(tstring,"[%s]\n", t->comm);
294 output(pci, tstring);
299 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
303 switch (t->action & 0xffff) {
305 log_generic(pci, t, 'Q');
308 log_generic(pci, t, 'G');
310 case __BLK_TA_SLEEPRQ:
311 log_generic(pci, t, 'S');
313 case __BLK_TA_REQUEUE:
314 log_generic(pci, t, 'R');
317 ret = log_pc(pci, t, 'D');
319 case __BLK_TA_COMPLETE:
323 fprintf(stderr, "Bad pc action %x\n", t->action);
331 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
333 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
335 switch (t->action & 0xffff) {
337 account_q(pci, w, t->bytes);
338 log_queue(pci, t, 'Q');
340 case __BLK_TA_BACKMERGE:
341 account_m(pci, w, t->bytes);
342 log_merge(pci, t, 'M');
344 case __BLK_TA_FRONTMERGE:
345 account_m(pci, w, t->bytes);
346 log_merge(pci, t, 'F');
349 log_generic(pci, t, 'G');
351 case __BLK_TA_SLEEPRQ:
352 log_generic(pci, t, 'S');
354 case __BLK_TA_REQUEUE:
355 account_c(pci, w, -t->bytes);
356 log_queue(pci, t, 'R');
359 account_i(pci, w, t->bytes);
360 log_issue(pci, t, 'D');
362 case __BLK_TA_COMPLETE:
363 account_c(pci, w, t->bytes);
364 log_complete(pci, t, 'C');
367 fprintf(stderr, "Bad fs action %x\n", t->action);
372 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
376 if (output_name && !pci->ofp) {
377 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
378 "%s_log.%d", output_name, pci->cpu);
380 pci->ofp = fopen(pci->ofname, "w");
381 if (pci->ofp == NULL) {
387 if (t->action & BLK_TC_ACT(BLK_TC_PC))
388 ret = dump_trace_pc(t, pci);
390 dump_trace_fs(t, pci);
396 static void dump_pci_stats(struct per_cpu_info *pci)
398 printf(" Reads Queued: %'8lu, %'8LuKiB\t", pci->qreads, pci->qread_kb);
399 printf(" Writes Queued: %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
401 printf(" Read Dispatches: %'8lu, %'8LuKiB\t", pci->ireads, pci->iread_kb);
402 printf(" Write Dispatches: %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
403 printf(" Reads Completed: %'8lu, %'8LuKiB\t", pci->creads, pci->cread_kb);
404 printf(" Writes Completed: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
405 printf(" Read Merges: %'8lu%8c\t", pci->mreads, ' ');
407 printf(" Write Merges: %'8lu\n", pci->mwrites);
410 static void show_stats(void)
412 struct per_cpu_info foo, *pci;
413 int i, pci_events = 0;
415 memset(&foo, 0, sizeof(foo));
417 for (i = 0; i < max_cpus; i++) {
418 pci = &per_cpu_info[i];
423 foo.qreads += pci->qreads;
424 foo.qwrites += pci->qwrites;
425 foo.creads += pci->creads;
426 foo.cwrites += pci->cwrites;
427 foo.mreads += pci->mreads;
428 foo.mwrites += pci->mwrites;
429 foo.qread_kb += pci->qread_kb;
430 foo.qwrite_kb += pci->qwrite_kb;
431 foo.cread_kb += pci->cread_kb;
432 foo.cwrite_kb += pci->cwrite_kb;
434 printf("CPU%d:\n", i);
439 if (pci_events > 1) {
441 dump_pci_stats(&foo);
444 printf("Events: %'Lu\n", events);
447 static inline int trace_rb_insert(struct trace *t)
449 struct rb_node **p = &rb_root.rb_node;
450 struct rb_node *parent = NULL;
455 __t = rb_entry(parent, struct trace, rb_node);
457 if (t->bit->sequence < __t->bit->sequence)
459 else if (t->bit->sequence > __t->bit->sequence)
462 fprintf(stderr, "sequence alias!\n");
467 rb_link_node(&t->rb_node, parent, p);
468 rb_insert_color(&t->rb_node, &rb_root);
472 static int sort_entries(void *traces, unsigned long offset, int nr)
474 struct per_cpu_info *pci;
475 struct blk_io_trace *bit;
477 void *start = traces;
480 while (traces - start <= offset - sizeof(*bit)) {
486 t = malloc(sizeof(*t));
488 memset(&t->rb_node, 0, sizeof(t->rb_node));
492 if (verify_trace(bit))
495 pci = get_cpu_info(bit->cpu);
498 if (trace_rb_insert(t))
501 traces += sizeof(*bit) + bit->pdu_len;
509 static void free_entries_rb(void)
513 while ((n = rb_first(&rb_root)) != NULL) {
514 struct trace *t = rb_entry(n, struct trace, rb_node);
516 rb_erase(&t->rb_node, &rb_root);
521 static void show_entries_rb(void)
523 struct blk_io_trace *bit;
528 n = rb_first(&rb_root);
533 t = rb_entry(n, struct trace, rb_node);
537 if (cpu > max_cpus) {
538 fprintf(stderr, "CPU number too large (%d)\n", cpu);
542 if (genesis_time == 0)
543 genesis_time = bit->time;
544 bit->time -= genesis_time;
548 if (dump_trace(bit, &per_cpu_info[cpu]))
551 } while ((n = rb_next(n)) != NULL);
554 static int read_data(int fd, void *buffer, int bytes, int block)
556 int ret, bytes_left, fl;
559 fl = fcntl(fd, F_GETFL);
562 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
564 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
568 while (bytes_left > 0) {
569 ret = read(fd, p, bytes_left);
585 static int do_file(void)
589 for (i = 0, nfiles = 0;; i++, nfiles++) {
590 struct per_cpu_info *pci;
594 pci = get_cpu_info(i);
597 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
598 if (stat(pci->fname, &st) < 0)
603 printf("Processing %s\n", pci->fname);
605 tb = malloc(st.st_size);
607 pci->fd = open(pci->fname, O_RDONLY);
613 if (read_data(pci->fd, tb, st.st_size, 1))
616 if (sort_entries(tb, st.st_size, ~0U) == -1)
620 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
625 fprintf(stderr, "No files found\n");
633 static void resize_buffer(void **buffer, long *old_size)
635 long cur_size = *old_size;
639 ptr = malloc(*old_size);
640 memcpy(ptr, *buffer, cur_size);
645 static int read_sort_events(int fd, void **buffer)
647 long offset, max_offset;
650 max_offset = 128 * sizeof(struct blk_io_trace);
651 *buffer = malloc(max_offset);
656 struct blk_io_trace *t;
659 if (max_offset - offset < sizeof(*t))
660 resize_buffer(buffer, &max_offset);
662 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
670 t = *buffer + offset;
671 offset += sizeof(*t);
673 pdu_len = be16_to_cpu(t->pdu_len);
675 if (max_offset - offset < pdu_len)
676 resize_buffer(buffer, &max_offset);
678 if (read_data(fd, *buffer + offset, pdu_len, 1))
683 } while (!is_done() && events < rb_batch);
688 static int do_stdin(void)
693 fd = dup(STDIN_FILENO);
697 events = read_sort_events(fd, &ptr);
701 sort_entries(ptr, ~0UL, events);
711 static void flush_output(void)
715 for (i = 0; i < max_cpus; i++) {
716 struct per_cpu_info *pci = &per_cpu_info[i];
726 static void handle_sigint(int sig)
732 static void usage(char *prog)
734 fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
737 int main(int argc, char *argv[])
741 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
744 dev = strdup(optarg);
747 output_name = strdup(optarg);
750 rb_batch = atoi(optarg);
752 rb_batch = RB_BATCH_DEFAULT;
765 memset(&rb_root, 0, sizeof(rb_root));
767 signal(SIGINT, handle_sigint);
768 signal(SIGHUP, handle_sigint);
769 signal(SIGTERM, handle_sigint);
771 if (!strcmp(dev, "-"))