2 * block queue tracing parse application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <sys/types.h>
36 #define SECONDS(x) ((unsigned long long)(x) / 1000000000)
37 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
40 static unsigned long long genesis_time, last_reported_time;
43 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
44 unsigned long ireads, iwrites;
45 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
46 unsigned long long iread_kb, iwrite_kb;
56 struct io_stats io_stats;
59 struct per_process_info {
62 struct io_stats io_stats;
63 struct per_process_info *hash_next, *list_next;
66 #define PPI_HASH_SHIFT (8)
67 static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
68 static struct per_process_info *ppi_list;
70 #define S_OPTS "i:o:b:s"
71 static struct option l_opts[] = {
91 .name = "per program stats",
104 static struct rb_root rb_root;
107 struct blk_io_trace *bit;
108 struct rb_node rb_node;
112 static struct per_cpu_info *per_cpu_info;
114 static unsigned long long events;
116 static char *dev, *output_name;
119 static int per_process_stats;
121 #define RB_BATCH_DEFAULT (1024)
122 static int rb_batch = RB_BATCH_DEFAULT;
124 #define is_done() (*(volatile int *)(&done))
125 static volatile int done;
127 static inline unsigned long hash_long(unsigned long val)
131 #elif __WORDSIZE == 64
132 val *= 0x9e37fffffffc0001UL;
134 #error unknown word size
137 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
140 static inline void add_process_to_hash(struct per_process_info *ppi)
142 const int hash_idx = hash_long(ppi->pid);
144 ppi->hash_next = ppi_hash[hash_idx];
145 ppi_hash[hash_idx] = ppi;
148 static inline void add_process_to_list(struct per_process_info *ppi)
150 ppi->list_next = ppi_list;
154 static struct per_process_info *find_process_by_pid(__u32 pid)
156 const int hash_idx = hash_long(pid);
157 struct per_process_info *ppi;
159 ppi = ppi_hash[hash_idx];
164 ppi = ppi->hash_next;
170 static struct io_stats *find_process_io_stats(__u32 pid, char *name)
172 struct per_process_info *ppi = find_process_by_pid(pid);
175 ppi = malloc(sizeof(*ppi));
176 memset(ppi, 0, sizeof(*ppi));
177 strncpy(ppi->name, name, sizeof(ppi->name));
179 add_process_to_hash(ppi);
180 add_process_to_list(ppi);
183 return &ppi->io_stats;
186 static void resize_cpu_info(int cpuid)
188 int new_space, new_max = cpuid + 1;
191 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
193 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
194 new_max, (int) sizeof(*per_cpu_info));
198 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
199 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
200 memset(new_start, 0, new_space);
204 static struct per_cpu_info *get_cpu_info(int cpu)
206 struct per_cpu_info *pci;
209 resize_cpu_info(cpu);
212 * ->cpu might already be set, but just set it unconditionally
214 pci = &per_cpu_info[cpu];
220 static inline void check_time(struct blk_io_trace *bit)
222 unsigned long long this = bit->time;
223 unsigned long long last = last_reported_time;
225 backwards = (this < last) ? 'B' : ' ';
226 last_reported_time = this;
229 static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
234 ios->qwrite_kb += t->bytes >> 10;
237 ios->qread_kb += t->bytes >> 10;
241 static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
244 __account_m(&pci->io_stats, t, rw);
246 if (per_process_stats) {
247 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
249 __account_m(ios, t, rw);
253 static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
258 ios->qwrite_kb += t->bytes >> 10;
261 ios->qread_kb += t->bytes >> 10;
265 static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
268 __account_q(&pci->io_stats, t, rw);
270 if (per_process_stats) {
271 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
273 __account_q(ios, t, rw);
277 static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
281 ios->cwrite_kb += bytes >> 10;
284 ios->cread_kb += bytes >> 10;
288 static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
291 __account_c(&pci->io_stats, rw, bytes);
293 if (per_process_stats) {
294 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
296 __account_c(ios, rw, bytes);
300 static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
304 ios->iwrite_kb += bytes >> 10;
307 ios->iread_kb += bytes >> 10;
311 static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
314 __account_i(&pci->io_stats, rw, t->bytes);
316 if (per_process_stats) {
317 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
319 __account_i(ios, rw, t->bytes);
323 static void output(struct per_cpu_info *pci, char *s)
325 fprintf(ofp, "%s", s);
328 static char hstring[256];
329 static char tstring[256];
331 static inline char *setup_header(struct per_cpu_info *pci,
332 struct blk_io_trace *t, char act)
334 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
335 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
336 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
351 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
353 (unsigned long)t->sequence, SECONDS(t->time),
354 NANO_SECONDS(t->time), t->pid, act, rwbs);
359 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
362 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
363 (unsigned long long)t->sector, t->bytes >> 9, t->error);
364 output(pci, tstring);
367 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
370 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
371 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
372 output(pci, tstring);
375 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
378 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
379 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
380 output(pci, tstring);
383 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
386 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
387 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
388 output(pci, tstring);
391 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
394 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
395 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
396 output(pci, tstring);
399 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
404 sprintf(tstring,"%s ", setup_header(pci, t, act));
405 output(pci, tstring);
407 buf = (unsigned char *) t + sizeof(*t);
408 for (i = 0; i < t->pdu_len; i++) {
409 sprintf(tstring,"%02x ", buf[i]);
410 output(pci, tstring);
414 sprintf(tstring,"[%d]\n", t->error);
415 output(pci, tstring);
417 sprintf(tstring,"[%s]\n", t->comm);
418 output(pci, tstring);
423 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
427 switch (t->action & 0xffff) {
429 log_generic(pci, t, 'Q');
432 log_generic(pci, t, 'G');
434 case __BLK_TA_SLEEPRQ:
435 log_generic(pci, t, 'S');
437 case __BLK_TA_REQUEUE:
438 log_generic(pci, t, 'R');
441 ret = log_pc(pci, t, 'D');
443 case __BLK_TA_COMPLETE:
447 fprintf(stderr, "Bad pc action %x\n", t->action);
455 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
457 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
459 switch (t->action & 0xffff) {
461 account_q(t, pci, w);
462 log_queue(pci, t, 'Q');
464 case __BLK_TA_BACKMERGE:
465 account_m(t, pci, w);
466 log_merge(pci, t, 'M');
468 case __BLK_TA_FRONTMERGE:
469 account_m(t, pci, w);
470 log_merge(pci, t, 'F');
473 log_generic(pci, t, 'G');
475 case __BLK_TA_SLEEPRQ:
476 log_generic(pci, t, 'S');
478 case __BLK_TA_REQUEUE:
479 account_c(t, pci, w, -t->bytes);
480 log_queue(pci, t, 'R');
483 account_i(t, pci, w);
484 log_issue(pci, t, 'D');
486 case __BLK_TA_COMPLETE:
487 account_c(t, pci, w, t->bytes);
488 log_complete(pci, t, 'C');
491 fprintf(stderr, "Bad fs action %x\n", t->action);
496 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
500 if (t->action & BLK_TC_ACT(BLK_TC_PC))
501 ret = dump_trace_pc(t, pci);
503 dump_trace_fs(t, pci);
509 static void dump_io_stats(struct io_stats *ios, char *msg)
511 fprintf(ofp, "%s\n", msg);
513 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
514 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
516 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
517 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
518 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
519 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
520 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
522 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
525 static void show_process_stats(void)
527 struct per_process_info *ppi;
531 dump_io_stats(&ppi->io_stats, ppi->name);
532 ppi = ppi->list_next;
538 static void show_cpu_stats(void)
540 struct per_cpu_info foo, *pci;
541 struct io_stats *ios;
542 int i, pci_events = 0;
544 memset(&foo, 0, sizeof(foo));
546 for (i = 0; i < max_cpus; i++) {
549 pci = &per_cpu_info[i];
550 ios = &pci->io_stats;
555 foo.io_stats.qreads += ios->qreads;
556 foo.io_stats.qwrites += ios->qwrites;
557 foo.io_stats.creads += ios->creads;
558 foo.io_stats.cwrites += ios->cwrites;
559 foo.io_stats.mreads += ios->mreads;
560 foo.io_stats.mwrites += ios->mwrites;
561 foo.io_stats.ireads += ios->ireads;
562 foo.io_stats.iwrites += ios->iwrites;
563 foo.io_stats.qread_kb += ios->qread_kb;
564 foo.io_stats.qwrite_kb += ios->qwrite_kb;
565 foo.io_stats.cread_kb += ios->cread_kb;
566 foo.io_stats.cwrite_kb += ios->cwrite_kb;
567 foo.io_stats.iread_kb += ios->iread_kb;
568 foo.io_stats.iwrite_kb += ios->iwrite_kb;
570 snprintf(cpu, sizeof(cpu) - 1, "CPU%d:", i);
571 dump_io_stats(ios, cpu);
575 if (pci_events > 1) {
577 dump_io_stats(&foo.io_stats, "Total:");
580 fprintf(ofp, "\nEvents: %'Lu\n", events);
583 static inline int trace_rb_insert(struct trace *t)
585 struct rb_node **p = &rb_root.rb_node;
586 struct rb_node *parent = NULL;
591 __t = rb_entry(parent, struct trace, rb_node);
593 if (t->bit->sequence < __t->bit->sequence)
595 else if (t->bit->sequence > __t->bit->sequence)
598 fprintf(stderr, "sequence alias!\n");
603 rb_link_node(&t->rb_node, parent, p);
604 rb_insert_color(&t->rb_node, &rb_root);
608 #define min(a, b) ((a) < (b) ? (a) : (b))
610 static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
612 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
614 struct blk_io_trace *bit;
617 for (off = 0; off < max_offset; off++) {
620 magic = be32_to_cpu(bit->magic);
621 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
628 static int sort_entries(void *traces, unsigned long offset, int nr)
630 struct per_cpu_info *pci;
631 struct blk_io_trace *bit;
633 void *start = traces;
636 while (traces - start <= offset - sizeof(*bit)) {
640 bit = find_trace(traces, offset - (traces - start), nr);
644 t = malloc(sizeof(*t));
646 memset(&t->rb_node, 0, sizeof(t->rb_node));
650 if (verify_trace(bit))
653 pci = get_cpu_info(bit->cpu);
656 if (trace_rb_insert(t))
659 traces += sizeof(*bit) + bit->pdu_len;
667 static void free_entries_rb(void)
671 while ((n = rb_first(&rb_root)) != NULL) {
672 struct trace *t = rb_entry(n, struct trace, rb_node);
674 rb_erase(&t->rb_node, &rb_root);
679 static void show_entries_rb(void)
681 struct blk_io_trace *bit;
686 n = rb_first(&rb_root);
691 t = rb_entry(n, struct trace, rb_node);
695 if (cpu > max_cpus) {
696 fprintf(stderr, "CPU number too large (%d)\n", cpu);
700 if (genesis_time == 0)
701 genesis_time = bit->time;
702 bit->time -= genesis_time;
706 if (dump_trace(bit, &per_cpu_info[cpu]))
709 } while ((n = rb_next(n)) != NULL);
712 static int read_data(int fd, void *buffer, int bytes, int block)
714 int ret, bytes_left, fl;
717 fl = fcntl(fd, F_GETFL);
720 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
722 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
726 while (bytes_left > 0) {
727 ret = read(fd, p, bytes_left);
743 static int do_file(void)
747 for (i = 0, nfiles = 0;; i++, nfiles++) {
748 struct per_cpu_info *pci;
752 pci = get_cpu_info(i);
754 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
755 if (stat(pci->fname, &st) < 0)
760 printf("Processing %s\n", pci->fname);
762 tb = malloc(st.st_size);
764 pci->fd = open(pci->fname, O_RDONLY);
770 if (read_data(pci->fd, tb, st.st_size, 1))
773 if (sort_entries(tb, st.st_size, ~0U) == -1)
777 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
782 fprintf(stderr, "No files found\n");
790 static void resize_buffer(void **buffer, long *size, long offset)
792 long old_size = *size;
795 *buffer = realloc(*buffer, *size);
796 memset(*buffer + offset, 0, *size - old_size);
799 static int read_sort_events(int fd, void **buffer)
801 long offset, max_offset;
804 max_offset = 128 * sizeof(struct blk_io_trace);
805 *buffer = malloc(max_offset);
810 struct blk_io_trace *t;
813 if (max_offset - offset < sizeof(*t))
814 resize_buffer(buffer, &max_offset, offset);
816 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
824 t = *buffer + offset;
825 offset += sizeof(*t);
827 pdu_len = be16_to_cpu(t->pdu_len);
829 if (max_offset - offset < pdu_len)
830 resize_buffer(buffer, &max_offset, offset);
832 if (read_data(fd, *buffer + offset, pdu_len, 1))
839 } while (!is_done() && events < rb_batch);
844 static int do_stdin(void)
849 fd = dup(STDIN_FILENO);
853 events = read_sort_events(fd, &ptr);
857 if (sort_entries(ptr, ~0UL, events) == -1)
869 static void flush_output(void)
874 static void handle_sigint(int sig)
880 static void usage(char *prog)
882 fprintf(stderr, "Usage: %s -i <name> [-o <output>][-s]\n", prog);
885 int main(int argc, char *argv[])
890 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
896 output_name = optarg;
899 rb_batch = atoi(optarg);
901 rb_batch = RB_BATCH_DEFAULT;
904 per_process_stats = 1;
917 memset(&rb_root, 0, sizeof(rb_root));
919 signal(SIGINT, handle_sigint);
920 signal(SIGHUP, handle_sigint);
921 signal(SIGTERM, handle_sigint);
923 setlocale(LC_NUMERIC, "en_US");
926 ofp = fdopen(STDOUT_FILENO, "w");
930 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
931 ofp = fopen(ofname, "w");
939 ofp_buffer = malloc(4096);
940 if (setvbuf(ofp, ofp_buffer, _IOFBF, 4096)) {
945 if (!strcmp(dev, "-"))
950 if (per_process_stats)
951 show_process_stats();