2 * block queue tracing parse application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <sys/types.h>
36 #define SECONDS(x) ((unsigned long long)(x) / 1000000000)
37 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
40 static unsigned long long genesis_time, last_reported_time;
43 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
44 unsigned long ireads, iwrites;
45 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
46 unsigned long long iread_kb, iwrite_kb;
56 struct io_stats io_stats;
59 struct per_process_info {
62 struct io_stats io_stats;
63 struct per_process_info *hash_next, *list_next;
66 #define PPI_HASH_SHIFT (8)
67 static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
68 static struct per_process_info *ppi_list;
70 #define S_OPTS "i:o:b:s"
71 static struct option l_opts[] = {
91 .name = "per program stats",
104 static struct rb_root rb_root;
107 struct blk_io_trace *bit;
108 struct rb_node rb_node;
112 static struct per_cpu_info *per_cpu_info;
114 static unsigned long long events;
116 static char *dev, *output_name;
119 static int per_process_stats;
121 #define RB_BATCH_DEFAULT (1024)
122 static int rb_batch = RB_BATCH_DEFAULT;
124 #define is_done() (*(volatile int *)(&done))
125 static volatile int done;
127 static inline unsigned long hash_long(unsigned long val)
129 const unsigned int bpl = 8 * sizeof(long);
132 * assuming 32 or 64-bit
137 val *= 0x9e37fffffffc0001UL;
139 return val >> (bpl - PPI_HASH_SHIFT);
142 static inline void add_process_to_hash(struct per_process_info *ppi)
144 const int hash_idx = hash_long(ppi->pid);
146 ppi->hash_next = ppi_hash[hash_idx];
147 ppi_hash[hash_idx] = ppi;
150 static inline void add_process_to_list(struct per_process_info *ppi)
152 ppi->list_next = ppi_list;
156 static struct per_process_info *find_process_by_pid(__u32 pid)
158 const int hash_idx = hash_long(pid);
159 struct per_process_info *ppi;
161 ppi = ppi_hash[hash_idx];
166 ppi = ppi->hash_next;
172 static struct io_stats *find_process_io_stats(__u32 pid, char *name)
174 struct per_process_info *ppi = find_process_by_pid(pid);
177 ppi = malloc(sizeof(*ppi));
178 memset(ppi, 0, sizeof(*ppi));
179 strncpy(ppi->name, name, sizeof(ppi->name));
181 add_process_to_hash(ppi);
182 add_process_to_list(ppi);
185 return &ppi->io_stats;
188 static void resize_cpu_info(int cpuid)
190 int new_space, new_max = cpuid + 1;
193 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
195 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
196 new_max, (int) sizeof(*per_cpu_info));
200 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
201 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
202 memset(new_start, 0, new_space);
206 static struct per_cpu_info *get_cpu_info(int cpu)
208 struct per_cpu_info *pci;
211 resize_cpu_info(cpu);
214 * ->cpu might already be set, but just set it unconditionally
216 pci = &per_cpu_info[cpu];
222 static inline void check_time(struct blk_io_trace *bit)
224 unsigned long long this = bit->time;
225 unsigned long long last = last_reported_time;
227 backwards = (this < last) ? 'B' : ' ';
228 last_reported_time = this;
231 static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
236 ios->qwrite_kb += t->bytes >> 10;
239 ios->qread_kb += t->bytes >> 10;
243 static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
246 __account_m(&pci->io_stats, t, rw);
248 if (per_process_stats) {
249 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
251 __account_m(ios, t, rw);
255 static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
260 ios->qwrite_kb += t->bytes >> 10;
263 ios->qread_kb += t->bytes >> 10;
267 static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
270 __account_q(&pci->io_stats, t, rw);
272 if (per_process_stats) {
273 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
275 __account_q(ios, t, rw);
279 static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
283 ios->cwrite_kb += bytes >> 10;
286 ios->cread_kb += bytes >> 10;
290 static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
293 __account_c(&pci->io_stats, rw, bytes);
295 if (per_process_stats) {
296 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
298 __account_c(ios, rw, bytes);
302 static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
306 ios->iwrite_kb += bytes >> 10;
309 ios->iread_kb += bytes >> 10;
313 static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
316 __account_i(&pci->io_stats, rw, t->bytes);
318 if (per_process_stats) {
319 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
321 __account_i(ios, rw, t->bytes);
325 static void output(struct per_cpu_info *pci, char *s)
327 fprintf(ofp, "%s", s);
330 static char hstring[256];
331 static char tstring[256];
333 static inline char *setup_header(struct per_cpu_info *pci,
334 struct blk_io_trace *t, char act)
336 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
337 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
338 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
353 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
355 (unsigned long)t->sequence, SECONDS(t->time),
356 NANO_SECONDS(t->time), t->pid, act, rwbs);
361 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
364 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
365 (unsigned long long)t->sector, t->bytes >> 9, t->error);
366 output(pci, tstring);
369 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
372 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
373 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
374 output(pci, tstring);
377 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
380 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
381 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
382 output(pci, tstring);
385 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
388 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
389 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
390 output(pci, tstring);
393 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
396 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
397 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
398 output(pci, tstring);
401 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
406 sprintf(tstring,"%s ", setup_header(pci, t, act));
407 output(pci, tstring);
409 buf = (unsigned char *) t + sizeof(*t);
410 for (i = 0; i < t->pdu_len; i++) {
411 sprintf(tstring,"%02x ", buf[i]);
412 output(pci, tstring);
416 sprintf(tstring,"[%d]\n", t->error);
417 output(pci, tstring);
419 sprintf(tstring,"[%s]\n", t->comm);
420 output(pci, tstring);
425 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
429 switch (t->action & 0xffff) {
431 log_generic(pci, t, 'Q');
434 log_generic(pci, t, 'G');
436 case __BLK_TA_SLEEPRQ:
437 log_generic(pci, t, 'S');
439 case __BLK_TA_REQUEUE:
440 log_generic(pci, t, 'R');
443 ret = log_pc(pci, t, 'D');
445 case __BLK_TA_COMPLETE:
449 fprintf(stderr, "Bad pc action %x\n", t->action);
457 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
459 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
461 switch (t->action & 0xffff) {
463 account_q(t, pci, w);
464 log_queue(pci, t, 'Q');
466 case __BLK_TA_BACKMERGE:
467 account_m(t, pci, w);
468 log_merge(pci, t, 'M');
470 case __BLK_TA_FRONTMERGE:
471 account_m(t, pci, w);
472 log_merge(pci, t, 'F');
475 log_generic(pci, t, 'G');
477 case __BLK_TA_SLEEPRQ:
478 log_generic(pci, t, 'S');
480 case __BLK_TA_REQUEUE:
481 account_c(t, pci, w, -t->bytes);
482 log_queue(pci, t, 'R');
485 account_i(t, pci, w);
486 log_issue(pci, t, 'D');
488 case __BLK_TA_COMPLETE:
489 account_c(t, pci, w, t->bytes);
490 log_complete(pci, t, 'C');
493 fprintf(stderr, "Bad fs action %x\n", t->action);
498 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
502 if (t->action & BLK_TC_ACT(BLK_TC_PC))
503 ret = dump_trace_pc(t, pci);
505 dump_trace_fs(t, pci);
511 static void dump_io_stats(struct io_stats *ios, char *msg)
513 fprintf(ofp, "%s\n", msg);
515 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
516 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
518 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
519 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
520 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
521 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
522 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
524 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
527 static void show_process_stats(void)
529 struct per_process_info *ppi;
533 dump_io_stats(&ppi->io_stats, ppi->name);
534 ppi = ppi->list_next;
540 static void show_cpu_stats(void)
542 struct per_cpu_info foo, *pci;
543 struct io_stats *ios;
544 int i, pci_events = 0;
546 memset(&foo, 0, sizeof(foo));
548 for (i = 0; i < max_cpus; i++) {
551 pci = &per_cpu_info[i];
552 ios = &pci->io_stats;
557 foo.io_stats.qreads += ios->qreads;
558 foo.io_stats.qwrites += ios->qwrites;
559 foo.io_stats.creads += ios->creads;
560 foo.io_stats.cwrites += ios->cwrites;
561 foo.io_stats.mreads += ios->mreads;
562 foo.io_stats.mwrites += ios->mwrites;
563 foo.io_stats.qread_kb += ios->qread_kb;
564 foo.io_stats.qwrite_kb += ios->qwrite_kb;
565 foo.io_stats.cread_kb += ios->cread_kb;
566 foo.io_stats.cwrite_kb += ios->cwrite_kb;
568 snprintf(cpu, sizeof(cpu) - 1, "CPU%d:", i);
569 dump_io_stats(ios, cpu);
573 if (pci_events > 1) {
575 dump_io_stats(&foo.io_stats, "Total:");
578 fprintf(ofp, "\nEvents: %'Lu\n", events);
581 static inline int trace_rb_insert(struct trace *t)
583 struct rb_node **p = &rb_root.rb_node;
584 struct rb_node *parent = NULL;
589 __t = rb_entry(parent, struct trace, rb_node);
591 if (t->bit->sequence < __t->bit->sequence)
593 else if (t->bit->sequence > __t->bit->sequence)
596 fprintf(stderr, "sequence alias!\n");
601 rb_link_node(&t->rb_node, parent, p);
602 rb_insert_color(&t->rb_node, &rb_root);
606 #define min(a, b) ((a) < (b) ? (a) : (b))
608 static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
610 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
612 struct blk_io_trace *bit;
615 for (off = 0; off < max_offset; off++) {
618 magic = be32_to_cpu(bit->magic);
619 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
626 static int sort_entries(void *traces, unsigned long offset, int nr)
628 struct per_cpu_info *pci;
629 struct blk_io_trace *bit;
631 void *start = traces;
634 while (traces - start <= offset - sizeof(*bit)) {
638 bit = find_trace(traces, offset - (traces - start), nr);
642 t = malloc(sizeof(*t));
644 memset(&t->rb_node, 0, sizeof(t->rb_node));
648 if (verify_trace(bit))
651 pci = get_cpu_info(bit->cpu);
654 if (trace_rb_insert(t))
657 traces += sizeof(*bit) + bit->pdu_len;
665 static void free_entries_rb(void)
669 while ((n = rb_first(&rb_root)) != NULL) {
670 struct trace *t = rb_entry(n, struct trace, rb_node);
672 rb_erase(&t->rb_node, &rb_root);
677 static void show_entries_rb(void)
679 struct blk_io_trace *bit;
684 n = rb_first(&rb_root);
689 t = rb_entry(n, struct trace, rb_node);
693 if (cpu > max_cpus) {
694 fprintf(stderr, "CPU number too large (%d)\n", cpu);
698 if (genesis_time == 0)
699 genesis_time = bit->time;
700 bit->time -= genesis_time;
704 if (dump_trace(bit, &per_cpu_info[cpu]))
707 } while ((n = rb_next(n)) != NULL);
710 static int read_data(int fd, void *buffer, int bytes, int block)
712 int ret, bytes_left, fl;
715 fl = fcntl(fd, F_GETFL);
718 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
720 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
724 while (bytes_left > 0) {
725 ret = read(fd, p, bytes_left);
741 static int do_file(void)
745 for (i = 0, nfiles = 0;; i++, nfiles++) {
746 struct per_cpu_info *pci;
750 pci = get_cpu_info(i);
752 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
753 if (stat(pci->fname, &st) < 0)
758 printf("Processing %s\n", pci->fname);
760 tb = malloc(st.st_size);
762 pci->fd = open(pci->fname, O_RDONLY);
768 if (read_data(pci->fd, tb, st.st_size, 1))
771 if (sort_entries(tb, st.st_size, ~0U) == -1)
775 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
780 fprintf(stderr, "No files found\n");
788 static void resize_buffer(void **buffer, long *size, long offset)
790 long old_size = *size;
793 *buffer = realloc(*buffer, *size);
794 memset(*buffer + offset, 0, *size - old_size);
797 static int read_sort_events(int fd, void **buffer)
799 long offset, max_offset;
802 max_offset = 128 * sizeof(struct blk_io_trace);
803 *buffer = malloc(max_offset);
808 struct blk_io_trace *t;
811 if (max_offset - offset < sizeof(*t))
812 resize_buffer(buffer, &max_offset, offset);
814 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
822 t = *buffer + offset;
823 offset += sizeof(*t);
825 pdu_len = be16_to_cpu(t->pdu_len);
827 if (max_offset - offset < pdu_len)
828 resize_buffer(buffer, &max_offset, offset);
830 if (read_data(fd, *buffer + offset, pdu_len, 1))
837 } while (!is_done() && events < rb_batch);
842 static int do_stdin(void)
847 fd = dup(STDIN_FILENO);
851 events = read_sort_events(fd, &ptr);
855 if (sort_entries(ptr, ~0UL, events) == -1)
867 static void flush_output(void)
872 static void handle_sigint(int sig)
878 static void usage(char *prog)
880 fprintf(stderr, "Usage: %s -i <name> [-o <output>][-s]\n", prog);
883 int main(int argc, char *argv[])
888 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
891 dev = strdup(optarg);
894 output_name = strdup(optarg);
897 rb_batch = atoi(optarg);
899 rb_batch = RB_BATCH_DEFAULT;
902 per_process_stats = 1;
915 memset(&rb_root, 0, sizeof(rb_root));
917 signal(SIGINT, handle_sigint);
918 signal(SIGHUP, handle_sigint);
919 signal(SIGTERM, handle_sigint);
921 setlocale(LC_NUMERIC, "en_US");
924 ofp = fdopen(STDOUT_FILENO, "w");
928 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
929 ofp = fopen(ofname, "w");
937 ofp_buffer = malloc(4096);
938 if (setvbuf(ofp, ofp_buffer, _IOFBF, 4096)) {
943 if (!strcmp(dev, "-"))
948 if (per_process_stats)
949 show_process_stats();