[PATCH] Add option to limit rb batching for live traces
[blktrace.git] / blkparse.c
1 /*
2  * block queue tracing parse application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <getopt.h>
29 #include <errno.h>
30 #include <signal.h>
31
32 #include "blktrace.h"
33 #include "rbtree.h"
34
35 #define SECONDS(x)      ((unsigned long long)(x) / 1000000000)
36 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
37
38 static int backwards;
39 static unsigned long long genesis_time, last_reported_time;
40
41 struct per_cpu_info {
42         int cpu;
43         int nelems;
44
45         int fd;
46         char fname[128];
47
48         FILE *ofp;
49         char ofname[128];
50
51         unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
52         unsigned long ireads, iwrites;
53         unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
54         unsigned long long iread_kb, iwrite_kb;
55 };
56
57 #define S_OPTS  "i:o:b:"
58 static struct option l_opts[] = {
59         {
60                 .name = "input",
61                 .has_arg = 1,
62                 .flag = NULL,
63                 .val = 'i'
64         },
65         {
66                 .name = "output",
67                 .has_arg = 1,
68                 .flag = NULL,
69                 .val = 'o'
70         },
71         {
72                 .name = "batch",
73                 .has_arg = 1,
74                 .flag = NULL,
75                 .val = 'b'
76         },
77         {
78                 .name = NULL,
79                 .has_arg = 0,
80                 .flag = NULL,
81                 .val = 0
82         }
83 };
84
85 static struct rb_root rb_root;
86
87 struct trace {
88         struct blk_io_trace *bit;
89         struct rb_node rb_node;
90 };
91
92 static int max_cpus;
93 static struct per_cpu_info *per_cpu_info;
94
95 static unsigned long long events;
96
97 static char *dev, *output_name;
98
99 #define RB_BATCH_DEFAULT        (1024)
100 static int rb_batch = RB_BATCH_DEFAULT;
101
102 #define is_done()       (*(volatile int *)(&done))
103 static volatile int done;
104
105 static void resize_cpu_info(int cpuid)
106 {
107         int new_space, new_max = cpuid + 1;
108         char *new_start;
109
110         per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
111         if (!per_cpu_info) {
112                 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
113                         new_max, (int) sizeof(*per_cpu_info));
114                 exit(1);
115         }
116
117         new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
118         new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
119         memset(new_start, 0, new_space);
120         max_cpus = new_max;
121 }
122
123 static struct per_cpu_info *get_cpu_info(int cpu)
124 {
125         struct per_cpu_info *pci;
126
127         if (cpu >= max_cpus)
128                 resize_cpu_info(cpu);
129
130         /*
131          * ->cpu might already be set, but just set it unconditionally
132          */
133         pci = &per_cpu_info[cpu];
134         pci->cpu = cpu;
135
136         return pci;
137 }
138
139 static inline void check_time(struct blk_io_trace *bit)
140 {
141         unsigned long long this = bit->time;
142         unsigned long long last = last_reported_time;
143
144         backwards = (this < last) ? 'B' : ' ';
145         last_reported_time = this;
146 }
147
148 static inline void account_m(struct per_cpu_info *pci, int rw,
149                              unsigned int bytes)
150 {
151         if (rw) {
152                 pci->mwrites++;
153                 pci->qwrite_kb += bytes >> 10;
154         } else {
155                 pci->mreads++;
156                 pci->qread_kb += bytes >> 10;
157         }
158 }
159
160 static inline void account_q(struct per_cpu_info *pci, int rw,
161                              unsigned int bytes)
162 {
163         if (rw) {
164                 pci->qwrites++;
165                 pci->qwrite_kb += bytes >> 10;
166         } else {
167                 pci->qreads++;
168                 pci->qread_kb += bytes >> 10;
169         }
170 }
171
172 static inline void account_c(struct per_cpu_info *pci, int rw,
173                              unsigned int bytes)
174 {
175         if (rw) {
176                 pci->cwrites++;
177                 pci->cwrite_kb += bytes >> 10;
178         } else {
179                 pci->creads++;
180                 pci->cread_kb += bytes >> 10;
181         }
182 }
183
184 static inline void account_i(struct per_cpu_info *pci, int rw,
185                              unsigned int bytes)
186 {
187         if (rw) {
188                 pci->iwrites++;
189                 pci->iwrite_kb += bytes >> 10;
190         } else {
191                 pci->ireads++;
192                 pci->iread_kb += bytes >> 10;
193         }
194 }
195
196 static void output(struct per_cpu_info *pci, char *s)
197 {
198         printf("%s", s);
199
200         if (pci->ofp)
201                 fprintf(pci->ofp, "%s", s);
202 }
203
204 static char hstring[256];
205 static char tstring[256];
206
207 static inline char *setup_header(struct per_cpu_info *pci,
208                                  struct blk_io_trace *t, char act)
209 {
210         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
211         int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
212         int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
213         char rwbs[4];
214         int i = 0;
215
216         if (w)
217                 rwbs[i++] = 'W';
218         else
219                 rwbs[i++] = 'R';
220         if (b)
221                 rwbs[i++] = 'B';
222         if (s)
223                 rwbs[i++] = 'S';
224
225         rwbs[i] = '\0';
226
227         sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
228                 pci->cpu,
229                 (unsigned long)t->sequence, SECONDS(t->time), 
230                 NANO_SECONDS(t->time), t->pid, act, rwbs);
231
232         return hstring;
233 }
234
235 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
236                          char act)
237 {
238         sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
239                 (unsigned long long)t->sector, t->bytes >> 9, t->error);
240         output(pci, tstring);
241 }
242
243 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
244                       char act)
245 {
246         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
247                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
248         output(pci, tstring);
249 }
250
251 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
252                       char act)
253 {
254         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
255                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
256         output(pci, tstring);
257 }
258
259 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
260                       char act)
261 {
262         sprintf(tstring,"%s   %Lu + %u [%s]\n", setup_header(pci, t, act),
263                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
264         output(pci, tstring);
265 }
266
267 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
268                         char act)
269 {
270         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
271                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
272         output(pci, tstring);
273 }
274
275 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
276 {
277         unsigned char *buf;
278         int i;
279
280         sprintf(tstring,"%s ", setup_header(pci, t, act));
281         output(pci, tstring);
282
283         buf = (unsigned char *) t + sizeof(*t);
284         for (i = 0; i < t->pdu_len; i++) {
285                 sprintf(tstring,"%02x ", buf[i]);
286                 output(pci, tstring);
287         }
288
289         if (act == 'C') {
290                 sprintf(tstring,"[%d]\n", t->error);
291                 output(pci, tstring);
292         } else {
293                 sprintf(tstring,"[%s]\n", t->comm);
294                 output(pci, tstring);
295         }
296         return 0;
297 }
298
299 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
300 {
301         int ret = 0;
302
303         switch (t->action & 0xffff) {
304                 case __BLK_TA_QUEUE:
305                         log_generic(pci, t, 'Q');
306                         break;
307                 case __BLK_TA_GETRQ:
308                         log_generic(pci, t, 'G');
309                         break;
310                 case __BLK_TA_SLEEPRQ:
311                         log_generic(pci, t, 'S');
312                         break;
313                 case __BLK_TA_REQUEUE:
314                         log_generic(pci, t, 'R');
315                         break;
316                 case __BLK_TA_ISSUE:
317                         ret = log_pc(pci, t, 'D');
318                         break;
319                 case __BLK_TA_COMPLETE:
320                         log_pc(pci, t, 'C');
321                         break;
322                 default:
323                         fprintf(stderr, "Bad pc action %x\n", t->action);
324                         ret = 1;
325                         break;
326         }
327         
328         return ret;
329 }
330
331 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
332 {
333         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
334
335         switch (t->action & 0xffff) {
336                 case __BLK_TA_QUEUE:
337                         account_q(pci, w, t->bytes);
338                         log_queue(pci, t, 'Q');
339                         break;
340                 case __BLK_TA_BACKMERGE:
341                         account_m(pci, w, t->bytes);
342                         log_merge(pci, t, 'M');
343                         break;
344                 case __BLK_TA_FRONTMERGE:
345                         account_m(pci, w, t->bytes);
346                         log_merge(pci, t, 'F');
347                         break;
348                 case __BLK_TA_GETRQ:
349                         log_generic(pci, t, 'G');
350                         break;
351                 case __BLK_TA_SLEEPRQ:
352                         log_generic(pci, t, 'S');
353                         break;
354                 case __BLK_TA_REQUEUE:
355                         account_c(pci, w, -t->bytes);
356                         log_queue(pci, t, 'R');
357                         break;
358                 case __BLK_TA_ISSUE:
359                         account_i(pci, w, t->bytes);
360                         log_issue(pci, t, 'D');
361                         break;
362                 case __BLK_TA_COMPLETE:
363                         account_c(pci, w, t->bytes);
364                         log_complete(pci, t, 'C');
365                         break;
366                 default:
367                         fprintf(stderr, "Bad fs action %x\n", t->action);
368                         break;
369         }
370 }
371
372 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
373 {
374         int ret = 0;
375
376         if (output_name && !pci->ofp) {
377                 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
378                                 "%s_log.%d", output_name, pci->cpu);
379
380                 pci->ofp = fopen(pci->ofname, "w");
381                 if (pci->ofp == NULL) {
382                         perror(pci->ofname);
383                         return 1;
384                 }
385         }
386
387         if (t->action & BLK_TC_ACT(BLK_TC_PC))
388                 ret = dump_trace_pc(t, pci);
389         else
390                 dump_trace_fs(t, pci);
391
392         events++;
393         return ret;
394 }
395
396 static void dump_pci_stats(struct per_cpu_info *pci)
397 {
398         printf(" Reads Queued:    %'8lu, %'8LuKiB\t", pci->qreads, pci->qread_kb);
399         printf(" Writes Queued:    %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
400
401         printf(" Read Dispatches: %'8lu, %'8LuKiB\t", pci->ireads, pci->iread_kb);
402         printf(" Write Dispatches: %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
403         printf(" Reads Completed: %'8lu, %'8LuKiB\t", pci->creads, pci->cread_kb);
404         printf(" Writes Completed: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
405         printf(" Read Merges:     %'8lu%8c\t", pci->mreads, ' ');
406
407         printf(" Write Merges:     %'8lu\n", pci->mwrites);
408 }
409
410 static void show_stats(void)
411 {
412         struct per_cpu_info foo, *pci;
413         int i, pci_events = 0;
414
415         memset(&foo, 0, sizeof(foo));
416
417         for (i = 0; i < max_cpus; i++) {
418                 pci = &per_cpu_info[i];
419
420                 if (!pci->nelems)
421                         continue;
422
423                 foo.qreads += pci->qreads;
424                 foo.qwrites += pci->qwrites;
425                 foo.creads += pci->creads;
426                 foo.cwrites += pci->cwrites;
427                 foo.mreads += pci->mreads;
428                 foo.mwrites += pci->mwrites;
429                 foo.qread_kb += pci->qread_kb;
430                 foo.qwrite_kb += pci->qwrite_kb;
431                 foo.cread_kb += pci->cread_kb;
432                 foo.cwrite_kb += pci->cwrite_kb;
433
434                 printf("CPU%d:\n", i);
435                 dump_pci_stats(pci);
436                 pci_events++;
437         }
438
439         if (pci_events > 1) {
440                 printf("Total:\n");
441                 dump_pci_stats(&foo);
442         }
443
444         printf("Events: %'Lu\n", events);
445 }
446
447 static inline int trace_rb_insert(struct trace *t)
448 {
449         struct rb_node **p = &rb_root.rb_node;
450         struct rb_node *parent = NULL;
451         struct trace *__t;
452
453         while (*p) {
454                 parent = *p;
455                 __t = rb_entry(parent, struct trace, rb_node);
456
457                 if (t->bit->sequence < __t->bit->sequence)
458                         p = &(*p)->rb_left;
459                 else if (t->bit->sequence > __t->bit->sequence)
460                         p = &(*p)->rb_right;
461                 else {
462                         fprintf(stderr, "sequence alias!\n");
463                         return 1;
464                 }
465         }
466
467         rb_link_node(&t->rb_node, parent, p);
468         rb_insert_color(&t->rb_node, &rb_root);
469         return 0;
470 }
471
472 static int sort_entries(void *traces, unsigned long offset, int nr)
473 {
474         struct per_cpu_info *pci;
475         struct blk_io_trace *bit;
476         struct trace *t;
477         void *start = traces;
478         int nelems = 0;
479
480         while (traces - start <= offset - sizeof(*bit)) {
481                 if (!nr)
482                         break;
483
484                 bit = traces;
485
486                 t = malloc(sizeof(*t));
487                 t->bit = bit;
488                 memset(&t->rb_node, 0, sizeof(t->rb_node));
489
490                 trace_to_cpu(bit);
491
492                 if (verify_trace(bit))
493                         break;
494
495                 pci = get_cpu_info(bit->cpu);
496                 pci->nelems++;
497
498                 if (trace_rb_insert(t))
499                         return -1;
500
501                 traces += sizeof(*bit) + bit->pdu_len;
502                 nelems++;
503                 nr--;
504         }
505
506         return nelems;
507 }
508
509 static void free_entries_rb(void)
510 {
511         struct rb_node *n;
512
513         while ((n = rb_first(&rb_root)) != NULL) {
514                 struct trace *t = rb_entry(n, struct trace, rb_node);
515
516                 rb_erase(&t->rb_node, &rb_root);
517                 free(t);
518         }
519 }
520
521 static void show_entries_rb(void)
522 {
523         struct blk_io_trace *bit;
524         struct rb_node *n;
525         struct trace *t;
526         int cpu;
527
528         n = rb_first(&rb_root);
529         if (!n)
530                 return;
531
532         do {
533                 t = rb_entry(n, struct trace, rb_node);
534                 bit = t->bit;
535
536                 cpu = bit->cpu;
537                 if (cpu > max_cpus) {
538                         fprintf(stderr, "CPU number too large (%d)\n", cpu);
539                         break;
540                 }
541
542                 if (genesis_time == 0)
543                         genesis_time = bit->time;
544                 bit->time -= genesis_time;
545
546                 check_time(bit);
547
548                 if (dump_trace(bit, &per_cpu_info[cpu]))
549                         break;
550
551         } while ((n = rb_next(n)) != NULL);
552 }
553
554 static int read_data(int fd, void *buffer, int bytes, int block)
555 {
556         int ret, bytes_left, fl;
557         void *p;
558
559         fl = fcntl(fd, F_GETFL);
560
561         if (!block)
562                 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
563         else
564                 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
565
566         bytes_left = bytes;
567         p = buffer;
568         while (bytes_left > 0) {
569                 ret = read(fd, p, bytes_left);
570                 if (!ret)
571                         return 1;
572                 else if (ret < 0) {
573                         if (errno != EAGAIN)
574                                 perror("read");
575                         return -1;
576                 } else {
577                         p += ret;
578                         bytes_left -= ret;
579                 }
580         }
581
582         return 0;
583 }
584
585 static int do_file(void)
586 {
587         int i, nfiles;
588
589         for (i = 0, nfiles = 0;; i++, nfiles++) {
590                 struct per_cpu_info *pci;
591                 struct stat st;
592                 void *tb;
593
594                 pci = get_cpu_info(i);
595                 pci->ofp = NULL;
596
597                 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
598                 if (stat(pci->fname, &st) < 0)
599                         break;
600                 if (!st.st_size)
601                         continue;
602
603                 printf("Processing %s\n", pci->fname);
604
605                 tb = malloc(st.st_size);
606
607                 pci->fd = open(pci->fname, O_RDONLY);
608                 if (pci->fd < 0) {
609                         perror(pci->fname);
610                         break;
611                 }
612
613                 if (read_data(pci->fd, tb, st.st_size, 1))
614                         break;
615
616                 if (sort_entries(tb, st.st_size, ~0U) == -1)
617                         break;
618
619                 close(pci->fd);
620                 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
621
622         }
623
624         if (!nfiles) {
625                 fprintf(stderr, "No files found\n");
626                 return 1;
627         }
628
629         show_entries_rb();
630         return 0;
631 }
632
633 static void resize_buffer(void **buffer, long *old_size)
634 {
635         long cur_size = *old_size;
636         void *ptr;
637
638         *old_size *= 2;
639         ptr = malloc(*old_size);
640         memcpy(ptr, *buffer, cur_size);
641         free(*buffer);
642         *buffer = ptr;
643 }
644
645 static int read_sort_events(int fd, void **buffer)
646 {
647         long offset, max_offset;
648         int events;
649
650         max_offset = 128 * sizeof(struct blk_io_trace);
651         *buffer = malloc(max_offset);
652         events = 0;
653         offset = 0;
654
655         do {
656                 struct blk_io_trace *t;
657                 int pdu_len;
658
659                 if (max_offset - offset < sizeof(*t))
660                         resize_buffer(buffer, &max_offset);
661
662                 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
663                         if (events)
664                                 break;
665
666                         usleep(1000);
667                         continue;
668                 }
669
670                 t = *buffer + offset;
671                 offset += sizeof(*t);
672
673                 pdu_len = be16_to_cpu(t->pdu_len);
674
675                 if (max_offset - offset < pdu_len)
676                         resize_buffer(buffer, &max_offset);
677
678                 if (read_data(fd, *buffer + offset, pdu_len, 1))
679                         break;
680
681                 offset += pdu_len;
682                 events++;
683         } while (!is_done() && events < rb_batch);
684
685         return events;
686 }
687
688 static int do_stdin(void)
689 {
690         int fd;
691         void *ptr;
692
693         fd = dup(STDIN_FILENO);
694         do {
695                 int events;
696
697                 events = read_sort_events(fd, &ptr);
698                 if (!events)
699                         break;
700         
701                 sort_entries(ptr, ~0UL, events);
702                 show_entries_rb();
703                 free_entries_rb();
704         } while (1);
705
706         close(fd);
707         free(ptr);
708         return 0;
709 }
710
711 static void flush_output(void)
712 {
713         int i;
714
715         for (i = 0; i < max_cpus; i++) {
716                 struct per_cpu_info *pci = &per_cpu_info[i];
717
718                 if (pci->ofp) {
719                         fflush(pci->ofp);
720                         fclose(pci->ofp);
721                         pci->ofp = NULL;
722                 }
723         }
724 }
725
726 static void handle_sigint(int sig)
727 {
728         done = 1;
729         flush_output();
730 }
731
732 static void usage(char *prog)
733 {
734         fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
735 }
736
737 int main(int argc, char *argv[])
738 {
739         int c, ret;
740
741         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
742                 switch (c) {
743                 case 'i':
744                         dev = strdup(optarg);
745                         break;
746                 case 'o':
747                         output_name = strdup(optarg);
748                         break;
749                 case 'b':
750                         rb_batch = atoi(optarg);
751                         if (rb_batch <= 0)
752                                 rb_batch = RB_BATCH_DEFAULT;
753                         break;
754                 default:
755                         usage(argv[0]);
756                         return 1;
757                 }
758         }
759
760         if (!dev) {
761                 usage(argv[0]);
762                 return 1;
763         }
764
765         memset(&rb_root, 0, sizeof(rb_root));
766
767         signal(SIGINT, handle_sigint);
768         signal(SIGHUP, handle_sigint);
769         signal(SIGTERM, handle_sigint);
770
771         if (!strcmp(dev, "-"))
772                 ret = do_stdin();
773         else
774                 ret = do_file();
775
776         show_stats();
777         flush_output();
778         return ret;
779 }