[PATCH] Pretty up output on large SMP machines
[blktrace.git] / blkparse.c
1 /*
2  * block queue tracing parse application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <getopt.h>
29 #include <errno.h>
30 #include <signal.h>
31
32 #include "blktrace.h"
33 #include "rbtree.h"
34
35 #define SECONDS(x)      ((unsigned long long)(x) / 1000000000)
36 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
37
38 static int backwards;
39 static unsigned long long genesis_time, last_reported_time;
40
41 struct per_cpu_info {
42         int cpu;
43         int nelems;
44
45         int fd;
46         char fname[128];
47
48         FILE *ofp;
49         char ofname[128];
50
51         unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
52         unsigned long ireads, iwrites;
53         unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
54         unsigned long long iread_kb, iwrite_kb;
55 };
56
57 #define S_OPTS  "i:o:"
58 static struct option l_opts[] = {
59         {
60                 .name = "input",
61                 .has_arg = 1,
62                 .flag = NULL,
63                 .val = 'i'
64         },
65         {
66                 .name = "output",
67                 .has_arg = 1,
68                 .flag = NULL,
69                 .val = 'o'
70         },
71         {
72                 .name = NULL,
73                 .has_arg = 0,
74                 .flag = NULL,
75                 .val = 0
76         }
77 };
78
79 static struct rb_root rb_root;
80
81 struct trace {
82         struct blk_io_trace *bit;
83         struct rb_node rb_node;
84 };
85
86 static int max_cpus;
87 static struct per_cpu_info *per_cpu_info;
88
89 static unsigned long long events;
90
91 static char *dev, *output_name;
92
93 #define is_done()       (*(volatile int *)(&done))
94 static volatile int done;
95
96 static void resize_cpu_info(int cpuid)
97 {
98         int new_space, new_max = cpuid + 1;
99         char *new_start;
100
101         per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
102         if (!per_cpu_info) {
103                 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
104                         new_max, (int) sizeof(*per_cpu_info));
105                 exit(1);
106         }
107
108         new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
109         new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
110         memset(new_start, 0, new_space);
111         max_cpus = new_max;
112 }
113
114 static struct per_cpu_info *get_cpu_info(int cpu)
115 {
116         struct per_cpu_info *pci;
117
118         if (cpu >= max_cpus)
119                 resize_cpu_info(cpu);
120
121         /*
122          * ->cpu might already be set, but just set it unconditionally
123          */
124         pci = &per_cpu_info[cpu];
125         pci->cpu = cpu;
126
127         return pci;
128 }
129
130 static inline void check_time(struct blk_io_trace *bit)
131 {
132         unsigned long long this = bit->time;
133         unsigned long long last = last_reported_time;
134
135         backwards = (this < last) ? 'B' : ' ';
136         last_reported_time = this;
137 }
138
139 static inline void account_m(struct per_cpu_info *pci, int rw,
140                              unsigned int bytes)
141 {
142         if (rw) {
143                 pci->mwrites++;
144                 pci->qwrite_kb += bytes >> 10;
145         } else {
146                 pci->mreads++;
147                 pci->qread_kb += bytes >> 10;
148         }
149 }
150
151 static inline void account_q(struct per_cpu_info *pci, int rw,
152                              unsigned int bytes)
153 {
154         if (rw) {
155                 pci->qwrites++;
156                 pci->qwrite_kb += bytes >> 10;
157         } else {
158                 pci->qreads++;
159                 pci->qread_kb += bytes >> 10;
160         }
161 }
162
163 static inline void account_c(struct per_cpu_info *pci, int rw,
164                              unsigned int bytes)
165 {
166         if (rw) {
167                 pci->cwrites++;
168                 pci->cwrite_kb += bytes >> 10;
169         } else {
170                 pci->creads++;
171                 pci->cread_kb += bytes >> 10;
172         }
173 }
174
175 static inline void account_i(struct per_cpu_info *pci, int rw,
176                              unsigned int bytes)
177 {
178         if (rw) {
179                 pci->iwrites++;
180                 pci->iwrite_kb += bytes >> 10;
181         } else {
182                 pci->ireads++;
183                 pci->iread_kb += bytes >> 10;
184         }
185 }
186
187 static void output(struct per_cpu_info *pci, char *s)
188 {
189         printf("%s", s);
190
191         if (pci->ofp)
192                 fprintf(pci->ofp, "%s", s);
193 }
194
195 static char hstring[256];
196 static char tstring[256];
197
198 static inline char *setup_header(struct per_cpu_info *pci,
199                                  struct blk_io_trace *t, char act)
200 {
201         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
202         int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
203         int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
204         char rwbs[4];
205         int i = 0;
206
207         if (w)
208                 rwbs[i++] = 'W';
209         else
210                 rwbs[i++] = 'R';
211         if (b)
212                 rwbs[i++] = 'B';
213         if (s)
214                 rwbs[i++] = 'S';
215
216         rwbs[i] = '\0';
217
218         sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
219                 pci->cpu,
220                 (unsigned long)t->sequence, SECONDS(t->time), 
221                 NANO_SECONDS(t->time), t->pid, act, rwbs);
222
223         return hstring;
224 }
225
226 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
227                          char act)
228 {
229         sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
230                 (unsigned long long)t->sector, t->bytes >> 9, t->error);
231         output(pci, tstring);
232 }
233
234 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
235                       char act)
236 {
237         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
238                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
239         output(pci, tstring);
240 }
241
242 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
243                       char act)
244 {
245         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
246                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
247         output(pci, tstring);
248 }
249
250 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
251                       char act)
252 {
253         sprintf(tstring,"%s   %Lu + %u [%s]\n", setup_header(pci, t, act),
254                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
255         output(pci, tstring);
256 }
257
258 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
259                         char act)
260 {
261         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
262                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
263         output(pci, tstring);
264 }
265
266 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
267 {
268         unsigned char *buf;
269         int i;
270
271         sprintf(tstring,"%s ", setup_header(pci, t, act));
272         output(pci, tstring);
273
274         buf = (unsigned char *) t + sizeof(*t);
275         for (i = 0; i < t->pdu_len; i++) {
276                 sprintf(tstring,"%02x ", buf[i]);
277                 output(pci, tstring);
278         }
279
280         if (act == 'C') {
281                 sprintf(tstring,"[%d]\n", t->error);
282                 output(pci, tstring);
283         } else {
284                 sprintf(tstring,"[%s]\n", t->comm);
285                 output(pci, tstring);
286         }
287         return 0;
288 }
289
290 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
291 {
292         int ret = 0;
293
294         switch (t->action & 0xffff) {
295                 case __BLK_TA_QUEUE:
296                         log_generic(pci, t, 'Q');
297                         break;
298                 case __BLK_TA_GETRQ:
299                         log_generic(pci, t, 'G');
300                         break;
301                 case __BLK_TA_SLEEPRQ:
302                         log_generic(pci, t, 'S');
303                         break;
304                 case __BLK_TA_REQUEUE:
305                         log_generic(pci, t, 'R');
306                         break;
307                 case __BLK_TA_ISSUE:
308                         ret = log_pc(pci, t, 'D');
309                         break;
310                 case __BLK_TA_COMPLETE:
311                         log_pc(pci, t, 'C');
312                         break;
313                 default:
314                         fprintf(stderr, "Bad pc action %x\n", t->action);
315                         ret = 1;
316                         break;
317         }
318         
319         return ret;
320 }
321
322 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
323 {
324         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
325
326         switch (t->action & 0xffff) {
327                 case __BLK_TA_QUEUE:
328                         account_q(pci, w, t->bytes);
329                         log_queue(pci, t, 'Q');
330                         break;
331                 case __BLK_TA_BACKMERGE:
332                         account_m(pci, w, t->bytes);
333                         log_merge(pci, t, 'M');
334                         break;
335                 case __BLK_TA_FRONTMERGE:
336                         account_m(pci, w, t->bytes);
337                         log_merge(pci, t, 'F');
338                         break;
339                 case __BLK_TA_GETRQ:
340                         log_generic(pci, t, 'G');
341                         break;
342                 case __BLK_TA_SLEEPRQ:
343                         log_generic(pci, t, 'S');
344                         break;
345                 case __BLK_TA_REQUEUE:
346                         account_c(pci, w, -t->bytes);
347                         log_queue(pci, t, 'R');
348                         break;
349                 case __BLK_TA_ISSUE:
350                         account_i(pci, w, t->bytes);
351                         log_issue(pci, t, 'D');
352                         break;
353                 case __BLK_TA_COMPLETE:
354                         account_c(pci, w, t->bytes);
355                         log_complete(pci, t, 'C');
356                         break;
357                 default:
358                         fprintf(stderr, "Bad fs action %x\n", t->action);
359                         break;
360         }
361 }
362
363 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
364 {
365         int ret = 0;
366
367         if (output_name && !pci->ofp) {
368                 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
369                                 "%s_log.%d", output_name, pci->cpu);
370
371                 pci->ofp = fopen(pci->ofname, "w");
372                 if (pci->ofp == NULL) {
373                         perror(pci->ofname);
374                         return 1;
375                 }
376         }
377
378         if (t->action & BLK_TC_ACT(BLK_TC_PC))
379                 ret = dump_trace_pc(t, pci);
380         else
381                 dump_trace_fs(t, pci);
382
383         events++;
384         return ret;
385 }
386
387 static void dump_pci_stats(struct per_cpu_info *pci)
388 {
389         printf(" Reads Queued:    %'8lu, %'8LuKiB\t", pci->qreads, pci->qread_kb);
390         printf(" Writes Queued:    %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
391
392         printf(" Read Dispatches: %'8lu, %'8LuKiB\t", pci->ireads, pci->iread_kb);
393         printf(" Write Dispatches: %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
394         printf(" Reads Completed: %'8lu, %'8LuKiB\t", pci->creads, pci->cread_kb);
395         printf(" Writes Completed: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
396         printf(" Read Merges:     %'8lu%8c\t", pci->mreads, ' ');
397
398         printf(" Write Merges:     %'8lu\n", pci->mwrites);
399 }
400
401 static void show_stats(void)
402 {
403         struct per_cpu_info foo, *pci;
404         int i, pci_events = 0;
405
406         memset(&foo, 0, sizeof(foo));
407
408         for (i = 0; i < max_cpus; i++) {
409                 pci = &per_cpu_info[i];
410
411                 if (!pci->nelems)
412                         continue;
413
414                 foo.qreads += pci->qreads;
415                 foo.qwrites += pci->qwrites;
416                 foo.creads += pci->creads;
417                 foo.cwrites += pci->cwrites;
418                 foo.mreads += pci->mreads;
419                 foo.mwrites += pci->mwrites;
420                 foo.qread_kb += pci->qread_kb;
421                 foo.qwrite_kb += pci->qwrite_kb;
422                 foo.cread_kb += pci->cread_kb;
423                 foo.cwrite_kb += pci->cwrite_kb;
424
425                 printf("CPU%d:\n", i);
426                 dump_pci_stats(pci);
427                 pci_events++;
428         }
429
430         if (pci_events > 1) {
431                 printf("Total:\n");
432                 dump_pci_stats(&foo);
433         }
434
435         printf("Events: %'Lu\n", events);
436 }
437
438 static inline int trace_rb_insert(struct trace *t)
439 {
440         struct rb_node **p = &rb_root.rb_node;
441         struct rb_node *parent = NULL;
442         struct trace *__t;
443
444         while (*p) {
445                 parent = *p;
446                 __t = rb_entry(parent, struct trace, rb_node);
447
448                 if (t->bit->sequence < __t->bit->sequence)
449                         p = &(*p)->rb_left;
450                 else if (t->bit->sequence > __t->bit->sequence)
451                         p = &(*p)->rb_right;
452                 else {
453                         fprintf(stderr, "sequence alias!\n");
454                         return 1;
455                 }
456         }
457
458         rb_link_node(&t->rb_node, parent, p);
459         rb_insert_color(&t->rb_node, &rb_root);
460         return 0;
461 }
462
463 static int sort_entries(void *traces, unsigned long offset, int nr)
464 {
465         struct per_cpu_info *pci;
466         struct blk_io_trace *bit;
467         struct trace *t;
468         void *start = traces;
469         int nelems = 0;
470
471         while (traces - start <= offset - sizeof(*bit)) {
472                 if (!nr)
473                         break;
474
475                 bit = traces;
476
477                 t = malloc(sizeof(*t));
478                 t->bit = bit;
479                 memset(&t->rb_node, 0, sizeof(t->rb_node));
480
481                 trace_to_cpu(bit);
482
483                 if (verify_trace(bit))
484                         break;
485
486                 pci = get_cpu_info(bit->cpu);
487                 pci->nelems++;
488
489                 if (trace_rb_insert(t))
490                         return -1;
491
492                 traces += sizeof(*bit) + bit->pdu_len;
493                 nelems++;
494                 nr--;
495         }
496
497         return nelems;
498 }
499
500 static void free_entries_rb(void)
501 {
502         struct rb_node *n;
503
504         while ((n = rb_first(&rb_root)) != NULL) {
505                 struct trace *t = rb_entry(n, struct trace, rb_node);
506
507                 rb_erase(&t->rb_node, &rb_root);
508                 free(t);
509         }
510 }
511
512 static void show_entries_rb(void)
513 {
514         struct blk_io_trace *bit;
515         struct rb_node *n;
516         struct trace *t;
517         int cpu;
518
519         n = rb_first(&rb_root);
520         if (!n)
521                 return;
522
523         do {
524                 t = rb_entry(n, struct trace, rb_node);
525                 bit = t->bit;
526
527                 cpu = bit->cpu;
528                 if (cpu > max_cpus) {
529                         fprintf(stderr, "CPU number too large (%d)\n", cpu);
530                         break;
531                 }
532
533                 if (genesis_time == 0)
534                         genesis_time = bit->time;
535                 bit->time -= genesis_time;
536
537                 check_time(bit);
538
539                 if (dump_trace(bit, &per_cpu_info[cpu]))
540                         break;
541
542         } while ((n = rb_next(n)) != NULL);
543 }
544
545 static int read_data(int fd, void *buffer, int bytes, int block)
546 {
547         int ret, bytes_left, fl;
548         void *p;
549
550         fl = fcntl(fd, F_GETFL);
551
552         if (!block)
553                 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
554         else
555                 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
556
557         bytes_left = bytes;
558         p = buffer;
559         while (bytes_left > 0) {
560                 ret = read(fd, p, bytes_left);
561                 if (!ret)
562                         return 1;
563                 else if (ret < 0) {
564                         if (errno != EAGAIN)
565                                 perror("read");
566                         return -1;
567                 } else {
568                         p += ret;
569                         bytes_left -= ret;
570                 }
571         }
572
573         return 0;
574 }
575
576 static int do_file(void)
577 {
578         int i, nfiles;
579
580         for (i = 0, nfiles = 0;; i++, nfiles++) {
581                 struct per_cpu_info *pci;
582                 struct stat st;
583                 void *tb;
584
585                 pci = get_cpu_info(i);
586                 pci->ofp = NULL;
587
588                 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
589                 if (stat(pci->fname, &st) < 0)
590                         break;
591                 if (!st.st_size)
592                         continue;
593
594                 printf("Processing %s\n", pci->fname);
595
596                 tb = malloc(st.st_size);
597
598                 pci->fd = open(pci->fname, O_RDONLY);
599                 if (pci->fd < 0) {
600                         perror(pci->fname);
601                         break;
602                 }
603
604                 if (read_data(pci->fd, tb, st.st_size, 1))
605                         break;
606
607                 if (sort_entries(tb, st.st_size, ~0U) == -1)
608                         break;
609
610                 close(pci->fd);
611                 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
612
613         }
614
615         if (!nfiles) {
616                 fprintf(stderr, "No files found\n");
617                 return 1;
618         }
619
620         show_entries_rb();
621         return 0;
622 }
623
624 static void resize_buffer(void **buffer, long *old_size)
625 {
626         long cur_size = *old_size;
627         void *ptr;
628
629         *old_size *= 2;
630         ptr = malloc(*old_size);
631         memcpy(ptr, *buffer, cur_size);
632         free(*buffer);
633         *buffer = ptr;
634 }
635
636 static int read_sort_events(int fd, void **buffer)
637 {
638         long offset, max_offset;
639         int events;
640
641         max_offset = 128 * sizeof(struct blk_io_trace);
642         *buffer = malloc(max_offset);
643         events = 0;
644         offset = 0;
645
646         do {
647                 struct blk_io_trace *t;
648                 int pdu_len;
649
650                 if (max_offset - offset < sizeof(*t))
651                         resize_buffer(buffer, &max_offset);
652
653                 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
654                         if (events)
655                                 break;
656
657                         usleep(1000);
658                         continue;
659                 }
660
661                 t = *buffer + offset;
662                 offset += sizeof(*t);
663
664                 pdu_len = be16_to_cpu(t->pdu_len);
665
666                 if (max_offset - offset < pdu_len)
667                         resize_buffer(buffer, &max_offset);
668
669                 if (read_data(fd, *buffer + offset, pdu_len, 1))
670                         break;
671
672                 offset += pdu_len;
673                 events++;
674         } while (!is_done());
675
676         return events;
677 }
678
679 static int do_stdin(void)
680 {
681         int fd;
682         void *ptr;
683
684         fd = dup(STDIN_FILENO);
685         do {
686                 int events;
687
688                 events = read_sort_events(fd, &ptr);
689                 if (!events)
690                         break;
691         
692                 sort_entries(ptr, ~0UL, events);
693                 show_entries_rb();
694                 free_entries_rb();
695         } while (1);
696
697         close(fd);
698         free(ptr);
699         return 0;
700 }
701
702 static void flush_output(void)
703 {
704         int i;
705
706         for (i = 0; i < max_cpus; i++) {
707                 struct per_cpu_info *pci = &per_cpu_info[i];
708
709                 if (pci->ofp) {
710                         fflush(pci->ofp);
711                         fclose(pci->ofp);
712                         pci->ofp = NULL;
713                 }
714         }
715 }
716
717 static void handle_sigint(int sig)
718 {
719         done = 1;
720         flush_output();
721 }
722
723 static void usage(char *prog)
724 {
725         fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
726 }
727
728 int main(int argc, char *argv[])
729 {
730         int c, ret;
731
732         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
733                 switch (c) {
734                 case 'i':
735                         dev = strdup(optarg);
736                         break;
737                 case 'o':
738                         output_name = strdup(optarg);
739                         break;
740                 default:
741                         usage(argv[0]);
742                         return 1;
743                 }
744         }
745
746         if (!dev) {
747                 usage(argv[0]);
748                 return 1;
749         }
750
751         memset(&rb_root, 0, sizeof(rb_root));
752
753         signal(SIGINT, handle_sigint);
754         signal(SIGHUP, handle_sigint);
755         signal(SIGTERM, handle_sigint);
756
757         if (!strcmp(dev, "-"))
758                 ret = do_stdin();
759         else
760                 ret = do_file();
761
762         show_stats();
763         flush_output();
764         return ret;
765 }