Merge refs/heads/master from origin
[blktrace.git] / blkparse.c
1 /*
2  * block queue tracing parse application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <getopt.h>
29 #include <errno.h>
30 #include <signal.h>
31 #include <locale.h>
32
33 #include "blktrace.h"
34 #include "rbtree.h"
35
36 #define SECONDS(x)      ((unsigned long long)(x) / 1000000000)
37 #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
38
39 static int backwards;
40 static unsigned long long genesis_time, last_reported_time;
41
42 struct per_cpu_info {
43         int cpu;
44         int nelems;
45
46         int fd;
47         char fname[128];
48
49         FILE *ofp;
50         char ofname[128];
51
52         unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53         unsigned long ireads, iwrites;
54         unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55         unsigned long long iread_kb, iwrite_kb;
56 };
57
58 #define S_OPTS  "i:o:b:"
59 static struct option l_opts[] = {
60         {
61                 .name = "input",
62                 .has_arg = 1,
63                 .flag = NULL,
64                 .val = 'i'
65         },
66         {
67                 .name = "output",
68                 .has_arg = 1,
69                 .flag = NULL,
70                 .val = 'o'
71         },
72         {
73                 .name = "batch",
74                 .has_arg = 1,
75                 .flag = NULL,
76                 .val = 'b'
77         },
78         {
79                 .name = NULL,
80                 .has_arg = 0,
81                 .flag = NULL,
82                 .val = 0
83         }
84 };
85
86 static struct rb_root rb_root;
87
88 struct trace {
89         struct blk_io_trace *bit;
90         struct rb_node rb_node;
91 };
92
93 static int max_cpus;
94 static struct per_cpu_info *per_cpu_info;
95
96 static unsigned long long events;
97
98 static char *dev, *output_name;
99
100 #define RB_BATCH_DEFAULT        (1024)
101 static int rb_batch = RB_BATCH_DEFAULT;
102
103 #define is_done()       (*(volatile int *)(&done))
104 static volatile int done;
105
106 static void resize_cpu_info(int cpuid)
107 {
108         int new_space, new_max = cpuid + 1;
109         char *new_start;
110
111         per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
112         if (!per_cpu_info) {
113                 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
114                         new_max, (int) sizeof(*per_cpu_info));
115                 exit(1);
116         }
117
118         new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
119         new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
120         memset(new_start, 0, new_space);
121         max_cpus = new_max;
122 }
123
124 static struct per_cpu_info *get_cpu_info(int cpu)
125 {
126         struct per_cpu_info *pci;
127
128         if (cpu >= max_cpus)
129                 resize_cpu_info(cpu);
130
131         /*
132          * ->cpu might already be set, but just set it unconditionally
133          */
134         pci = &per_cpu_info[cpu];
135         pci->cpu = cpu;
136
137         return pci;
138 }
139
140 static inline void check_time(struct blk_io_trace *bit)
141 {
142         unsigned long long this = bit->time;
143         unsigned long long last = last_reported_time;
144
145         backwards = (this < last) ? 'B' : ' ';
146         last_reported_time = this;
147 }
148
149 static inline void account_m(struct per_cpu_info *pci, int rw,
150                              unsigned int bytes)
151 {
152         if (rw) {
153                 pci->mwrites++;
154                 pci->qwrite_kb += bytes >> 10;
155         } else {
156                 pci->mreads++;
157                 pci->qread_kb += bytes >> 10;
158         }
159 }
160
161 static inline void account_q(struct per_cpu_info *pci, int rw,
162                              unsigned int bytes)
163 {
164         if (rw) {
165                 pci->qwrites++;
166                 pci->qwrite_kb += bytes >> 10;
167         } else {
168                 pci->qreads++;
169                 pci->qread_kb += bytes >> 10;
170         }
171 }
172
173 static inline void account_c(struct per_cpu_info *pci, int rw,
174                              unsigned int bytes)
175 {
176         if (rw) {
177                 pci->cwrites++;
178                 pci->cwrite_kb += bytes >> 10;
179         } else {
180                 pci->creads++;
181                 pci->cread_kb += bytes >> 10;
182         }
183 }
184
185 static inline void account_i(struct per_cpu_info *pci, int rw,
186                              unsigned int bytes)
187 {
188         if (rw) {
189                 pci->iwrites++;
190                 pci->iwrite_kb += bytes >> 10;
191         } else {
192                 pci->ireads++;
193                 pci->iread_kb += bytes >> 10;
194         }
195 }
196
197 static void output(struct per_cpu_info *pci, char *s)
198 {
199         printf("%s", s);
200
201         if (pci->ofp)
202                 fprintf(pci->ofp, "%s", s);
203 }
204
205 static char hstring[256];
206 static char tstring[256];
207
208 static inline char *setup_header(struct per_cpu_info *pci,
209                                  struct blk_io_trace *t, char act)
210 {
211         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
212         int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
213         int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
214         char rwbs[4];
215         int i = 0;
216
217         if (w)
218                 rwbs[i++] = 'W';
219         else
220                 rwbs[i++] = 'R';
221         if (b)
222                 rwbs[i++] = 'B';
223         if (s)
224                 rwbs[i++] = 'S';
225
226         rwbs[i] = '\0';
227
228         sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
229                 pci->cpu,
230                 (unsigned long)t->sequence, SECONDS(t->time), 
231                 NANO_SECONDS(t->time), t->pid, act, rwbs);
232
233         return hstring;
234 }
235
236 static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
237                          char act)
238 {
239         sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
240                 (unsigned long long)t->sector, t->bytes >> 9, t->error);
241         output(pci, tstring);
242 }
243
244 static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
245                       char act)
246 {
247         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
248                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
249         output(pci, tstring);
250 }
251
252 static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
253                       char act)
254 {
255         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
256                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
257         output(pci, tstring);
258 }
259
260 static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
261                       char act)
262 {
263         sprintf(tstring,"%s   %Lu + %u [%s]\n", setup_header(pci, t, act),
264                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
265         output(pci, tstring);
266 }
267
268 static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
269                         char act)
270 {
271         sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
272                 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
273         output(pci, tstring);
274 }
275
276 static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
277 {
278         unsigned char *buf;
279         int i;
280
281         sprintf(tstring,"%s ", setup_header(pci, t, act));
282         output(pci, tstring);
283
284         buf = (unsigned char *) t + sizeof(*t);
285         for (i = 0; i < t->pdu_len; i++) {
286                 sprintf(tstring,"%02x ", buf[i]);
287                 output(pci, tstring);
288         }
289
290         if (act == 'C') {
291                 sprintf(tstring,"[%d]\n", t->error);
292                 output(pci, tstring);
293         } else {
294                 sprintf(tstring,"[%s]\n", t->comm);
295                 output(pci, tstring);
296         }
297         return 0;
298 }
299
300 static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
301 {
302         int ret = 0;
303
304         switch (t->action & 0xffff) {
305                 case __BLK_TA_QUEUE:
306                         log_generic(pci, t, 'Q');
307                         break;
308                 case __BLK_TA_GETRQ:
309                         log_generic(pci, t, 'G');
310                         break;
311                 case __BLK_TA_SLEEPRQ:
312                         log_generic(pci, t, 'S');
313                         break;
314                 case __BLK_TA_REQUEUE:
315                         log_generic(pci, t, 'R');
316                         break;
317                 case __BLK_TA_ISSUE:
318                         ret = log_pc(pci, t, 'D');
319                         break;
320                 case __BLK_TA_COMPLETE:
321                         log_pc(pci, t, 'C');
322                         break;
323                 default:
324                         fprintf(stderr, "Bad pc action %x\n", t->action);
325                         ret = 1;
326                         break;
327         }
328         
329         return ret;
330 }
331
332 static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
333 {
334         int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
335
336         switch (t->action & 0xffff) {
337                 case __BLK_TA_QUEUE:
338                         account_q(pci, w, t->bytes);
339                         log_queue(pci, t, 'Q');
340                         break;
341                 case __BLK_TA_BACKMERGE:
342                         account_m(pci, w, t->bytes);
343                         log_merge(pci, t, 'M');
344                         break;
345                 case __BLK_TA_FRONTMERGE:
346                         account_m(pci, w, t->bytes);
347                         log_merge(pci, t, 'F');
348                         break;
349                 case __BLK_TA_GETRQ:
350                         log_generic(pci, t, 'G');
351                         break;
352                 case __BLK_TA_SLEEPRQ:
353                         log_generic(pci, t, 'S');
354                         break;
355                 case __BLK_TA_REQUEUE:
356                         account_c(pci, w, -t->bytes);
357                         log_queue(pci, t, 'R');
358                         break;
359                 case __BLK_TA_ISSUE:
360                         account_i(pci, w, t->bytes);
361                         log_issue(pci, t, 'D');
362                         break;
363                 case __BLK_TA_COMPLETE:
364                         account_c(pci, w, t->bytes);
365                         log_complete(pci, t, 'C');
366                         break;
367                 default:
368                         fprintf(stderr, "Bad fs action %x\n", t->action);
369                         break;
370         }
371 }
372
373 static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
374 {
375         int ret = 0;
376
377         if (output_name && !pci->ofp) {
378                 snprintf(pci->ofname, sizeof(pci->ofname) - 1,
379                                 "%s_log.%d", output_name, pci->cpu);
380
381                 pci->ofp = fopen(pci->ofname, "w");
382                 if (pci->ofp == NULL) {
383                         perror(pci->ofname);
384                         return 1;
385                 }
386         }
387
388         if (t->action & BLK_TC_ACT(BLK_TC_PC))
389                 ret = dump_trace_pc(t, pci);
390         else
391                 dump_trace_fs(t, pci);
392
393         events++;
394         return ret;
395 }
396
397 static void dump_pci_stats(struct per_cpu_info *pci)
398 {
399         printf(" Reads Queued:    %'8lu, %'8LuKiB\t", pci->qreads, pci->qread_kb);
400         printf(" Writes Queued:    %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb);
401
402         printf(" Read Dispatches: %'8lu, %'8LuKiB\t", pci->ireads, pci->iread_kb);
403         printf(" Write Dispatches: %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb);
404         printf(" Reads Completed: %'8lu, %'8LuKiB\t", pci->creads, pci->cread_kb);
405         printf(" Writes Completed: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb);
406         printf(" Read Merges:     %'8lu%8c\t", pci->mreads, ' ');
407
408         printf(" Write Merges:     %'8lu\n", pci->mwrites);
409 }
410
411 static void show_stats(void)
412 {
413         struct per_cpu_info foo, *pci;
414         int i, pci_events = 0;
415
416         memset(&foo, 0, sizeof(foo));
417
418         for (i = 0; i < max_cpus; i++) {
419                 pci = &per_cpu_info[i];
420
421                 if (!pci->nelems)
422                         continue;
423
424                 foo.qreads += pci->qreads;
425                 foo.qwrites += pci->qwrites;
426                 foo.creads += pci->creads;
427                 foo.cwrites += pci->cwrites;
428                 foo.mreads += pci->mreads;
429                 foo.mwrites += pci->mwrites;
430                 foo.qread_kb += pci->qread_kb;
431                 foo.qwrite_kb += pci->qwrite_kb;
432                 foo.cread_kb += pci->cread_kb;
433                 foo.cwrite_kb += pci->cwrite_kb;
434
435                 printf("CPU%d:\n", i);
436                 dump_pci_stats(pci);
437                 pci_events++;
438         }
439
440         if (pci_events > 1) {
441                 printf("Total:\n");
442                 dump_pci_stats(&foo);
443         }
444
445         printf("Events: %'Lu\n", events);
446 }
447
448 static inline int trace_rb_insert(struct trace *t)
449 {
450         struct rb_node **p = &rb_root.rb_node;
451         struct rb_node *parent = NULL;
452         struct trace *__t;
453
454         while (*p) {
455                 parent = *p;
456                 __t = rb_entry(parent, struct trace, rb_node);
457
458                 if (t->bit->sequence < __t->bit->sequence)
459                         p = &(*p)->rb_left;
460                 else if (t->bit->sequence > __t->bit->sequence)
461                         p = &(*p)->rb_right;
462                 else {
463                         fprintf(stderr, "sequence alias!\n");
464                         return 1;
465                 }
466         }
467
468         rb_link_node(&t->rb_node, parent, p);
469         rb_insert_color(&t->rb_node, &rb_root);
470         return 0;
471 }
472
473 #define min(a, b)       ((a) < (b) ? (a) : (b))
474
475 static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
476 {
477         unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
478         unsigned long off;
479         struct blk_io_trace *bit;
480         __u32 magic;
481
482         for (off = 0; off < max_offset; off++) {
483                 bit = p + off;
484
485                 magic = be32_to_cpu(bit->magic);
486                 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
487                         return bit;
488         }
489
490         return NULL;
491 }
492
493 static int sort_entries(void *traces, unsigned long offset, int nr)
494 {
495         struct per_cpu_info *pci;
496         struct blk_io_trace *bit;
497         struct trace *t;
498         void *start = traces;
499         int nelems = 0;
500
501         while (traces - start <= offset - sizeof(*bit)) {
502                 if (!nr)
503                         break;
504
505                 bit = find_trace(traces, offset - (traces - start), nr);
506                 if (!bit)
507                         break;
508
509                 t = malloc(sizeof(*t));
510                 t->bit = bit;
511                 memset(&t->rb_node, 0, sizeof(t->rb_node));
512
513                 trace_to_cpu(bit);
514
515                 if (verify_trace(bit))
516                         break;
517
518                 pci = get_cpu_info(bit->cpu);
519                 pci->nelems++;
520
521                 if (trace_rb_insert(t))
522                         return -1;
523
524                 traces += sizeof(*bit) + bit->pdu_len;
525                 nelems++;
526                 nr--;
527         }
528
529         return nelems;
530 }
531
532 static void free_entries_rb(void)
533 {
534         struct rb_node *n;
535
536         while ((n = rb_first(&rb_root)) != NULL) {
537                 struct trace *t = rb_entry(n, struct trace, rb_node);
538
539                 rb_erase(&t->rb_node, &rb_root);
540                 free(t);
541         }
542 }
543
544 static void show_entries_rb(void)
545 {
546         struct blk_io_trace *bit;
547         struct rb_node *n;
548         struct trace *t;
549         int cpu;
550
551         n = rb_first(&rb_root);
552         if (!n)
553                 return;
554
555         do {
556                 t = rb_entry(n, struct trace, rb_node);
557                 bit = t->bit;
558
559                 cpu = bit->cpu;
560                 if (cpu > max_cpus) {
561                         fprintf(stderr, "CPU number too large (%d)\n", cpu);
562                         break;
563                 }
564
565                 if (genesis_time == 0)
566                         genesis_time = bit->time;
567                 bit->time -= genesis_time;
568
569                 check_time(bit);
570
571                 if (dump_trace(bit, &per_cpu_info[cpu]))
572                         break;
573
574         } while ((n = rb_next(n)) != NULL);
575 }
576
577 static int read_data(int fd, void *buffer, int bytes, int block)
578 {
579         int ret, bytes_left, fl;
580         void *p;
581
582         fl = fcntl(fd, F_GETFL);
583
584         if (!block)
585                 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
586         else
587                 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
588
589         bytes_left = bytes;
590         p = buffer;
591         while (bytes_left > 0) {
592                 ret = read(fd, p, bytes_left);
593                 if (!ret)
594                         return 1;
595                 else if (ret < 0) {
596                         if (errno != EAGAIN)
597                                 perror("read");
598                         return -1;
599                 } else {
600                         p += ret;
601                         bytes_left -= ret;
602                 }
603         }
604
605         return 0;
606 }
607
608 static int do_file(void)
609 {
610         int i, nfiles;
611
612         for (i = 0, nfiles = 0;; i++, nfiles++) {
613                 struct per_cpu_info *pci;
614                 struct stat st;
615                 void *tb;
616
617                 pci = get_cpu_info(i);
618                 pci->ofp = NULL;
619
620                 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
621                 if (stat(pci->fname, &st) < 0)
622                         break;
623                 if (!st.st_size)
624                         continue;
625
626                 printf("Processing %s\n", pci->fname);
627
628                 tb = malloc(st.st_size);
629
630                 pci->fd = open(pci->fname, O_RDONLY);
631                 if (pci->fd < 0) {
632                         perror(pci->fname);
633                         break;
634                 }
635
636                 if (read_data(pci->fd, tb, st.st_size, 1))
637                         break;
638
639                 if (sort_entries(tb, st.st_size, ~0U) == -1)
640                         break;
641
642                 close(pci->fd);
643                 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
644
645         }
646
647         if (!nfiles) {
648                 fprintf(stderr, "No files found\n");
649                 return 1;
650         }
651
652         show_entries_rb();
653         return 0;
654 }
655
656 static void resize_buffer(void **buffer, long *size, long offset)
657 {
658         long old_size = *size;
659
660         *size *= 2;
661         *buffer = realloc(*buffer, *size);
662         memset(*buffer + offset, 0, *size - old_size);
663 }
664
665 static int read_sort_events(int fd, void **buffer)
666 {
667         long offset, max_offset;
668         int events;
669
670         max_offset = 128 * sizeof(struct blk_io_trace);
671         *buffer = malloc(max_offset);
672         events = 0;
673         offset = 0;
674
675         do {
676                 struct blk_io_trace *t;
677                 int pdu_len;
678
679                 if (max_offset - offset < sizeof(*t))
680                         resize_buffer(buffer, &max_offset, offset);
681
682                 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
683                         if (events)
684                                 break;
685
686                         usleep(1000);
687                         continue;
688                 }
689
690                 t = *buffer + offset;
691                 offset += sizeof(*t);
692
693                 pdu_len = be16_to_cpu(t->pdu_len);
694                 if (pdu_len) {
695                         if (max_offset - offset < pdu_len)
696                                 resize_buffer(buffer, &max_offset, offset);
697
698                         if (read_data(fd, *buffer + offset, pdu_len, 1))
699                                 break;
700
701                         offset += pdu_len;
702                 }
703
704                 events++;
705         } while (!is_done() && events < rb_batch);
706
707         return events;
708 }
709
710 static int do_stdin(void)
711 {
712         int fd;
713         void *ptr;
714
715         fd = dup(STDIN_FILENO);
716         do {
717                 int events;
718
719                 events = read_sort_events(fd, &ptr);
720                 if (!events)
721                         break;
722         
723                 if (sort_entries(ptr, ~0UL, events) == -1)
724                         break;
725
726                 show_entries_rb();
727                 free_entries_rb();
728         } while (1);
729
730         close(fd);
731         free(ptr);
732         return 0;
733 }
734
735 static void flush_output(void)
736 {
737         int i;
738
739         for (i = 0; i < max_cpus; i++) {
740                 struct per_cpu_info *pci = &per_cpu_info[i];
741
742                 if (pci->ofp) {
743                         fflush(pci->ofp);
744                         fclose(pci->ofp);
745                         pci->ofp = NULL;
746                 }
747         }
748 }
749
750 static void handle_sigint(int sig)
751 {
752         done = 1;
753         flush_output();
754 }
755
756 static void usage(char *prog)
757 {
758         fprintf(stderr, "Usage: %s -i <name> [-o <output>]\n", prog);
759 }
760
761 int main(int argc, char *argv[])
762 {
763         int c, ret;
764
765         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
766                 switch (c) {
767                 case 'i':
768                         dev = strdup(optarg);
769                         break;
770                 case 'o':
771                         output_name = strdup(optarg);
772                         break;
773                 case 'b':
774                         rb_batch = atoi(optarg);
775                         if (rb_batch <= 0)
776                                 rb_batch = RB_BATCH_DEFAULT;
777                         break;
778                 default:
779                         usage(argv[0]);
780                         return 1;
781                 }
782         }
783
784         if (!dev) {
785                 usage(argv[0]);
786                 return 1;
787         }
788
789         memset(&rb_root, 0, sizeof(rb_root));
790
791         signal(SIGINT, handle_sigint);
792         signal(SIGHUP, handle_sigint);
793         signal(SIGTERM, handle_sigint);
794
795         setlocale(LC_NUMERIC, "en_US");
796
797         if (!strcmp(dev, "-"))
798                 ret = do_stdin();
799         else
800                 ret = do_file();
801
802         show_stats();
803         flush_output();
804         return ret;
805 }