[PATCH] blkparse: change formatting of output a little
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
d0ca268b 32
8fc0abbc
JA
33#include "blktrace.h"
34#include "rbtree.h"
d0ca268b 35
2e3e8ded
JA
36#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
37#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
cfab07eb 38
d5396421
JA
39static int backwards;
40static unsigned long long genesis_time, last_reported_time;
41
152f6476
JA
42struct io_stats {
43 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
44 unsigned long ireads, iwrites;
45 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
46 unsigned long long iread_kb, iwrite_kb;
47};
48
d5396421 49struct per_cpu_info {
d0ca268b
JA
50 int cpu;
51 int nelems;
d0ca268b
JA
52
53 int fd;
87b72777 54 char fname[128];
d0ca268b 55
152f6476
JA
56 struct io_stats io_stats;
57};
8fc0abbc 58
152f6476
JA
59struct per_process_info {
60 char name[16];
61 __u32 pid;
62 struct io_stats io_stats;
63 struct per_process_info *hash_next, *list_next;
d0ca268b
JA
64};
65
152f6476
JA
66#define PPI_HASH_SHIFT (8)
67static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
68static struct per_process_info *ppi_list;
69
7997c5b0 70#define S_OPTS "i:o:b:st"
d5396421
JA
71static struct option l_opts[] = {
72 {
73 .name = "input",
74 .has_arg = 1,
75 .flag = NULL,
76 .val = 'i'
77 },
78 {
79 .name = "output",
80 .has_arg = 1,
81 .flag = NULL,
82 .val = 'o'
83 },
79f19470
JA
84 {
85 .name = "batch",
86 .has_arg = 1,
87 .flag = NULL,
88 .val = 'b'
89 },
152f6476
JA
90 {
91 .name = "per program stats",
92 .has_arg = 0,
93 .flag = NULL,
94 .val = 's'
95 },
7997c5b0
JA
96 {
97 .name = "track ios",
98 .has_arg = 0,
99 .flag = NULL,
100 .val = 't'
101 },
d5396421
JA
102 {
103 .name = NULL,
104 .has_arg = 0,
105 .flag = NULL,
106 .val = 0
107 }
108};
109
7997c5b0
JA
110static struct rb_root rb_sort_root;
111static struct rb_root rb_track_root;
8fc0abbc 112
7997c5b0
JA
113/*
114 * for sorting the displayed output
115 */
8fc0abbc
JA
116struct trace {
117 struct blk_io_trace *bit;
118 struct rb_node rb_node;
119};
120
7997c5b0
JA
121/*
122 * for tracking individual ios
123 */
124struct io_track {
125 struct rb_node rb_node;
126
127 __u64 sector;
128 __u32 pid;
129 unsigned long long queue_time;
130 unsigned long long dispatch_time;
131 unsigned long long completion_time;
132};
133
a718bd37
NS
134static int max_cpus;
135static struct per_cpu_info *per_cpu_info;
d0ca268b 136
87b72777
JA
137static unsigned long long events;
138
d5396421 139static char *dev, *output_name;
152f6476
JA
140static FILE *ofp;
141
142static int per_process_stats;
7997c5b0 143static int track_ios;
d0ca268b 144
79f19470
JA
145#define RB_BATCH_DEFAULT (1024)
146static int rb_batch = RB_BATCH_DEFAULT;
147
412819ce
JA
148#define is_done() (*(volatile int *)(&done))
149static volatile int done;
150
152f6476
JA
151static inline unsigned long hash_long(unsigned long val)
152{
16ef714e
JA
153#if __WORDSIZE == 32
154 val *= 0x9e370001UL;
155#elif __WORDSIZE == 64
156 val *= 0x9e37fffffffc0001UL;
157#else
158#error unknown word size
159#endif
160
161 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
162}
163
164static inline void add_process_to_hash(struct per_process_info *ppi)
165{
166 const int hash_idx = hash_long(ppi->pid);
167
168 ppi->hash_next = ppi_hash[hash_idx];
169 ppi_hash[hash_idx] = ppi;
170}
171
172static inline void add_process_to_list(struct per_process_info *ppi)
173{
174 ppi->list_next = ppi_list;
175 ppi_list = ppi;
176}
177
178static struct per_process_info *find_process_by_pid(__u32 pid)
179{
180 const int hash_idx = hash_long(pid);
181 struct per_process_info *ppi;
182
183 ppi = ppi_hash[hash_idx];
184 while (ppi) {
185 if (ppi->pid == pid)
186 return ppi;
187
188 ppi = ppi->hash_next;
189 }
190
191 return NULL;
192}
193
7997c5b0
JA
194static inline int trace_rb_insert(struct trace *t)
195{
196 struct rb_node **p = &rb_sort_root.rb_node;
197 struct rb_node *parent = NULL;
198 struct trace *__t;
199
200 while (*p) {
201 parent = *p;
202 __t = rb_entry(parent, struct trace, rb_node);
203
204 if (t->bit->sequence < __t->bit->sequence)
205 p = &(*p)->rb_left;
206 else if (t->bit->sequence > __t->bit->sequence)
207 p = &(*p)->rb_right;
208 else {
209 fprintf(stderr, "sequence alias!\n");
210 return 1;
211 }
212 }
213
214 rb_link_node(&t->rb_node, parent, p);
215 rb_insert_color(&t->rb_node, &rb_sort_root);
216 return 0;
217}
218
219static inline int track_rb_insert(struct io_track *iot)
220{
221 struct rb_node **p = &rb_track_root.rb_node;
222 struct rb_node *parent = NULL;
223 struct io_track *__iot;
224
225 while (*p) {
226 parent = *p;
227
228 __iot = rb_entry(parent, struct io_track, rb_node);
229
230 if (iot->sector < __iot->sector)
231 p = &(*p)->rb_left;
232 else if (iot->sector > __iot->sector)
233 p = &(*p)->rb_right;
234 else {
235 fprintf(stderr, "sequence alias!\n");
236 return 1;
237 }
238 }
239
240 rb_link_node(&iot->rb_node, parent, p);
241 rb_insert_color(&iot->rb_node, &rb_track_root);
242 return 0;
243}
244
245static struct io_track *__find_track(__u64 sector)
246{
247 struct rb_node **p = &rb_track_root.rb_node;
248 struct rb_node *parent = NULL;
249 struct io_track *__iot;
250
251 while (*p) {
252 parent = *p;
253
254 __iot = rb_entry(parent, struct io_track, rb_node);
255
256 if (sector < __iot->sector)
257 p = &(*p)->rb_left;
258 else if (sector > __iot->sector)
259 p = &(*p)->rb_right;
260 else
261 return __iot;
262 }
263
264 return NULL;
265}
266
267static struct io_track *find_track(__u64 sector)
268{
269 struct io_track *iot = __find_track(sector);
270
271 iot = __find_track(sector);
272 if (!iot) {
273 iot = malloc(sizeof(*iot));
274 iot->sector = sector;
275 track_rb_insert(iot);
276 }
277
278 return iot;
279}
280
2e3e8ded
JA
281static void log_track_merge(struct blk_io_trace *t)
282{
283 struct io_track *iot;
284
285 if (!track_ios)
286 return;
287 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
288 return;
289
290 iot = __find_track(t->sector - (t->bytes >> 10));
291 if (!iot) {
292 fprintf(stderr, "Trying to merge on non-existing request\n");
293 return;
294 }
295
296 rb_erase(&iot->rb_node, &rb_track_root);
297 iot->sector -= t->bytes >> 10;
298 track_rb_insert(iot);
299}
300
301static void log_track_queue(struct blk_io_trace *t)
302{
303 struct io_track *iot;
304
305 if (!track_ios)
306 return;
307
308 iot = find_track(t->sector);
309 iot->queue_time = t->time;
310}
311
312/*
313 * return time between queue and issue
314 */
315static unsigned long long log_track_issue(struct blk_io_trace *t)
316{
317 struct io_track *iot;
318
319 if (!track_ios)
320 return -1;
321 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
322 return -1;
323
324 iot = __find_track(t->sector);
325 if (!iot) {
326 fprintf(stderr, "Trying to issue on non-existing request\n");
327 return -1;
328 }
329
330 iot->dispatch_time = t->time;
331 return iot->dispatch_time - iot->queue_time;
332}
333
334/*
335 * return time between dispatch and complete
336 */
337static unsigned long long log_track_complete(struct blk_io_trace *t)
338{
339 unsigned long long elapsed;
340 struct io_track *iot;
341
342 if (!track_ios)
343 return -1;
344 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
345 return -1;
346
347 iot = __find_track(t->sector);
348 if (!iot) {
349 fprintf(stderr, "Trying to dispatch on non-existing request\n");
350 return -1;
351 }
352
353 iot->completion_time = t->time;
354 elapsed = iot->completion_time - iot->dispatch_time;
355
356 /*
357 * kill the trace, we don't need it after completion
358 */
359 rb_erase(&iot->rb_node, &rb_track_root);
360 free(iot);
361
362 return elapsed;
363}
364
365
152f6476
JA
366static struct io_stats *find_process_io_stats(__u32 pid, char *name)
367{
368 struct per_process_info *ppi = find_process_by_pid(pid);
369
370 if (!ppi) {
371 ppi = malloc(sizeof(*ppi));
372 memset(ppi, 0, sizeof(*ppi));
373 strncpy(ppi->name, name, sizeof(ppi->name));
374 ppi->pid = pid;
375 add_process_to_hash(ppi);
376 add_process_to_list(ppi);
377 }
378
379 return &ppi->io_stats;
380}
381
a718bd37
NS
382static void resize_cpu_info(int cpuid)
383{
384 int new_space, new_max = cpuid + 1;
385 char *new_start;
386
387 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
388 if (!per_cpu_info) {
389 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
390 new_max, (int) sizeof(*per_cpu_info));
391 exit(1);
392 }
393
394 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
395 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
396 memset(new_start, 0, new_space);
397 max_cpus = new_max;
398}
399
400static struct per_cpu_info *get_cpu_info(int cpu)
401{
c499bf38
JA
402 struct per_cpu_info *pci;
403
a718bd37
NS
404 if (cpu >= max_cpus)
405 resize_cpu_info(cpu);
406
c499bf38
JA
407 /*
408 * ->cpu might already be set, but just set it unconditionally
409 */
410 pci = &per_cpu_info[cpu];
411 pci->cpu = cpu;
412
413 return pci;
a718bd37
NS
414}
415
cfab07eb
AB
416static inline void check_time(struct blk_io_trace *bit)
417{
418 unsigned long long this = bit->time;
419 unsigned long long last = last_reported_time;
420
421 backwards = (this < last) ? 'B' : ' ';
422 last_reported_time = this;
423}
424
152f6476
JA
425static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
426 int rw)
d0ca268b
JA
427{
428 if (rw) {
152f6476
JA
429 ios->mwrites++;
430 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 431 } else {
152f6476
JA
432 ios->mreads++;
433 ios->qread_kb += t->bytes >> 10;
434 }
435}
436
437static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
438 int rw)
439{
440 __account_m(&pci->io_stats, t, rw);
441
442 if (per_process_stats) {
443 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
444
445 __account_m(ios, t, rw);
d0ca268b
JA
446 }
447}
448
152f6476
JA
449static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
450 int rw)
d0ca268b
JA
451{
452 if (rw) {
152f6476
JA
453 ios->qwrites++;
454 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 455 } else {
152f6476
JA
456 ios->qreads++;
457 ios->qread_kb += t->bytes >> 10;
458 }
459}
460
461static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
462 int rw)
463{
464 __account_q(&pci->io_stats, t, rw);
465
466 if (per_process_stats) {
467 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
468
469 __account_q(ios, t, rw);
d0ca268b
JA
470 }
471}
472
152f6476 473static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
474{
475 if (rw) {
152f6476
JA
476 ios->cwrites++;
477 ios->cwrite_kb += bytes >> 10;
d0ca268b 478 } else {
152f6476
JA
479 ios->creads++;
480 ios->cread_kb += bytes >> 10;
481 }
482}
483
484static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
485 int rw, int bytes)
486{
487 __account_c(&pci->io_stats, rw, bytes);
488
489 if (per_process_stats) {
490 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
491
492 __account_c(ios, rw, bytes);
d0ca268b
JA
493 }
494}
495
152f6476 496static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 497{
498 if (rw) {
152f6476
JA
499 ios->iwrites++;
500 ios->iwrite_kb += bytes >> 10;
afd2d7ad 501 } else {
152f6476
JA
502 ios->ireads++;
503 ios->iread_kb += bytes >> 10;
afd2d7ad 504 }
505}
506
152f6476
JA
507static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
508 int rw)
d0ca268b 509{
152f6476
JA
510 __account_i(&pci->io_stats, rw, t->bytes);
511
512 if (per_process_stats) {
513 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 514
152f6476
JA
515 __account_i(ios, rw, t->bytes);
516 }
517}
518
519static void output(struct per_cpu_info *pci, char *s)
520{
521 fprintf(ofp, "%s", s);
d0ca268b
JA
522}
523
3aabcd89
JA
524static char hstring[256];
525static char tstring[256];
d0ca268b 526
d5396421
JA
527static inline char *setup_header(struct per_cpu_info *pci,
528 struct blk_io_trace *t, char act)
d0ca268b
JA
529{
530 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
531 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
532 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
533 char rwbs[4];
534 int i = 0;
535
536 if (w)
537 rwbs[i++] = 'W';
538 else
539 rwbs[i++] = 'R';
540 if (b)
541 rwbs[i++] = 'B';
542 if (s)
543 rwbs[i++] = 'S';
544
545 rwbs[i] = '\0';
546
555d3a31 547 sprintf(hstring, "%2d %8ld %5Lu.%09Lu %5u %c %3s",
d5396421 548 pci->cpu,
cfab07eb
AB
549 (unsigned long)t->sequence, SECONDS(t->time),
550 NANO_SECONDS(t->time), t->pid, act, rwbs);
d0ca268b
JA
551
552 return hstring;
553}
554
d5396421
JA
555static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
556 char act)
d0ca268b 557{
2e3e8ded
JA
558 unsigned long long elapsed = log_track_complete(t);
559
560 if (elapsed != -1ULL) {
561 double usec = (double) elapsed / 1000;
562
555d3a31 563 sprintf(tstring,"%s %Lu + %u (%8.2f) [%d]\n",
2e3e8ded
JA
564 setup_header(pci, t, act),
565 (unsigned long long)t->sector, t->bytes >> 9,
566 usec, t->error);
567 } else {
568 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
569 (unsigned long long)t->sector, t->bytes >> 9, t->error);
570 }
571
d5396421 572 output(pci, tstring);
d0ca268b
JA
573}
574
d5396421
JA
575static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
576 char act)
d0ca268b 577{
2e3e8ded
JA
578 log_track_queue(t);
579
2955af9d
NS
580 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
581 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 582 output(pci, tstring);
d0ca268b
JA
583}
584
d5396421
JA
585static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
586 char act)
d0ca268b 587{
2e3e8ded
JA
588 unsigned long long elapsed = log_track_issue(t);
589
590 if (elapsed != -1ULL) {
591 double usec = (double) elapsed / 1000;
592
555d3a31 593 sprintf(tstring,"%s %Lu + %u (%8.2f) [%s]\n",
2e3e8ded
JA
594 setup_header(pci, t, act),
595 (unsigned long long)t->sector, t->bytes >> 9,
596 usec, t->comm);
597 } else {
598 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
599 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
600 }
601
d5396421 602 output(pci, tstring);
d0ca268b
JA
603}
604
d5396421
JA
605static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
606 char act)
d0ca268b 607{
2e3e8ded
JA
608 log_track_merge(t);
609
984c63b7 610 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
2955af9d 611 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 612 output(pci, tstring);
d0ca268b
JA
613}
614
d5396421
JA
615static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
616 char act)
d0ca268b 617{
2955af9d
NS
618 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
619 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 620 output(pci, tstring);
d0ca268b
JA
621}
622
d5396421 623static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
d0ca268b 624{
87b72777
JA
625 unsigned char *buf;
626 int i;
d0ca268b 627
d5396421
JA
628 sprintf(tstring,"%s ", setup_header(pci, t, act));
629 output(pci, tstring);
d0ca268b 630
87b72777 631 buf = (unsigned char *) t + sizeof(*t);
d0ca268b
JA
632 for (i = 0; i < t->pdu_len; i++) {
633 sprintf(tstring,"%02x ", buf[i]);
d5396421 634 output(pci, tstring);
d0ca268b
JA
635 }
636
637 if (act == 'C') {
2955af9d
NS
638 sprintf(tstring,"[%d]\n", t->error);
639 output(pci, tstring);
640 } else {
641 sprintf(tstring,"[%s]\n", t->comm);
d5396421 642 output(pci, tstring);
d0ca268b 643 }
87b72777 644 return 0;
d0ca268b
JA
645}
646
d5396421 647static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 648{
87b72777
JA
649 int ret = 0;
650
d0ca268b
JA
651 switch (t->action & 0xffff) {
652 case __BLK_TA_QUEUE:
d5396421 653 log_generic(pci, t, 'Q');
d0ca268b
JA
654 break;
655 case __BLK_TA_GETRQ:
d5396421 656 log_generic(pci, t, 'G');
d0ca268b
JA
657 break;
658 case __BLK_TA_SLEEPRQ:
d5396421 659 log_generic(pci, t, 'S');
d0ca268b
JA
660 break;
661 case __BLK_TA_REQUEUE:
d5396421 662 log_generic(pci, t, 'R');
d0ca268b
JA
663 break;
664 case __BLK_TA_ISSUE:
d5396421 665 ret = log_pc(pci, t, 'D');
d0ca268b
JA
666 break;
667 case __BLK_TA_COMPLETE:
d5396421 668 log_pc(pci, t, 'C');
d0ca268b
JA
669 break;
670 default:
671 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
672 ret = 1;
673 break;
d0ca268b
JA
674 }
675
87b72777 676 return ret;
d0ca268b
JA
677}
678
d5396421 679static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
680{
681 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 682 int act = t->action & 0xffff;
d0ca268b 683
7997c5b0 684 switch (act) {
d0ca268b 685 case __BLK_TA_QUEUE:
152f6476 686 account_q(t, pci, w);
d5396421 687 log_queue(pci, t, 'Q');
d0ca268b
JA
688 break;
689 case __BLK_TA_BACKMERGE:
152f6476 690 account_m(t, pci, w);
d5396421 691 log_merge(pci, t, 'M');
d0ca268b
JA
692 break;
693 case __BLK_TA_FRONTMERGE:
152f6476 694 account_m(t, pci, w);
d5396421 695 log_merge(pci, t, 'F');
d0ca268b
JA
696 break;
697 case __BLK_TA_GETRQ:
d5396421 698 log_generic(pci, t, 'G');
d0ca268b
JA
699 break;
700 case __BLK_TA_SLEEPRQ:
d5396421 701 log_generic(pci, t, 'S');
d0ca268b
JA
702 break;
703 case __BLK_TA_REQUEUE:
152f6476 704 account_c(t, pci, w, -t->bytes);
d5396421 705 log_queue(pci, t, 'R');
d0ca268b
JA
706 break;
707 case __BLK_TA_ISSUE:
152f6476 708 account_i(t, pci, w);
d5396421 709 log_issue(pci, t, 'D');
d0ca268b
JA
710 break;
711 case __BLK_TA_COMPLETE:
152f6476 712 account_c(t, pci, w, t->bytes);
d5396421 713 log_complete(pci, t, 'C');
d0ca268b
JA
714 break;
715 default:
716 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 717 break;
d0ca268b 718 }
d0ca268b
JA
719}
720
d5396421 721static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 722{
87b72777
JA
723 int ret = 0;
724
d0ca268b 725 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 726 ret = dump_trace_pc(t, pci);
d0ca268b 727 else
d5396421 728 dump_trace_fs(t, pci);
87b72777
JA
729
730 events++;
731 return ret;
d0ca268b
JA
732}
733
152f6476 734static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 735{
152f6476
JA
736 fprintf(ofp, "%s\n", msg);
737
738 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
739 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 740
152f6476
JA
741 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
742 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
743 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
744 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
745 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
0a6b8fc4 746
152f6476 747 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
5c017e4b
JA
748}
749
152f6476
JA
750static void show_process_stats(void)
751{
752 struct per_process_info *ppi;
753
754 ppi = ppi_list;
755 while (ppi) {
756 dump_io_stats(&ppi->io_stats, ppi->name);
757 ppi = ppi->list_next;
758 }
759
760 fprintf(ofp, "\n");
761}
762
763static void show_cpu_stats(void)
d0ca268b 764{
d5396421 765 struct per_cpu_info foo, *pci;
152f6476 766 struct io_stats *ios;
412819ce 767 int i, pci_events = 0;
5c017e4b
JA
768
769 memset(&foo, 0, sizeof(foo));
d0ca268b 770
a718bd37 771 for (i = 0; i < max_cpus; i++) {
152f6476
JA
772 char cpu[8];
773
d5396421 774 pci = &per_cpu_info[i];
152f6476 775 ios = &pci->io_stats;
5c017e4b 776
d5396421 777 if (!pci->nelems)
afd2d7ad 778 continue;
779
152f6476
JA
780 foo.io_stats.qreads += ios->qreads;
781 foo.io_stats.qwrites += ios->qwrites;
782 foo.io_stats.creads += ios->creads;
783 foo.io_stats.cwrites += ios->cwrites;
784 foo.io_stats.mreads += ios->mreads;
785 foo.io_stats.mwrites += ios->mwrites;
11e51068
NS
786 foo.io_stats.ireads += ios->ireads;
787 foo.io_stats.iwrites += ios->iwrites;
152f6476
JA
788 foo.io_stats.qread_kb += ios->qread_kb;
789 foo.io_stats.qwrite_kb += ios->qwrite_kb;
790 foo.io_stats.cread_kb += ios->cread_kb;
791 foo.io_stats.cwrite_kb += ios->cwrite_kb;
11e51068
NS
792 foo.io_stats.iread_kb += ios->iread_kb;
793 foo.io_stats.iwrite_kb += ios->iwrite_kb;
152f6476
JA
794
795 snprintf(cpu, sizeof(cpu) - 1, "CPU%d:", i);
796 dump_io_stats(ios, cpu);
412819ce 797 pci_events++;
5c017e4b
JA
798 }
799
412819ce 800 if (pci_events > 1) {
152f6476
JA
801 fprintf(ofp, "\n");
802 dump_io_stats(&foo.io_stats, "Total:");
5c017e4b 803 }
d0ca268b 804
152f6476 805 fprintf(ofp, "\nEvents: %'Lu\n", events);
d0ca268b
JA
806}
807
2ff323b0
JA
808#define min(a, b) ((a) < (b) ? (a) : (b))
809
810static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
811{
812 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
813 unsigned long off;
814 struct blk_io_trace *bit;
815 __u32 magic;
816
817 for (off = 0; off < max_offset; off++) {
818 bit = p + off;
819
820 magic = be32_to_cpu(bit->magic);
821 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
822 return bit;
823 }
824
825 return NULL;
826}
827
412819ce 828static int sort_entries(void *traces, unsigned long offset, int nr)
8fc0abbc 829{
412819ce 830 struct per_cpu_info *pci;
8fc0abbc
JA
831 struct blk_io_trace *bit;
832 struct trace *t;
833 void *start = traces;
834 int nelems = 0;
835
6fe4709e 836 while (traces - start <= offset - sizeof(*bit)) {
412819ce
JA
837 if (!nr)
838 break;
839
2ff323b0
JA
840 bit = find_trace(traces, offset - (traces - start), nr);
841 if (!bit)
842 break;
6fe4709e 843
8fc0abbc
JA
844 t = malloc(sizeof(*t));
845 t->bit = bit;
846 memset(&t->rb_node, 0, sizeof(t->rb_node));
847
6fe4709e
JA
848 trace_to_cpu(bit);
849
66fa7233
JA
850 if (verify_trace(bit))
851 break;
852
a718bd37 853 pci = get_cpu_info(bit->cpu);
412819ce
JA
854 pci->nelems++;
855
8fc0abbc
JA
856 if (trace_rb_insert(t))
857 return -1;
858
859 traces += sizeof(*bit) + bit->pdu_len;
860 nelems++;
412819ce 861 nr--;
6fe4709e 862 }
8fc0abbc
JA
863
864 return nelems;
865}
866
412819ce
JA
867static void free_entries_rb(void)
868{
869 struct rb_node *n;
870
7997c5b0 871 while ((n = rb_first(&rb_sort_root)) != NULL) {
412819ce
JA
872 struct trace *t = rb_entry(n, struct trace, rb_node);
873
7997c5b0 874 rb_erase(&t->rb_node, &rb_sort_root);
412819ce
JA
875 free(t);
876 }
877}
878
d5396421 879static void show_entries_rb(void)
8fc0abbc 880{
8fc0abbc 881 struct blk_io_trace *bit;
3aabcd89 882 struct rb_node *n;
8fc0abbc
JA
883 struct trace *t;
884 int cpu;
885
7997c5b0 886 n = rb_first(&rb_sort_root);
3aabcd89
JA
887 if (!n)
888 return;
8fc0abbc 889
3aabcd89 890 do {
8fc0abbc
JA
891 t = rb_entry(n, struct trace, rb_node);
892 bit = t->bit;
893
d5396421 894 cpu = bit->cpu;
87b72777 895 if (cpu > max_cpus) {
8fc0abbc 896 fprintf(stderr, "CPU number too large (%d)\n", cpu);
87b72777 897 break;
8fc0abbc
JA
898 }
899
cfab07eb
AB
900 if (genesis_time == 0)
901 genesis_time = bit->time;
902 bit->time -= genesis_time;
8fc0abbc 903
cfab07eb 904 check_time(bit);
8fc0abbc 905
d5396421 906 if (dump_trace(bit, &per_cpu_info[cpu]))
87b72777
JA
907 break;
908
8fc0abbc
JA
909 } while ((n = rb_next(n)) != NULL);
910}
911
1f79c4a0
JA
912static int read_data(int fd, void *buffer, int bytes, int block)
913{
914 int ret, bytes_left, fl;
915 void *p;
916
917 fl = fcntl(fd, F_GETFL);
918
919 if (!block)
920 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
921 else
922 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
923
924 bytes_left = bytes;
925 p = buffer;
926 while (bytes_left > 0) {
927 ret = read(fd, p, bytes_left);
928 if (!ret)
929 return 1;
930 else if (ret < 0) {
931 if (errno != EAGAIN)
932 perror("read");
933 return -1;
934 } else {
935 p += ret;
936 bytes_left -= ret;
937 }
938 }
939
940 return 0;
941}
942
d5396421 943static int do_file(void)
d0ca268b 944{
a718bd37 945 int i, nfiles;
d0ca268b 946
a718bd37
NS
947 for (i = 0, nfiles = 0;; i++, nfiles++) {
948 struct per_cpu_info *pci;
87b72777
JA
949 struct stat st;
950 void *tb;
951
a718bd37 952 pci = get_cpu_info(i);
d0ca268b 953
d5396421
JA
954 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
955 if (stat(pci->fname, &st) < 0)
d0ca268b 956 break;
afd2d7ad 957 if (!st.st_size)
958 continue;
d0ca268b 959
d5396421 960 printf("Processing %s\n", pci->fname);
8fc0abbc 961
87b72777 962 tb = malloc(st.st_size);
8fc0abbc 963
d5396421
JA
964 pci->fd = open(pci->fname, O_RDONLY);
965 if (pci->fd < 0) {
966 perror(pci->fname);
3aabcd89 967 break;
d0ca268b 968 }
afd2d7ad 969
1f79c4a0 970 if (read_data(pci->fd, tb, st.st_size, 1))
3aabcd89 971 break;
d0ca268b 972
a718bd37 973 if (sort_entries(tb, st.st_size, ~0U) == -1)
3aabcd89 974 break;
d0ca268b 975
d5396421 976 close(pci->fd);
d5396421
JA
977 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
978
979 }
980
981 if (!nfiles) {
982 fprintf(stderr, "No files found\n");
983 return 1;
984 }
985
986 show_entries_rb();
d5396421
JA
987 return 0;
988}
989
2ff323b0 990static void resize_buffer(void **buffer, long *size, long offset)
d5396421 991{
2ff323b0 992 long old_size = *size;
d5396421 993
2ff323b0
JA
994 *size *= 2;
995 *buffer = realloc(*buffer, *size);
996 memset(*buffer + offset, 0, *size - old_size);
412819ce 997}
d5396421 998
0a94916b 999static int read_sort_events(int fd, void **buffer, long *max_offset)
412819ce 1000{
0a94916b 1001 long offset;
412819ce 1002 int events;
d5396421 1003
0a94916b
JA
1004 if (*max_offset == 0) {
1005 *max_offset = 128 * sizeof(struct blk_io_trace);
1006 *buffer = malloc(*max_offset);
1007 }
1008
412819ce
JA
1009 events = 0;
1010 offset = 0;
d5396421 1011
412819ce
JA
1012 do {
1013 struct blk_io_trace *t;
1014 int pdu_len;
d5396421 1015
0a94916b
JA
1016 if (*max_offset - offset < sizeof(*t))
1017 resize_buffer(buffer, max_offset, offset);
d5396421 1018
412819ce
JA
1019 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
1020 if (events)
d5396421 1021 break;
412819ce
JA
1022
1023 usleep(1000);
1024 continue;
d5396421
JA
1025 }
1026
412819ce
JA
1027 t = *buffer + offset;
1028 offset += sizeof(*t);
d5396421 1029
412819ce 1030 pdu_len = be16_to_cpu(t->pdu_len);
2ff323b0 1031 if (pdu_len) {
0a94916b
JA
1032 if (*max_offset - offset < pdu_len)
1033 resize_buffer(buffer, max_offset, offset);
d5396421 1034
2ff323b0
JA
1035 if (read_data(fd, *buffer + offset, pdu_len, 1))
1036 break;
d5396421 1037
2ff323b0
JA
1038 offset += pdu_len;
1039 }
d5396421 1040
412819ce 1041 events++;
79f19470 1042 } while (!is_done() && events < rb_batch);
d5396421 1043
412819ce
JA
1044 return events;
1045}
d5396421 1046
412819ce
JA
1047static int do_stdin(void)
1048{
1049 int fd;
1050 void *ptr;
0a94916b 1051 long max_offset;
d5396421 1052
1f79c4a0 1053 fd = dup(STDIN_FILENO);
0a94916b 1054 max_offset = 0;
412819ce
JA
1055 do {
1056 int events;
d5396421 1057
0a94916b 1058 events = read_sort_events(fd, &ptr, &max_offset);
412819ce
JA
1059 if (!events)
1060 break;
1061
2ff323b0
JA
1062 if (sort_entries(ptr, ~0UL, events) == -1)
1063 break;
1064
412819ce
JA
1065 show_entries_rb();
1066 free_entries_rb();
d5396421
JA
1067 } while (1);
1068
0a94916b 1069 free(ptr);
d5396421 1070 close(fd);
d5396421
JA
1071 return 0;
1072}
d0ca268b 1073
1f79c4a0 1074static void flush_output(void)
412819ce 1075{
152f6476 1076 fflush(ofp);
412819ce
JA
1077}
1078
1f79c4a0 1079static void handle_sigint(int sig)
412819ce
JA
1080{
1081 done = 1;
1082 flush_output();
1083}
1084
1f79c4a0
JA
1085static void usage(char *prog)
1086{
152f6476 1087 fprintf(stderr, "Usage: %s -i <name> [-o <output>][-s]\n", prog);
1f79c4a0
JA
1088}
1089
d5396421
JA
1090int main(int argc, char *argv[])
1091{
152f6476 1092 char *ofp_buffer;
a66877e6 1093 int c, ret, mode;
d5396421
JA
1094
1095 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1096 switch (c) {
1097 case 'i':
66efebf8 1098 dev = optarg;
d5396421
JA
1099 break;
1100 case 'o':
66efebf8 1101 output_name = optarg;
d5396421 1102 break;
79f19470
JA
1103 case 'b':
1104 rb_batch = atoi(optarg);
1105 if (rb_batch <= 0)
1106 rb_batch = RB_BATCH_DEFAULT;
1107 break;
152f6476
JA
1108 case 's':
1109 per_process_stats = 1;
1110 break;
7997c5b0
JA
1111 case 't':
1112 track_ios = 1;
1113 break;
d5396421 1114 default:
1f79c4a0 1115 usage(argv[0]);
d5396421
JA
1116 return 1;
1117 }
d0ca268b
JA
1118 }
1119
d5396421 1120 if (!dev) {
1f79c4a0 1121 usage(argv[0]);
d5396421
JA
1122 return 1;
1123 }
1124
7997c5b0
JA
1125 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1126 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1127
1128 signal(SIGINT, handle_sigint);
1129 signal(SIGHUP, handle_sigint);
1130 signal(SIGTERM, handle_sigint);
d5396421 1131
d69db225
JA
1132 setlocale(LC_NUMERIC, "en_US");
1133
a66877e6 1134 if (!output_name) {
152f6476 1135 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1136 mode = _IOLBF;
1137 } else {
152f6476
JA
1138 char ofname[128];
1139
1140 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1141 ofp = fopen(ofname, "w");
a66877e6 1142 mode = _IOFBF;
152f6476
JA
1143 }
1144
1145 if (!ofp) {
1146 perror("fopen");
1147 return 1;
1148 }
1149
1150 ofp_buffer = malloc(4096);
a66877e6 1151 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1152 perror("setvbuf");
1153 return 1;
1154 }
1155
d5396421
JA
1156 if (!strcmp(dev, "-"))
1157 ret = do_stdin();
1158 else
1159 ret = do_file();
1160
152f6476
JA
1161 if (per_process_stats)
1162 show_process_stats();
1163
1164 show_cpu_stats();
1165
412819ce 1166 flush_output();
d5396421 1167 return ret;
d0ca268b 1168}