[PATCH] blkparse: Add request allocation -> queue time
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
d0ca268b 32
8fc0abbc
JA
33#include "blktrace.h"
34#include "rbtree.h"
d0ca268b 35
2e3e8ded
JA
36#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
37#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
cfab07eb 38
d5396421
JA
39static int backwards;
40static unsigned long long genesis_time, last_reported_time;
41
152f6476
JA
42struct io_stats {
43 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
44 unsigned long ireads, iwrites;
45 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
46 unsigned long long iread_kb, iwrite_kb;
47};
48
d5396421 49struct per_cpu_info {
d0ca268b
JA
50 int cpu;
51 int nelems;
d0ca268b
JA
52
53 int fd;
87b72777 54 char fname[128];
d0ca268b 55
152f6476
JA
56 struct io_stats io_stats;
57};
8fc0abbc 58
152f6476
JA
59struct per_process_info {
60 char name[16];
61 __u32 pid;
62 struct io_stats io_stats;
63 struct per_process_info *hash_next, *list_next;
d0ca268b
JA
64};
65
152f6476
JA
66#define PPI_HASH_SHIFT (8)
67static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
68static struct per_process_info *ppi_list;
69
7997c5b0 70#define S_OPTS "i:o:b:st"
d5396421
JA
71static struct option l_opts[] = {
72 {
73 .name = "input",
74 .has_arg = 1,
75 .flag = NULL,
76 .val = 'i'
77 },
78 {
79 .name = "output",
80 .has_arg = 1,
81 .flag = NULL,
82 .val = 'o'
83 },
79f19470
JA
84 {
85 .name = "batch",
86 .has_arg = 1,
87 .flag = NULL,
88 .val = 'b'
89 },
152f6476
JA
90 {
91 .name = "per program stats",
92 .has_arg = 0,
93 .flag = NULL,
94 .val = 's'
95 },
7997c5b0
JA
96 {
97 .name = "track ios",
98 .has_arg = 0,
99 .flag = NULL,
100 .val = 't'
101 },
d5396421
JA
102 {
103 .name = NULL,
104 .has_arg = 0,
105 .flag = NULL,
106 .val = 0
107 }
108};
109
7997c5b0
JA
110static struct rb_root rb_sort_root;
111static struct rb_root rb_track_root;
8fc0abbc 112
7997c5b0
JA
113/*
114 * for sorting the displayed output
115 */
8fc0abbc
JA
116struct trace {
117 struct blk_io_trace *bit;
118 struct rb_node rb_node;
119};
120
7997c5b0
JA
121/*
122 * for tracking individual ios
123 */
124struct io_track {
125 struct rb_node rb_node;
126
127 __u64 sector;
128 __u32 pid;
95c15013 129 unsigned long long allocation_time;
7997c5b0
JA
130 unsigned long long queue_time;
131 unsigned long long dispatch_time;
132 unsigned long long completion_time;
133};
134
a718bd37
NS
135static int max_cpus;
136static struct per_cpu_info *per_cpu_info;
d0ca268b 137
87b72777
JA
138static unsigned long long events;
139
d5396421 140static char *dev, *output_name;
152f6476
JA
141static FILE *ofp;
142
143static int per_process_stats;
7997c5b0 144static int track_ios;
d0ca268b 145
79f19470
JA
146#define RB_BATCH_DEFAULT (1024)
147static int rb_batch = RB_BATCH_DEFAULT;
148
412819ce
JA
149#define is_done() (*(volatile int *)(&done))
150static volatile int done;
151
152f6476
JA
152static inline unsigned long hash_long(unsigned long val)
153{
16ef714e
JA
154#if __WORDSIZE == 32
155 val *= 0x9e370001UL;
156#elif __WORDSIZE == 64
157 val *= 0x9e37fffffffc0001UL;
158#else
159#error unknown word size
160#endif
161
162 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
163}
164
165static inline void add_process_to_hash(struct per_process_info *ppi)
166{
167 const int hash_idx = hash_long(ppi->pid);
168
169 ppi->hash_next = ppi_hash[hash_idx];
170 ppi_hash[hash_idx] = ppi;
171}
172
173static inline void add_process_to_list(struct per_process_info *ppi)
174{
175 ppi->list_next = ppi_list;
176 ppi_list = ppi;
177}
178
179static struct per_process_info *find_process_by_pid(__u32 pid)
180{
181 const int hash_idx = hash_long(pid);
182 struct per_process_info *ppi;
183
184 ppi = ppi_hash[hash_idx];
185 while (ppi) {
186 if (ppi->pid == pid)
187 return ppi;
188
189 ppi = ppi->hash_next;
190 }
191
192 return NULL;
193}
194
7997c5b0
JA
195static inline int trace_rb_insert(struct trace *t)
196{
197 struct rb_node **p = &rb_sort_root.rb_node;
198 struct rb_node *parent = NULL;
199 struct trace *__t;
200
201 while (*p) {
202 parent = *p;
203 __t = rb_entry(parent, struct trace, rb_node);
204
205 if (t->bit->sequence < __t->bit->sequence)
206 p = &(*p)->rb_left;
207 else if (t->bit->sequence > __t->bit->sequence)
208 p = &(*p)->rb_right;
209 else {
210 fprintf(stderr, "sequence alias!\n");
211 return 1;
212 }
213 }
214
215 rb_link_node(&t->rb_node, parent, p);
216 rb_insert_color(&t->rb_node, &rb_sort_root);
217 return 0;
218}
219
220static inline int track_rb_insert(struct io_track *iot)
221{
222 struct rb_node **p = &rb_track_root.rb_node;
223 struct rb_node *parent = NULL;
224 struct io_track *__iot;
225
226 while (*p) {
227 parent = *p;
228
229 __iot = rb_entry(parent, struct io_track, rb_node);
230
231 if (iot->sector < __iot->sector)
232 p = &(*p)->rb_left;
233 else if (iot->sector > __iot->sector)
234 p = &(*p)->rb_right;
235 else {
236 fprintf(stderr, "sequence alias!\n");
237 return 1;
238 }
239 }
240
241 rb_link_node(&iot->rb_node, parent, p);
242 rb_insert_color(&iot->rb_node, &rb_track_root);
243 return 0;
244}
245
246static struct io_track *__find_track(__u64 sector)
247{
248 struct rb_node **p = &rb_track_root.rb_node;
249 struct rb_node *parent = NULL;
250 struct io_track *__iot;
251
252 while (*p) {
253 parent = *p;
254
255 __iot = rb_entry(parent, struct io_track, rb_node);
256
257 if (sector < __iot->sector)
258 p = &(*p)->rb_left;
259 else if (sector > __iot->sector)
260 p = &(*p)->rb_right;
261 else
262 return __iot;
263 }
264
265 return NULL;
266}
267
268static struct io_track *find_track(__u64 sector)
269{
270 struct io_track *iot = __find_track(sector);
271
272 iot = __find_track(sector);
273 if (!iot) {
274 iot = malloc(sizeof(*iot));
275 iot->sector = sector;
276 track_rb_insert(iot);
277 }
278
279 return iot;
280}
281
2e3e8ded
JA
282static void log_track_merge(struct blk_io_trace *t)
283{
284 struct io_track *iot;
285
286 if (!track_ios)
287 return;
288 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
289 return;
290
291 iot = __find_track(t->sector - (t->bytes >> 10));
292 if (!iot) {
293 fprintf(stderr, "Trying to merge on non-existing request\n");
294 return;
295 }
296
297 rb_erase(&iot->rb_node, &rb_track_root);
298 iot->sector -= t->bytes >> 10;
299 track_rb_insert(iot);
300}
301
95c15013 302static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
303{
304 struct io_track *iot;
305
306 if (!track_ios)
307 return;
308
95c15013
JA
309 iot = find_track(t->sector);
310 iot->allocation_time = t->time;
311}
312
313
314/*
315 * return time between rq allocation and queue
316 */
317static unsigned long long log_track_queue(struct blk_io_trace *t)
318{
319 struct io_track *iot;
320
321 if (!track_ios)
322 return -1;
323
2e3e8ded
JA
324 iot = find_track(t->sector);
325 iot->queue_time = t->time;
95c15013 326 return iot->queue_time - iot->allocation_time;
2e3e8ded
JA
327}
328
329/*
330 * return time between queue and issue
331 */
332static unsigned long long log_track_issue(struct blk_io_trace *t)
333{
334 struct io_track *iot;
335
336 if (!track_ios)
337 return -1;
338 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
339 return -1;
340
341 iot = __find_track(t->sector);
342 if (!iot) {
343 fprintf(stderr, "Trying to issue on non-existing request\n");
344 return -1;
345 }
346
347 iot->dispatch_time = t->time;
348 return iot->dispatch_time - iot->queue_time;
349}
350
351/*
352 * return time between dispatch and complete
353 */
354static unsigned long long log_track_complete(struct blk_io_trace *t)
355{
356 unsigned long long elapsed;
357 struct io_track *iot;
358
359 if (!track_ios)
360 return -1;
361 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
362 return -1;
363
364 iot = __find_track(t->sector);
365 if (!iot) {
366 fprintf(stderr, "Trying to dispatch on non-existing request\n");
367 return -1;
368 }
369
370 iot->completion_time = t->time;
371 elapsed = iot->completion_time - iot->dispatch_time;
372
373 /*
374 * kill the trace, we don't need it after completion
375 */
376 rb_erase(&iot->rb_node, &rb_track_root);
377 free(iot);
378
379 return elapsed;
380}
381
382
152f6476
JA
383static struct io_stats *find_process_io_stats(__u32 pid, char *name)
384{
385 struct per_process_info *ppi = find_process_by_pid(pid);
386
387 if (!ppi) {
388 ppi = malloc(sizeof(*ppi));
389 memset(ppi, 0, sizeof(*ppi));
390 strncpy(ppi->name, name, sizeof(ppi->name));
391 ppi->pid = pid;
392 add_process_to_hash(ppi);
393 add_process_to_list(ppi);
394 }
395
396 return &ppi->io_stats;
397}
398
a718bd37
NS
399static void resize_cpu_info(int cpuid)
400{
401 int new_space, new_max = cpuid + 1;
402 char *new_start;
403
404 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
405 if (!per_cpu_info) {
406 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
407 new_max, (int) sizeof(*per_cpu_info));
408 exit(1);
409 }
410
411 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
412 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
413 memset(new_start, 0, new_space);
414 max_cpus = new_max;
415}
416
417static struct per_cpu_info *get_cpu_info(int cpu)
418{
c499bf38
JA
419 struct per_cpu_info *pci;
420
a718bd37
NS
421 if (cpu >= max_cpus)
422 resize_cpu_info(cpu);
423
c499bf38
JA
424 /*
425 * ->cpu might already be set, but just set it unconditionally
426 */
427 pci = &per_cpu_info[cpu];
428 pci->cpu = cpu;
429
430 return pci;
a718bd37
NS
431}
432
cfab07eb
AB
433static inline void check_time(struct blk_io_trace *bit)
434{
435 unsigned long long this = bit->time;
436 unsigned long long last = last_reported_time;
437
438 backwards = (this < last) ? 'B' : ' ';
439 last_reported_time = this;
440}
441
152f6476
JA
442static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
443 int rw)
d0ca268b
JA
444{
445 if (rw) {
152f6476
JA
446 ios->mwrites++;
447 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 448 } else {
152f6476
JA
449 ios->mreads++;
450 ios->qread_kb += t->bytes >> 10;
451 }
452}
453
454static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
455 int rw)
456{
457 __account_m(&pci->io_stats, t, rw);
458
459 if (per_process_stats) {
460 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
461
462 __account_m(ios, t, rw);
d0ca268b
JA
463 }
464}
465
152f6476
JA
466static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
467 int rw)
d0ca268b
JA
468{
469 if (rw) {
152f6476
JA
470 ios->qwrites++;
471 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 472 } else {
152f6476
JA
473 ios->qreads++;
474 ios->qread_kb += t->bytes >> 10;
475 }
476}
477
478static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
479 int rw)
480{
481 __account_q(&pci->io_stats, t, rw);
482
483 if (per_process_stats) {
484 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
485
486 __account_q(ios, t, rw);
d0ca268b
JA
487 }
488}
489
152f6476 490static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
491{
492 if (rw) {
152f6476
JA
493 ios->cwrites++;
494 ios->cwrite_kb += bytes >> 10;
d0ca268b 495 } else {
152f6476
JA
496 ios->creads++;
497 ios->cread_kb += bytes >> 10;
498 }
499}
500
501static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
502 int rw, int bytes)
503{
504 __account_c(&pci->io_stats, rw, bytes);
505
506 if (per_process_stats) {
507 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
508
509 __account_c(ios, rw, bytes);
d0ca268b
JA
510 }
511}
512
152f6476 513static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 514{
515 if (rw) {
152f6476
JA
516 ios->iwrites++;
517 ios->iwrite_kb += bytes >> 10;
afd2d7ad 518 } else {
152f6476
JA
519 ios->ireads++;
520 ios->iread_kb += bytes >> 10;
afd2d7ad 521 }
522}
523
152f6476
JA
524static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
525 int rw)
d0ca268b 526{
152f6476
JA
527 __account_i(&pci->io_stats, rw, t->bytes);
528
529 if (per_process_stats) {
530 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 531
152f6476
JA
532 __account_i(ios, rw, t->bytes);
533 }
534}
535
536static void output(struct per_cpu_info *pci, char *s)
537{
538 fprintf(ofp, "%s", s);
d0ca268b
JA
539}
540
3aabcd89
JA
541static char hstring[256];
542static char tstring[256];
d0ca268b 543
d5396421
JA
544static inline char *setup_header(struct per_cpu_info *pci,
545 struct blk_io_trace *t, char act)
d0ca268b
JA
546{
547 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
548 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
549 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
550 char rwbs[4];
551 int i = 0;
552
553 if (w)
554 rwbs[i++] = 'W';
555 else
556 rwbs[i++] = 'R';
557 if (b)
558 rwbs[i++] = 'B';
559 if (s)
560 rwbs[i++] = 'S';
561
562 rwbs[i] = '\0';
563
555d3a31 564 sprintf(hstring, "%2d %8ld %5Lu.%09Lu %5u %c %3s",
d5396421 565 pci->cpu,
cfab07eb
AB
566 (unsigned long)t->sequence, SECONDS(t->time),
567 NANO_SECONDS(t->time), t->pid, act, rwbs);
d0ca268b
JA
568
569 return hstring;
570}
571
d5396421
JA
572static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
573 char act)
d0ca268b 574{
2e3e8ded
JA
575 unsigned long long elapsed = log_track_complete(t);
576
577 if (elapsed != -1ULL) {
578 double usec = (double) elapsed / 1000;
579
555d3a31 580 sprintf(tstring,"%s %Lu + %u (%8.2f) [%d]\n",
2e3e8ded
JA
581 setup_header(pci, t, act),
582 (unsigned long long)t->sector, t->bytes >> 9,
583 usec, t->error);
584 } else {
585 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
586 (unsigned long long)t->sector, t->bytes >> 9, t->error);
587 }
588
d5396421 589 output(pci, tstring);
d0ca268b
JA
590}
591
d5396421
JA
592static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
593 char act)
d0ca268b 594{
95c15013 595 unsigned long long elapsed = log_track_queue(t);
2e3e8ded 596
95c15013
JA
597 if (elapsed != -1ULL) {
598 double usec = (double) elapsed / 1000;
599
600 sprintf(tstring,"%s %Lu + %u (%8.2f) [%s]\n",
601 setup_header(pci, t, act),
602 (unsigned long long)t->sector, t->bytes >> 9,
603 usec, t->comm);
604 } else {
605 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
606 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
607 }
d5396421 608 output(pci, tstring);
d0ca268b
JA
609}
610
d5396421
JA
611static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
612 char act)
d0ca268b 613{
2e3e8ded
JA
614 unsigned long long elapsed = log_track_issue(t);
615
616 if (elapsed != -1ULL) {
617 double usec = (double) elapsed / 1000;
618
555d3a31 619 sprintf(tstring,"%s %Lu + %u (%8.2f) [%s]\n",
2e3e8ded
JA
620 setup_header(pci, t, act),
621 (unsigned long long)t->sector, t->bytes >> 9,
622 usec, t->comm);
623 } else {
624 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
625 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
626 }
627
d5396421 628 output(pci, tstring);
d0ca268b
JA
629}
630
d5396421
JA
631static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
632 char act)
d0ca268b 633{
2e3e8ded
JA
634 log_track_merge(t);
635
984c63b7 636 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
2955af9d 637 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 638 output(pci, tstring);
d0ca268b
JA
639}
640
d5396421
JA
641static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
642 char act)
d0ca268b 643{
2955af9d
NS
644 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
645 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 646 output(pci, tstring);
d0ca268b
JA
647}
648
d5396421 649static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
d0ca268b 650{
87b72777
JA
651 unsigned char *buf;
652 int i;
d0ca268b 653
d5396421
JA
654 sprintf(tstring,"%s ", setup_header(pci, t, act));
655 output(pci, tstring);
d0ca268b 656
87b72777 657 buf = (unsigned char *) t + sizeof(*t);
d0ca268b
JA
658 for (i = 0; i < t->pdu_len; i++) {
659 sprintf(tstring,"%02x ", buf[i]);
d5396421 660 output(pci, tstring);
d0ca268b
JA
661 }
662
663 if (act == 'C') {
2955af9d
NS
664 sprintf(tstring,"[%d]\n", t->error);
665 output(pci, tstring);
666 } else {
667 sprintf(tstring,"[%s]\n", t->comm);
d5396421 668 output(pci, tstring);
d0ca268b 669 }
87b72777 670 return 0;
d0ca268b
JA
671}
672
d5396421 673static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 674{
87b72777
JA
675 int ret = 0;
676
d0ca268b
JA
677 switch (t->action & 0xffff) {
678 case __BLK_TA_QUEUE:
d5396421 679 log_generic(pci, t, 'Q');
d0ca268b
JA
680 break;
681 case __BLK_TA_GETRQ:
d5396421 682 log_generic(pci, t, 'G');
d0ca268b
JA
683 break;
684 case __BLK_TA_SLEEPRQ:
d5396421 685 log_generic(pci, t, 'S');
d0ca268b
JA
686 break;
687 case __BLK_TA_REQUEUE:
d5396421 688 log_generic(pci, t, 'R');
d0ca268b
JA
689 break;
690 case __BLK_TA_ISSUE:
d5396421 691 ret = log_pc(pci, t, 'D');
d0ca268b
JA
692 break;
693 case __BLK_TA_COMPLETE:
d5396421 694 log_pc(pci, t, 'C');
d0ca268b
JA
695 break;
696 default:
697 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
698 ret = 1;
699 break;
d0ca268b
JA
700 }
701
87b72777 702 return ret;
d0ca268b
JA
703}
704
d5396421 705static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
706{
707 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 708 int act = t->action & 0xffff;
d0ca268b 709
7997c5b0 710 switch (act) {
d0ca268b 711 case __BLK_TA_QUEUE:
152f6476 712 account_q(t, pci, w);
d5396421 713 log_queue(pci, t, 'Q');
d0ca268b
JA
714 break;
715 case __BLK_TA_BACKMERGE:
152f6476 716 account_m(t, pci, w);
d5396421 717 log_merge(pci, t, 'M');
d0ca268b
JA
718 break;
719 case __BLK_TA_FRONTMERGE:
152f6476 720 account_m(t, pci, w);
d5396421 721 log_merge(pci, t, 'F');
d0ca268b
JA
722 break;
723 case __BLK_TA_GETRQ:
95c15013 724 log_track_getrq(t);
d5396421 725 log_generic(pci, t, 'G');
d0ca268b
JA
726 break;
727 case __BLK_TA_SLEEPRQ:
d5396421 728 log_generic(pci, t, 'S');
d0ca268b
JA
729 break;
730 case __BLK_TA_REQUEUE:
152f6476 731 account_c(t, pci, w, -t->bytes);
d5396421 732 log_queue(pci, t, 'R');
d0ca268b
JA
733 break;
734 case __BLK_TA_ISSUE:
152f6476 735 account_i(t, pci, w);
d5396421 736 log_issue(pci, t, 'D');
d0ca268b
JA
737 break;
738 case __BLK_TA_COMPLETE:
152f6476 739 account_c(t, pci, w, t->bytes);
d5396421 740 log_complete(pci, t, 'C');
d0ca268b
JA
741 break;
742 default:
743 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 744 break;
d0ca268b 745 }
d0ca268b
JA
746}
747
d5396421 748static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 749{
87b72777
JA
750 int ret = 0;
751
d0ca268b 752 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 753 ret = dump_trace_pc(t, pci);
d0ca268b 754 else
d5396421 755 dump_trace_fs(t, pci);
87b72777
JA
756
757 events++;
758 return ret;
d0ca268b
JA
759}
760
152f6476 761static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 762{
152f6476
JA
763 fprintf(ofp, "%s\n", msg);
764
765 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
766 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 767
152f6476
JA
768 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
769 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
770 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
771 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
772 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
0a6b8fc4 773
152f6476 774 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
5c017e4b
JA
775}
776
152f6476
JA
777static void show_process_stats(void)
778{
779 struct per_process_info *ppi;
780
781 ppi = ppi_list;
782 while (ppi) {
783 dump_io_stats(&ppi->io_stats, ppi->name);
784 ppi = ppi->list_next;
785 }
786
787 fprintf(ofp, "\n");
788}
789
790static void show_cpu_stats(void)
d0ca268b 791{
d5396421 792 struct per_cpu_info foo, *pci;
152f6476 793 struct io_stats *ios;
412819ce 794 int i, pci_events = 0;
5c017e4b
JA
795
796 memset(&foo, 0, sizeof(foo));
d0ca268b 797
a718bd37 798 for (i = 0; i < max_cpus; i++) {
152f6476
JA
799 char cpu[8];
800
d5396421 801 pci = &per_cpu_info[i];
152f6476 802 ios = &pci->io_stats;
5c017e4b 803
d5396421 804 if (!pci->nelems)
afd2d7ad 805 continue;
806
152f6476
JA
807 foo.io_stats.qreads += ios->qreads;
808 foo.io_stats.qwrites += ios->qwrites;
809 foo.io_stats.creads += ios->creads;
810 foo.io_stats.cwrites += ios->cwrites;
811 foo.io_stats.mreads += ios->mreads;
812 foo.io_stats.mwrites += ios->mwrites;
11e51068
NS
813 foo.io_stats.ireads += ios->ireads;
814 foo.io_stats.iwrites += ios->iwrites;
152f6476
JA
815 foo.io_stats.qread_kb += ios->qread_kb;
816 foo.io_stats.qwrite_kb += ios->qwrite_kb;
817 foo.io_stats.cread_kb += ios->cread_kb;
818 foo.io_stats.cwrite_kb += ios->cwrite_kb;
11e51068
NS
819 foo.io_stats.iread_kb += ios->iread_kb;
820 foo.io_stats.iwrite_kb += ios->iwrite_kb;
152f6476
JA
821
822 snprintf(cpu, sizeof(cpu) - 1, "CPU%d:", i);
823 dump_io_stats(ios, cpu);
412819ce 824 pci_events++;
5c017e4b
JA
825 }
826
412819ce 827 if (pci_events > 1) {
152f6476
JA
828 fprintf(ofp, "\n");
829 dump_io_stats(&foo.io_stats, "Total:");
5c017e4b 830 }
d0ca268b 831
152f6476 832 fprintf(ofp, "\nEvents: %'Lu\n", events);
d0ca268b
JA
833}
834
2ff323b0
JA
835#define min(a, b) ((a) < (b) ? (a) : (b))
836
837static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
838{
839 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
840 unsigned long off;
841 struct blk_io_trace *bit;
842 __u32 magic;
843
844 for (off = 0; off < max_offset; off++) {
845 bit = p + off;
846
847 magic = be32_to_cpu(bit->magic);
848 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
849 return bit;
850 }
851
852 return NULL;
853}
854
412819ce 855static int sort_entries(void *traces, unsigned long offset, int nr)
8fc0abbc 856{
412819ce 857 struct per_cpu_info *pci;
8fc0abbc
JA
858 struct blk_io_trace *bit;
859 struct trace *t;
860 void *start = traces;
861 int nelems = 0;
862
6fe4709e 863 while (traces - start <= offset - sizeof(*bit)) {
412819ce
JA
864 if (!nr)
865 break;
866
2ff323b0
JA
867 bit = find_trace(traces, offset - (traces - start), nr);
868 if (!bit)
869 break;
6fe4709e 870
8fc0abbc
JA
871 t = malloc(sizeof(*t));
872 t->bit = bit;
873 memset(&t->rb_node, 0, sizeof(t->rb_node));
874
6fe4709e
JA
875 trace_to_cpu(bit);
876
66fa7233
JA
877 if (verify_trace(bit))
878 break;
879
a718bd37 880 pci = get_cpu_info(bit->cpu);
412819ce
JA
881 pci->nelems++;
882
8fc0abbc
JA
883 if (trace_rb_insert(t))
884 return -1;
885
886 traces += sizeof(*bit) + bit->pdu_len;
887 nelems++;
412819ce 888 nr--;
6fe4709e 889 }
8fc0abbc
JA
890
891 return nelems;
892}
893
412819ce
JA
894static void free_entries_rb(void)
895{
896 struct rb_node *n;
897
7997c5b0 898 while ((n = rb_first(&rb_sort_root)) != NULL) {
412819ce
JA
899 struct trace *t = rb_entry(n, struct trace, rb_node);
900
7997c5b0 901 rb_erase(&t->rb_node, &rb_sort_root);
412819ce
JA
902 free(t);
903 }
904}
905
d5396421 906static void show_entries_rb(void)
8fc0abbc 907{
8fc0abbc 908 struct blk_io_trace *bit;
3aabcd89 909 struct rb_node *n;
8fc0abbc
JA
910 struct trace *t;
911 int cpu;
912
7997c5b0 913 n = rb_first(&rb_sort_root);
3aabcd89
JA
914 if (!n)
915 return;
8fc0abbc 916
3aabcd89 917 do {
8fc0abbc
JA
918 t = rb_entry(n, struct trace, rb_node);
919 bit = t->bit;
920
d5396421 921 cpu = bit->cpu;
87b72777 922 if (cpu > max_cpus) {
8fc0abbc 923 fprintf(stderr, "CPU number too large (%d)\n", cpu);
87b72777 924 break;
8fc0abbc
JA
925 }
926
cfab07eb
AB
927 if (genesis_time == 0)
928 genesis_time = bit->time;
929 bit->time -= genesis_time;
8fc0abbc 930
cfab07eb 931 check_time(bit);
8fc0abbc 932
d5396421 933 if (dump_trace(bit, &per_cpu_info[cpu]))
87b72777
JA
934 break;
935
8fc0abbc
JA
936 } while ((n = rb_next(n)) != NULL);
937}
938
1f79c4a0
JA
939static int read_data(int fd, void *buffer, int bytes, int block)
940{
941 int ret, bytes_left, fl;
942 void *p;
943
944 fl = fcntl(fd, F_GETFL);
945
946 if (!block)
947 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
948 else
949 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
950
951 bytes_left = bytes;
952 p = buffer;
953 while (bytes_left > 0) {
954 ret = read(fd, p, bytes_left);
955 if (!ret)
956 return 1;
957 else if (ret < 0) {
958 if (errno != EAGAIN)
959 perror("read");
960 return -1;
961 } else {
962 p += ret;
963 bytes_left -= ret;
964 }
965 }
966
967 return 0;
968}
969
d5396421 970static int do_file(void)
d0ca268b 971{
a718bd37 972 int i, nfiles;
d0ca268b 973
a718bd37
NS
974 for (i = 0, nfiles = 0;; i++, nfiles++) {
975 struct per_cpu_info *pci;
87b72777
JA
976 struct stat st;
977 void *tb;
978
a718bd37 979 pci = get_cpu_info(i);
d0ca268b 980
d5396421
JA
981 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
982 if (stat(pci->fname, &st) < 0)
d0ca268b 983 break;
afd2d7ad 984 if (!st.st_size)
985 continue;
d0ca268b 986
d5396421 987 printf("Processing %s\n", pci->fname);
8fc0abbc 988
87b72777 989 tb = malloc(st.st_size);
8fc0abbc 990
d5396421
JA
991 pci->fd = open(pci->fname, O_RDONLY);
992 if (pci->fd < 0) {
993 perror(pci->fname);
3aabcd89 994 break;
d0ca268b 995 }
afd2d7ad 996
1f79c4a0 997 if (read_data(pci->fd, tb, st.st_size, 1))
3aabcd89 998 break;
d0ca268b 999
a718bd37 1000 if (sort_entries(tb, st.st_size, ~0U) == -1)
3aabcd89 1001 break;
d0ca268b 1002
d5396421 1003 close(pci->fd);
d5396421
JA
1004 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
1005
1006 }
1007
1008 if (!nfiles) {
1009 fprintf(stderr, "No files found\n");
1010 return 1;
1011 }
1012
1013 show_entries_rb();
d5396421
JA
1014 return 0;
1015}
1016
2ff323b0 1017static void resize_buffer(void **buffer, long *size, long offset)
d5396421 1018{
2ff323b0 1019 long old_size = *size;
d5396421 1020
2ff323b0
JA
1021 *size *= 2;
1022 *buffer = realloc(*buffer, *size);
1023 memset(*buffer + offset, 0, *size - old_size);
412819ce 1024}
d5396421 1025
0a94916b 1026static int read_sort_events(int fd, void **buffer, long *max_offset)
412819ce 1027{
0a94916b 1028 long offset;
412819ce 1029 int events;
d5396421 1030
0a94916b
JA
1031 if (*max_offset == 0) {
1032 *max_offset = 128 * sizeof(struct blk_io_trace);
1033 *buffer = malloc(*max_offset);
1034 }
1035
412819ce
JA
1036 events = 0;
1037 offset = 0;
d5396421 1038
412819ce
JA
1039 do {
1040 struct blk_io_trace *t;
1041 int pdu_len;
d5396421 1042
0a94916b
JA
1043 if (*max_offset - offset < sizeof(*t))
1044 resize_buffer(buffer, max_offset, offset);
d5396421 1045
412819ce
JA
1046 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
1047 if (events)
d5396421 1048 break;
412819ce
JA
1049
1050 usleep(1000);
1051 continue;
d5396421
JA
1052 }
1053
412819ce
JA
1054 t = *buffer + offset;
1055 offset += sizeof(*t);
d5396421 1056
412819ce 1057 pdu_len = be16_to_cpu(t->pdu_len);
2ff323b0 1058 if (pdu_len) {
0a94916b
JA
1059 if (*max_offset - offset < pdu_len)
1060 resize_buffer(buffer, max_offset, offset);
d5396421 1061
2ff323b0
JA
1062 if (read_data(fd, *buffer + offset, pdu_len, 1))
1063 break;
d5396421 1064
2ff323b0
JA
1065 offset += pdu_len;
1066 }
d5396421 1067
412819ce 1068 events++;
79f19470 1069 } while (!is_done() && events < rb_batch);
d5396421 1070
412819ce
JA
1071 return events;
1072}
d5396421 1073
412819ce
JA
1074static int do_stdin(void)
1075{
1076 int fd;
1077 void *ptr;
0a94916b 1078 long max_offset;
d5396421 1079
1f79c4a0 1080 fd = dup(STDIN_FILENO);
0a94916b 1081 max_offset = 0;
412819ce
JA
1082 do {
1083 int events;
d5396421 1084
0a94916b 1085 events = read_sort_events(fd, &ptr, &max_offset);
412819ce
JA
1086 if (!events)
1087 break;
1088
2ff323b0
JA
1089 if (sort_entries(ptr, ~0UL, events) == -1)
1090 break;
1091
412819ce
JA
1092 show_entries_rb();
1093 free_entries_rb();
d5396421
JA
1094 } while (1);
1095
0a94916b 1096 free(ptr);
d5396421 1097 close(fd);
d5396421
JA
1098 return 0;
1099}
d0ca268b 1100
1f79c4a0 1101static void flush_output(void)
412819ce 1102{
152f6476 1103 fflush(ofp);
412819ce
JA
1104}
1105
1f79c4a0 1106static void handle_sigint(int sig)
412819ce
JA
1107{
1108 done = 1;
1109 flush_output();
1110}
1111
1f79c4a0
JA
1112static void usage(char *prog)
1113{
152f6476 1114 fprintf(stderr, "Usage: %s -i <name> [-o <output>][-s]\n", prog);
1f79c4a0
JA
1115}
1116
d5396421
JA
1117int main(int argc, char *argv[])
1118{
152f6476 1119 char *ofp_buffer;
a66877e6 1120 int c, ret, mode;
d5396421
JA
1121
1122 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1123 switch (c) {
1124 case 'i':
66efebf8 1125 dev = optarg;
d5396421
JA
1126 break;
1127 case 'o':
66efebf8 1128 output_name = optarg;
d5396421 1129 break;
79f19470
JA
1130 case 'b':
1131 rb_batch = atoi(optarg);
1132 if (rb_batch <= 0)
1133 rb_batch = RB_BATCH_DEFAULT;
1134 break;
152f6476
JA
1135 case 's':
1136 per_process_stats = 1;
1137 break;
7997c5b0
JA
1138 case 't':
1139 track_ios = 1;
1140 break;
d5396421 1141 default:
1f79c4a0 1142 usage(argv[0]);
d5396421
JA
1143 return 1;
1144 }
d0ca268b
JA
1145 }
1146
d5396421 1147 if (!dev) {
1f79c4a0 1148 usage(argv[0]);
d5396421
JA
1149 return 1;
1150 }
1151
7997c5b0
JA
1152 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1153 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1154
1155 signal(SIGINT, handle_sigint);
1156 signal(SIGHUP, handle_sigint);
1157 signal(SIGTERM, handle_sigint);
d5396421 1158
d69db225
JA
1159 setlocale(LC_NUMERIC, "en_US");
1160
a66877e6 1161 if (!output_name) {
152f6476 1162 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1163 mode = _IOLBF;
1164 } else {
152f6476
JA
1165 char ofname[128];
1166
1167 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1168 ofp = fopen(ofname, "w");
a66877e6 1169 mode = _IOFBF;
152f6476
JA
1170 }
1171
1172 if (!ofp) {
1173 perror("fopen");
1174 return 1;
1175 }
1176
1177 ofp_buffer = malloc(4096);
a66877e6 1178 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1179 perror("setvbuf");
1180 return 1;
1181 }
1182
d5396421
JA
1183 if (!strcmp(dev, "-"))
1184 ret = do_stdin();
1185 else
1186 ret = do_file();
1187
152f6476
JA
1188 if (per_process_stats)
1189 show_process_stats();
1190
1191 show_cpu_stats();
1192
412819ce 1193 flush_output();
d5396421 1194 return ret;
d0ca268b 1195}