[PATCH] blkparse: add framework for tracking individual ios
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
d0ca268b 32
8fc0abbc
JA
33#include "blktrace.h"
34#include "rbtree.h"
d0ca268b 35
cfab07eb
AB
/*
 * Split a value, treated as a count of nanoseconds, into its whole-second
 * part and the nanoseconds left over.
 */
#define SECONDS(x)	((unsigned long long)(x) / 1000000000ULL)
#define NANO_SECONDS(x)	((unsigned long long)(x) % 1000000000ULL)

/* 'B' if the trace last checked is older than the one before it, else ' ' */
static int backwards;
/* time of the first trace shown; later trace times are made relative to it */
static unsigned long long genesis_time;
/* time of the trace most recently passed through check_time() */
static unsigned long long last_reported_time;
41
152f6476
JA
/*
 * I/O counters. Prefixes: q = queued, c = completed, m = merged,
 * i = issued (dispatched). The *_kb members accumulate KiB.
 */
struct io_stats {
	unsigned long qreads;
	unsigned long qwrites;
	unsigned long creads;
	unsigned long cwrites;
	unsigned long mreads;
	unsigned long mwrites;
	unsigned long ireads;
	unsigned long iwrites;
	unsigned long long qread_kb;
	unsigned long long qwrite_kb;
	unsigned long long cread_kb;
	unsigned long long cwrite_kb;
	unsigned long long iread_kb;
	unsigned long long iwrite_kb;
};
48
d5396421 49struct per_cpu_info {
d0ca268b
JA
50 int cpu;
51 int nelems;
d0ca268b
JA
52
53 int fd;
87b72777 54 char fname[128];
d0ca268b 55
152f6476
JA
56 struct io_stats io_stats;
57};
8fc0abbc 58
152f6476
JA
59struct per_process_info {
60 char name[16];
61 __u32 pid;
62 struct io_stats io_stats;
63 struct per_process_info *hash_next, *list_next;
d0ca268b
JA
64};
65
152f6476
JA
/* the pid hash has 2^PPI_HASH_SHIFT buckets */
#define PPI_HASH_SHIFT	(8)

/* bucket heads, chained through per_process_info::hash_next */
static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
/* head of the list of all processes, chained through ::list_next */
static struct per_process_info *ppi_list;
69
/* short options accepted by getopt_long(); a ':' marks a required argument */
#define S_OPTS	"i:o:b:st"

/*
 * Long option table. Each entry maps to the short option in .val, so both
 * forms are handled by the same switch case in main(). Names must not
 * contain spaces, otherwise they cannot be typed as a normal --option.
 */
static struct option l_opts[] = {
	{
		.name = "input",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'i'
	},
	{
		.name = "output",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'o'
	},
	{
		.name = "batch",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'b'
	},
	{
		.name = "per-program-stats",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 's'
	},
	{
		.name = "track-ios",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 't'
	},
	{
		/* terminator */
		.name = NULL,
		.has_arg = 0,
		.flag = NULL,
		.val = 0
	}
};
109
7997c5b0
JA
110static struct rb_root rb_sort_root;
111static struct rb_root rb_track_root;
8fc0abbc 112
7997c5b0
JA
113/*
114 * for sorting the displayed output
115 */
8fc0abbc
JA
116struct trace {
117 struct blk_io_trace *bit;
118 struct rb_node rb_node;
119};
120
7997c5b0
JA
121/*
122 * for tracking individual ios
123 */
124struct io_track {
125 struct rb_node rb_node;
126
127 __u64 sector;
128 __u32 pid;
129 unsigned long long queue_time;
130 unsigned long long dispatch_time;
131 unsigned long long completion_time;
132};
133
a718bd37
NS
/* number of entries currently allocated in per_cpu_info */
static int max_cpus;
static struct per_cpu_info *per_cpu_info;

/* total number of traces dumped */
static unsigned long long events;

/* input name given with -i, and output base name given with -o */
static char *dev;
static char *output_name;
/* stream all parsed output is written to */
static FILE *ofp;

/* set by -s and -t respectively */
static int per_process_stats;
static int track_ios;

/* maximum number of events read per batch from stdin */
#define RB_BATCH_DEFAULT	(1024)
static int rb_batch = RB_BATCH_DEFAULT;

/* termination flag, set from the signal handler */
#define is_done()	(*(volatile int *)(&done))
static volatile int done;
150
152f6476
JA
151static inline unsigned long hash_long(unsigned long val)
152{
16ef714e
JA
153#if __WORDSIZE == 32
154 val *= 0x9e370001UL;
155#elif __WORDSIZE == 64
156 val *= 0x9e37fffffffc0001UL;
157#else
158#error unknown word size
159#endif
160
161 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
162}
163
164static inline void add_process_to_hash(struct per_process_info *ppi)
165{
166 const int hash_idx = hash_long(ppi->pid);
167
168 ppi->hash_next = ppi_hash[hash_idx];
169 ppi_hash[hash_idx] = ppi;
170}
171
172static inline void add_process_to_list(struct per_process_info *ppi)
173{
174 ppi->list_next = ppi_list;
175 ppi_list = ppi;
176}
177
178static struct per_process_info *find_process_by_pid(__u32 pid)
179{
180 const int hash_idx = hash_long(pid);
181 struct per_process_info *ppi;
182
183 ppi = ppi_hash[hash_idx];
184 while (ppi) {
185 if (ppi->pid == pid)
186 return ppi;
187
188 ppi = ppi->hash_next;
189 }
190
191 return NULL;
192}
193
7997c5b0
JA
194static inline int trace_rb_insert(struct trace *t)
195{
196 struct rb_node **p = &rb_sort_root.rb_node;
197 struct rb_node *parent = NULL;
198 struct trace *__t;
199
200 while (*p) {
201 parent = *p;
202 __t = rb_entry(parent, struct trace, rb_node);
203
204 if (t->bit->sequence < __t->bit->sequence)
205 p = &(*p)->rb_left;
206 else if (t->bit->sequence > __t->bit->sequence)
207 p = &(*p)->rb_right;
208 else {
209 fprintf(stderr, "sequence alias!\n");
210 return 1;
211 }
212 }
213
214 rb_link_node(&t->rb_node, parent, p);
215 rb_insert_color(&t->rb_node, &rb_sort_root);
216 return 0;
217}
218
219static inline int track_rb_insert(struct io_track *iot)
220{
221 struct rb_node **p = &rb_track_root.rb_node;
222 struct rb_node *parent = NULL;
223 struct io_track *__iot;
224
225 while (*p) {
226 parent = *p;
227
228 __iot = rb_entry(parent, struct io_track, rb_node);
229
230 if (iot->sector < __iot->sector)
231 p = &(*p)->rb_left;
232 else if (iot->sector > __iot->sector)
233 p = &(*p)->rb_right;
234 else {
235 fprintf(stderr, "sequence alias!\n");
236 return 1;
237 }
238 }
239
240 rb_link_node(&iot->rb_node, parent, p);
241 rb_insert_color(&iot->rb_node, &rb_track_root);
242 return 0;
243}
244
245static struct io_track *__find_track(__u64 sector)
246{
247 struct rb_node **p = &rb_track_root.rb_node;
248 struct rb_node *parent = NULL;
249 struct io_track *__iot;
250
251 while (*p) {
252 parent = *p;
253
254 __iot = rb_entry(parent, struct io_track, rb_node);
255
256 if (sector < __iot->sector)
257 p = &(*p)->rb_left;
258 else if (sector > __iot->sector)
259 p = &(*p)->rb_right;
260 else
261 return __iot;
262 }
263
264 return NULL;
265}
266
267static struct io_track *find_track(__u64 sector)
268{
269 struct io_track *iot = __find_track(sector);
270
271 iot = __find_track(sector);
272 if (!iot) {
273 iot = malloc(sizeof(*iot));
274 iot->sector = sector;
275 track_rb_insert(iot);
276 }
277
278 return iot;
279}
280
152f6476
JA
281static struct io_stats *find_process_io_stats(__u32 pid, char *name)
282{
283 struct per_process_info *ppi = find_process_by_pid(pid);
284
285 if (!ppi) {
286 ppi = malloc(sizeof(*ppi));
287 memset(ppi, 0, sizeof(*ppi));
288 strncpy(ppi->name, name, sizeof(ppi->name));
289 ppi->pid = pid;
290 add_process_to_hash(ppi);
291 add_process_to_list(ppi);
292 }
293
294 return &ppi->io_stats;
295}
296
a718bd37
NS
297static void resize_cpu_info(int cpuid)
298{
299 int new_space, new_max = cpuid + 1;
300 char *new_start;
301
302 per_cpu_info = realloc(per_cpu_info, new_max * sizeof(*per_cpu_info));
303 if (!per_cpu_info) {
304 fprintf(stderr, "Cannot allocate CPU info -- %d x %d bytes\n",
305 new_max, (int) sizeof(*per_cpu_info));
306 exit(1);
307 }
308
309 new_start = (char *)per_cpu_info + (max_cpus * sizeof(*per_cpu_info));
310 new_space = (new_max - max_cpus) * sizeof(*per_cpu_info);
311 memset(new_start, 0, new_space);
312 max_cpus = new_max;
313}
314
315static struct per_cpu_info *get_cpu_info(int cpu)
316{
c499bf38
JA
317 struct per_cpu_info *pci;
318
a718bd37
NS
319 if (cpu >= max_cpus)
320 resize_cpu_info(cpu);
321
c499bf38
JA
322 /*
323 * ->cpu might already be set, but just set it unconditionally
324 */
325 pci = &per_cpu_info[cpu];
326 pci->cpu = cpu;
327
328 return pci;
a718bd37
NS
329}
330
cfab07eb
AB
331static inline void check_time(struct blk_io_trace *bit)
332{
333 unsigned long long this = bit->time;
334 unsigned long long last = last_reported_time;
335
336 backwards = (this < last) ? 'B' : ' ';
337 last_reported_time = this;
338}
339
152f6476
JA
340static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
341 int rw)
d0ca268b
JA
342{
343 if (rw) {
152f6476
JA
344 ios->mwrites++;
345 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 346 } else {
152f6476
JA
347 ios->mreads++;
348 ios->qread_kb += t->bytes >> 10;
349 }
350}
351
352static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
353 int rw)
354{
355 __account_m(&pci->io_stats, t, rw);
356
357 if (per_process_stats) {
358 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
359
360 __account_m(ios, t, rw);
d0ca268b
JA
361 }
362}
363
152f6476
JA
364static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
365 int rw)
d0ca268b
JA
366{
367 if (rw) {
152f6476
JA
368 ios->qwrites++;
369 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 370 } else {
152f6476
JA
371 ios->qreads++;
372 ios->qread_kb += t->bytes >> 10;
373 }
374}
375
376static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
377 int rw)
378{
379 __account_q(&pci->io_stats, t, rw);
380
381 if (per_process_stats) {
382 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
383
384 __account_q(ios, t, rw);
d0ca268b
JA
385 }
386}
387
152f6476 388static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
389{
390 if (rw) {
152f6476
JA
391 ios->cwrites++;
392 ios->cwrite_kb += bytes >> 10;
d0ca268b 393 } else {
152f6476
JA
394 ios->creads++;
395 ios->cread_kb += bytes >> 10;
396 }
397}
398
399static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
400 int rw, int bytes)
401{
402 __account_c(&pci->io_stats, rw, bytes);
403
404 if (per_process_stats) {
405 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
406
407 __account_c(ios, rw, bytes);
d0ca268b
JA
408 }
409}
410
152f6476 411static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 412{
413 if (rw) {
152f6476
JA
414 ios->iwrites++;
415 ios->iwrite_kb += bytes >> 10;
afd2d7ad 416 } else {
152f6476
JA
417 ios->ireads++;
418 ios->iread_kb += bytes >> 10;
afd2d7ad 419 }
420}
421
152f6476
JA
422static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
423 int rw)
d0ca268b 424{
152f6476
JA
425 __account_i(&pci->io_stats, rw, t->bytes);
426
427 if (per_process_stats) {
428 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 429
152f6476
JA
430 __account_i(ios, rw, t->bytes);
431 }
432}
433
434static void output(struct per_cpu_info *pci, char *s)
435{
436 fprintf(ofp, "%s", s);
d0ca268b
JA
437}
438
3aabcd89
JA
/* scratch buffer for the common line header built by setup_header() */
static char hstring[256];
/* scratch buffer for a complete output line (or a fragment of one) */
static char tstring[256];
d0ca268b 441
d5396421
JA
442static inline char *setup_header(struct per_cpu_info *pci,
443 struct blk_io_trace *t, char act)
d0ca268b
JA
444{
445 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
446 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
447 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
448 char rwbs[4];
449 int i = 0;
450
451 if (w)
452 rwbs[i++] = 'W';
453 else
454 rwbs[i++] = 'R';
455 if (b)
456 rwbs[i++] = 'B';
457 if (s)
458 rwbs[i++] = 'S';
459
460 rwbs[i] = '\0';
461
cfab07eb 462 sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards,
d5396421 463 pci->cpu,
cfab07eb
AB
464 (unsigned long)t->sequence, SECONDS(t->time),
465 NANO_SECONDS(t->time), t->pid, act, rwbs);
d0ca268b
JA
466
467 return hstring;
468}
469
d5396421
JA
470static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
471 char act)
d0ca268b 472{
d5396421 473 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
d0ca268b 474 (unsigned long long)t->sector, t->bytes >> 9, t->error);
d5396421 475 output(pci, tstring);
d0ca268b
JA
476}
477
d5396421
JA
478static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
479 char act)
d0ca268b 480{
2955af9d
NS
481 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
482 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 483 output(pci, tstring);
d0ca268b
JA
484}
485
d5396421
JA
486static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
487 char act)
d0ca268b 488{
2955af9d
NS
489 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
490 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 491 output(pci, tstring);
d0ca268b
JA
492}
493
d5396421
JA
494static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
495 char act)
d0ca268b 496{
984c63b7 497 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
2955af9d 498 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 499 output(pci, tstring);
d0ca268b
JA
500}
501
d5396421
JA
502static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
503 char act)
d0ca268b 504{
2955af9d
NS
505 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
506 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 507 output(pci, tstring);
d0ca268b
JA
508}
509
d5396421 510static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act)
d0ca268b 511{
87b72777
JA
512 unsigned char *buf;
513 int i;
d0ca268b 514
d5396421
JA
515 sprintf(tstring,"%s ", setup_header(pci, t, act));
516 output(pci, tstring);
d0ca268b 517
87b72777 518 buf = (unsigned char *) t + sizeof(*t);
d0ca268b
JA
519 for (i = 0; i < t->pdu_len; i++) {
520 sprintf(tstring,"%02x ", buf[i]);
d5396421 521 output(pci, tstring);
d0ca268b
JA
522 }
523
524 if (act == 'C') {
2955af9d
NS
525 sprintf(tstring,"[%d]\n", t->error);
526 output(pci, tstring);
527 } else {
528 sprintf(tstring,"[%s]\n", t->comm);
d5396421 529 output(pci, tstring);
d0ca268b 530 }
87b72777 531 return 0;
d0ca268b
JA
532}
533
d5396421 534static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 535{
87b72777
JA
536 int ret = 0;
537
d0ca268b
JA
538 switch (t->action & 0xffff) {
539 case __BLK_TA_QUEUE:
d5396421 540 log_generic(pci, t, 'Q');
d0ca268b
JA
541 break;
542 case __BLK_TA_GETRQ:
d5396421 543 log_generic(pci, t, 'G');
d0ca268b
JA
544 break;
545 case __BLK_TA_SLEEPRQ:
d5396421 546 log_generic(pci, t, 'S');
d0ca268b
JA
547 break;
548 case __BLK_TA_REQUEUE:
d5396421 549 log_generic(pci, t, 'R');
d0ca268b
JA
550 break;
551 case __BLK_TA_ISSUE:
d5396421 552 ret = log_pc(pci, t, 'D');
d0ca268b
JA
553 break;
554 case __BLK_TA_COMPLETE:
d5396421 555 log_pc(pci, t, 'C');
d0ca268b
JA
556 break;
557 default:
558 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
559 ret = 1;
560 break;
d0ca268b
JA
561 }
562
87b72777 563 return ret;
d0ca268b
JA
564}
565
7997c5b0
JA
566static void log_track_merge(struct blk_io_trace *t)
567{
568 struct io_track *iot;
569
570 if (!track_ios)
571 return;
572
573 iot = __find_track(t->sector - (t->bytes >> 10));
574 if (!iot) {
575 fprintf(stderr, "Trying to merge on non-existing request\n");
576 return;
577 }
578
579 rb_erase(&iot->rb_node, &rb_track_root);
580 iot->sector -= t->bytes >> 10;
581 track_rb_insert(iot);
582}
583
584static void log_track_queue(struct blk_io_trace *t)
585{
586 struct io_track *iot;
587
588 if (!track_ios)
589 return;
590
591 iot = find_track(t->sector);
592 iot->queue_time = t->time;
593}
594
595static void log_track_issue(struct blk_io_trace *t)
596{
597 struct io_track *iot;
598
599 if (!track_ios)
600 return;
601
602 iot = __find_track(t->sector);
603 if (!iot) {
604 fprintf(stderr, "Trying to issue on non-existing request\n");
605 return;
606 }
607
608 iot->dispatch_time = t->time;
609}
610
611static void log_track_complete(struct blk_io_trace *t)
612{
613 struct io_track *iot;
614
615 if (!track_ios)
616 return;
617
618 iot = __find_track(t->sector);
619 if (!iot) {
620 fprintf(stderr, "Trying to dispatch on non-existing request\n");
621 return;
622 }
623
624 iot->completion_time = t->time;
625}
626
d5396421 627static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
628{
629 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 630 int act = t->action & 0xffff;
d0ca268b 631
7997c5b0 632 switch (act) {
d0ca268b 633 case __BLK_TA_QUEUE:
152f6476 634 account_q(t, pci, w);
d5396421 635 log_queue(pci, t, 'Q');
7997c5b0 636 log_track_queue(t);
d0ca268b
JA
637 break;
638 case __BLK_TA_BACKMERGE:
152f6476 639 account_m(t, pci, w);
d5396421 640 log_merge(pci, t, 'M');
d0ca268b
JA
641 break;
642 case __BLK_TA_FRONTMERGE:
152f6476 643 account_m(t, pci, w);
d5396421 644 log_merge(pci, t, 'F');
7997c5b0 645 log_track_merge(t);
d0ca268b
JA
646 break;
647 case __BLK_TA_GETRQ:
d5396421 648 log_generic(pci, t, 'G');
d0ca268b
JA
649 break;
650 case __BLK_TA_SLEEPRQ:
d5396421 651 log_generic(pci, t, 'S');
d0ca268b
JA
652 break;
653 case __BLK_TA_REQUEUE:
152f6476 654 account_c(t, pci, w, -t->bytes);
d5396421 655 log_queue(pci, t, 'R');
7997c5b0 656 log_track_queue(t);
d0ca268b
JA
657 break;
658 case __BLK_TA_ISSUE:
152f6476 659 account_i(t, pci, w);
d5396421 660 log_issue(pci, t, 'D');
7997c5b0 661 log_track_issue(t);
d0ca268b
JA
662 break;
663 case __BLK_TA_COMPLETE:
152f6476 664 account_c(t, pci, w, t->bytes);
d5396421 665 log_complete(pci, t, 'C');
7997c5b0 666 log_track_complete(t);
d0ca268b
JA
667 break;
668 default:
669 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 670 break;
d0ca268b 671 }
d0ca268b
JA
672}
673
d5396421 674static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 675{
87b72777
JA
676 int ret = 0;
677
d0ca268b 678 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 679 ret = dump_trace_pc(t, pci);
d0ca268b 680 else
d5396421 681 dump_trace_fs(t, pci);
87b72777
JA
682
683 events++;
684 return ret;
d0ca268b
JA
685}
686
152f6476 687static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 688{
152f6476
JA
689 fprintf(ofp, "%s\n", msg);
690
691 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
692 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 693
152f6476
JA
694 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
695 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
696 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
697 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
698 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
0a6b8fc4 699
152f6476 700 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
5c017e4b
JA
701}
702
152f6476
JA
703static void show_process_stats(void)
704{
705 struct per_process_info *ppi;
706
707 ppi = ppi_list;
708 while (ppi) {
709 dump_io_stats(&ppi->io_stats, ppi->name);
710 ppi = ppi->list_next;
711 }
712
713 fprintf(ofp, "\n");
714}
715
716static void show_cpu_stats(void)
d0ca268b 717{
d5396421 718 struct per_cpu_info foo, *pci;
152f6476 719 struct io_stats *ios;
412819ce 720 int i, pci_events = 0;
5c017e4b
JA
721
722 memset(&foo, 0, sizeof(foo));
d0ca268b 723
a718bd37 724 for (i = 0; i < max_cpus; i++) {
152f6476
JA
725 char cpu[8];
726
d5396421 727 pci = &per_cpu_info[i];
152f6476 728 ios = &pci->io_stats;
5c017e4b 729
d5396421 730 if (!pci->nelems)
afd2d7ad 731 continue;
732
152f6476
JA
733 foo.io_stats.qreads += ios->qreads;
734 foo.io_stats.qwrites += ios->qwrites;
735 foo.io_stats.creads += ios->creads;
736 foo.io_stats.cwrites += ios->cwrites;
737 foo.io_stats.mreads += ios->mreads;
738 foo.io_stats.mwrites += ios->mwrites;
11e51068
NS
739 foo.io_stats.ireads += ios->ireads;
740 foo.io_stats.iwrites += ios->iwrites;
152f6476
JA
741 foo.io_stats.qread_kb += ios->qread_kb;
742 foo.io_stats.qwrite_kb += ios->qwrite_kb;
743 foo.io_stats.cread_kb += ios->cread_kb;
744 foo.io_stats.cwrite_kb += ios->cwrite_kb;
11e51068
NS
745 foo.io_stats.iread_kb += ios->iread_kb;
746 foo.io_stats.iwrite_kb += ios->iwrite_kb;
152f6476
JA
747
748 snprintf(cpu, sizeof(cpu) - 1, "CPU%d:", i);
749 dump_io_stats(ios, cpu);
412819ce 750 pci_events++;
5c017e4b
JA
751 }
752
412819ce 753 if (pci_events > 1) {
152f6476
JA
754 fprintf(ofp, "\n");
755 dump_io_stats(&foo.io_stats, "Total:");
5c017e4b 756 }
d0ca268b 757
152f6476 758 fprintf(ofp, "\nEvents: %'Lu\n", events);
d0ca268b
JA
759}
760
2ff323b0
JA
761#define min(a, b) ((a) < (b) ? (a) : (b))
762
763static struct blk_io_trace *find_trace(void *p, unsigned long offset, int nr)
764{
765 unsigned long max_offset = min(offset,nr * sizeof(struct blk_io_trace));
766 unsigned long off;
767 struct blk_io_trace *bit;
768 __u32 magic;
769
770 for (off = 0; off < max_offset; off++) {
771 bit = p + off;
772
773 magic = be32_to_cpu(bit->magic);
774 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
775 return bit;
776 }
777
778 return NULL;
779}
780
412819ce 781static int sort_entries(void *traces, unsigned long offset, int nr)
8fc0abbc 782{
412819ce 783 struct per_cpu_info *pci;
8fc0abbc
JA
784 struct blk_io_trace *bit;
785 struct trace *t;
786 void *start = traces;
787 int nelems = 0;
788
6fe4709e 789 while (traces - start <= offset - sizeof(*bit)) {
412819ce
JA
790 if (!nr)
791 break;
792
2ff323b0
JA
793 bit = find_trace(traces, offset - (traces - start), nr);
794 if (!bit)
795 break;
6fe4709e 796
8fc0abbc
JA
797 t = malloc(sizeof(*t));
798 t->bit = bit;
799 memset(&t->rb_node, 0, sizeof(t->rb_node));
800
6fe4709e
JA
801 trace_to_cpu(bit);
802
66fa7233
JA
803 if (verify_trace(bit))
804 break;
805
a718bd37 806 pci = get_cpu_info(bit->cpu);
412819ce
JA
807 pci->nelems++;
808
8fc0abbc
JA
809 if (trace_rb_insert(t))
810 return -1;
811
812 traces += sizeof(*bit) + bit->pdu_len;
813 nelems++;
412819ce 814 nr--;
6fe4709e 815 }
8fc0abbc
JA
816
817 return nelems;
818}
819
412819ce
JA
820static void free_entries_rb(void)
821{
822 struct rb_node *n;
823
7997c5b0 824 while ((n = rb_first(&rb_sort_root)) != NULL) {
412819ce
JA
825 struct trace *t = rb_entry(n, struct trace, rb_node);
826
7997c5b0 827 rb_erase(&t->rb_node, &rb_sort_root);
412819ce
JA
828 free(t);
829 }
830}
831
d5396421 832static void show_entries_rb(void)
8fc0abbc 833{
8fc0abbc 834 struct blk_io_trace *bit;
3aabcd89 835 struct rb_node *n;
8fc0abbc
JA
836 struct trace *t;
837 int cpu;
838
7997c5b0 839 n = rb_first(&rb_sort_root);
3aabcd89
JA
840 if (!n)
841 return;
8fc0abbc 842
3aabcd89 843 do {
8fc0abbc
JA
844 t = rb_entry(n, struct trace, rb_node);
845 bit = t->bit;
846
d5396421 847 cpu = bit->cpu;
87b72777 848 if (cpu > max_cpus) {
8fc0abbc 849 fprintf(stderr, "CPU number too large (%d)\n", cpu);
87b72777 850 break;
8fc0abbc
JA
851 }
852
cfab07eb
AB
853 if (genesis_time == 0)
854 genesis_time = bit->time;
855 bit->time -= genesis_time;
8fc0abbc 856
cfab07eb 857 check_time(bit);
8fc0abbc 858
d5396421 859 if (dump_trace(bit, &per_cpu_info[cpu]))
87b72777
JA
860 break;
861
8fc0abbc
JA
862 } while ((n = rb_next(n)) != NULL);
863}
864
1f79c4a0
JA
/*
 * Read exactly bytes bytes from fd into buffer. The descriptor is switched
 * to blocking mode when block is non-zero and to non-blocking mode
 * otherwise.
 *
 * Returns 0 once everything was read, 1 on end of file, and -1 on a read
 * error (EAGAIN is reported as -1 without printing a message).
 */
static int read_data(int fd, void *buffer, int bytes, int block)
{
	char *dst = buffer;
	int remaining = bytes;
	int flags = fcntl(fd, F_GETFL);

	if (block)
		fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
	else
		fcntl(fd, F_SETFL, flags | O_NONBLOCK);

	while (remaining > 0) {
		int got = read(fd, dst, remaining);

		if (got == 0)
			return 1;

		if (got < 0) {
			if (errno != EAGAIN)
				perror("read");
			return -1;
		}

		dst += got;
		remaining -= got;
	}

	return 0;
}
895
d5396421 896static int do_file(void)
d0ca268b 897{
a718bd37 898 int i, nfiles;
d0ca268b 899
a718bd37
NS
900 for (i = 0, nfiles = 0;; i++, nfiles++) {
901 struct per_cpu_info *pci;
87b72777
JA
902 struct stat st;
903 void *tb;
904
a718bd37 905 pci = get_cpu_info(i);
d0ca268b 906
d5396421
JA
907 snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i);
908 if (stat(pci->fname, &st) < 0)
d0ca268b 909 break;
afd2d7ad 910 if (!st.st_size)
911 continue;
d0ca268b 912
d5396421 913 printf("Processing %s\n", pci->fname);
8fc0abbc 914
87b72777 915 tb = malloc(st.st_size);
8fc0abbc 916
d5396421
JA
917 pci->fd = open(pci->fname, O_RDONLY);
918 if (pci->fd < 0) {
919 perror(pci->fname);
3aabcd89 920 break;
d0ca268b 921 }
afd2d7ad 922
1f79c4a0 923 if (read_data(pci->fd, tb, st.st_size, 1))
3aabcd89 924 break;
d0ca268b 925
a718bd37 926 if (sort_entries(tb, st.st_size, ~0U) == -1)
3aabcd89 927 break;
d0ca268b 928
d5396421 929 close(pci->fd);
d5396421
JA
930 printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems);
931
932 }
933
934 if (!nfiles) {
935 fprintf(stderr, "No files found\n");
936 return 1;
937 }
938
939 show_entries_rb();
d5396421
JA
940 return 0;
941}
942
/*
 * Double the size of *buffer (currently *size bytes) and clear
 * *size (old) bytes starting at offset. *buffer and *size are updated.
 * Exits the program if the reallocation fails, since callers write into
 * the buffer without checking.
 */
static void resize_buffer(void **buffer, long *size, long offset)
{
	long old_size = *size;
	void *grown;

	grown = realloc(*buffer, old_size * 2);
	if (!grown) {
		fprintf(stderr, "Cannot grow buffer to %ld bytes\n",
			old_size * 2);
		exit(1);
	}

	*size = old_size * 2;
	*buffer = grown;
	memset((char *)grown + offset, 0, *size - old_size);
}
d5396421 951
412819ce
JA
952static int read_sort_events(int fd, void **buffer)
953{
954 long offset, max_offset;
955 int events;
d5396421 956
412819ce
JA
957 max_offset = 128 * sizeof(struct blk_io_trace);
958 *buffer = malloc(max_offset);
959 events = 0;
960 offset = 0;
d5396421 961
412819ce
JA
962 do {
963 struct blk_io_trace *t;
964 int pdu_len;
d5396421 965
412819ce 966 if (max_offset - offset < sizeof(*t))
2ff323b0 967 resize_buffer(buffer, &max_offset, offset);
d5396421 968
412819ce
JA
969 if (read_data(fd, *buffer + offset, sizeof(*t), !events)) {
970 if (events)
d5396421 971 break;
412819ce
JA
972
973 usleep(1000);
974 continue;
d5396421
JA
975 }
976
412819ce
JA
977 t = *buffer + offset;
978 offset += sizeof(*t);
d5396421 979
412819ce 980 pdu_len = be16_to_cpu(t->pdu_len);
2ff323b0
JA
981 if (pdu_len) {
982 if (max_offset - offset < pdu_len)
983 resize_buffer(buffer, &max_offset, offset);
d5396421 984
2ff323b0
JA
985 if (read_data(fd, *buffer + offset, pdu_len, 1))
986 break;
d5396421 987
2ff323b0
JA
988 offset += pdu_len;
989 }
d5396421 990
412819ce 991 events++;
79f19470 992 } while (!is_done() && events < rb_batch);
d5396421 993
412819ce
JA
994 return events;
995}
d5396421 996
412819ce
JA
/*
 * Stream mode: read batches of trace records from standard input; sort,
 * print and discard each batch in turn until a batch comes back empty or
 * cannot be sorted.
 *
 * Returns 0, or 1 if standard input could not be duplicated.
 */
static int do_stdin(void)
{
	int fd;
	void *ptr = NULL;

	fd = dup(STDIN_FILENO);
	if (fd < 0) {
		perror("dup");
		return 1;
	}

	do {
		int events;

		/* allocates a new buffer on every call */
		events = read_sort_events(fd, &ptr);
		if (!events)
			break;

		if (sort_entries(ptr, ~0UL, events) == -1)
			break;

		show_entries_rb();
		free_entries_rb();

		/* the tree is empty again, nothing points into ptr */
		free(ptr);
		ptr = NULL;
	} while (1);

	close(fd);
	free(ptr);
	return 0;
}
d0ca268b 1021
1f79c4a0 1022static void flush_output(void)
412819ce 1023{
152f6476 1024 fflush(ofp);
412819ce
JA
1025}
1026
1f79c4a0 1027static void handle_sigint(int sig)
412819ce
JA
1028{
1029 done = 1;
1030 flush_output();
1031}
1032
1f79c4a0
JA
/*
 * Print a one-line usage summary for prog on stderr, listing every option
 * accepted by S_OPTS.
 */
static void usage(char *prog)
{
	fprintf(stderr, "Usage: %s -i <name> [-o <output>][-b <batch>][-s][-t]\n",
		prog);
}
1037
d5396421
JA
1038int main(int argc, char *argv[])
1039{
152f6476 1040 char *ofp_buffer;
a66877e6 1041 int c, ret, mode;
d5396421
JA
1042
1043 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1044 switch (c) {
1045 case 'i':
66efebf8 1046 dev = optarg;
d5396421
JA
1047 break;
1048 case 'o':
66efebf8 1049 output_name = optarg;
d5396421 1050 break;
79f19470
JA
1051 case 'b':
1052 rb_batch = atoi(optarg);
1053 if (rb_batch <= 0)
1054 rb_batch = RB_BATCH_DEFAULT;
1055 break;
152f6476
JA
1056 case 's':
1057 per_process_stats = 1;
1058 break;
7997c5b0
JA
1059 case 't':
1060 track_ios = 1;
1061 break;
d5396421 1062 default:
1f79c4a0 1063 usage(argv[0]);
d5396421
JA
1064 return 1;
1065 }
d0ca268b
JA
1066 }
1067
d5396421 1068 if (!dev) {
1f79c4a0 1069 usage(argv[0]);
d5396421
JA
1070 return 1;
1071 }
1072
7997c5b0
JA
1073 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1074 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1075
1076 signal(SIGINT, handle_sigint);
1077 signal(SIGHUP, handle_sigint);
1078 signal(SIGTERM, handle_sigint);
d5396421 1079
d69db225
JA
1080 setlocale(LC_NUMERIC, "en_US");
1081
a66877e6 1082 if (!output_name) {
152f6476 1083 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1084 mode = _IOLBF;
1085 } else {
152f6476
JA
1086 char ofname[128];
1087
1088 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1089 ofp = fopen(ofname, "w");
a66877e6 1090 mode = _IOFBF;
152f6476
JA
1091 }
1092
1093 if (!ofp) {
1094 perror("fopen");
1095 return 1;
1096 }
1097
1098 ofp_buffer = malloc(4096);
a66877e6 1099 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1100 perror("setvbuf");
1101 return 1;
1102 }
1103
d5396421
JA
1104 if (!strcmp(dev, "-"))
1105 ret = do_stdin();
1106 else
1107 ret = do_file();
1108
152f6476
JA
1109 if (per_process_stats)
1110 show_process_stats();
1111
1112 show_cpu_stats();
1113
412819ce 1114 flush_output();
d5396421 1115 return ret;
d0ca268b 1116}