[PATCH] blktrace: we don't need ->closed anymore
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
6e0073ed 32#include <libgen.h>
d0ca268b 33
8fc0abbc
JA
34#include "blktrace.h"
35#include "rbtree.h"
bf0720af 36#include "jhash.h"
d0ca268b 37
13d928f0 38static char blkparse_version[] = "0.99";
52724a0e 39
e7c9f3ff 40struct per_dev_info {
f7bd1a9b 41 dev_t dev;
e7c9f3ff
NS
42 char *name;
43
44 int backwards;
45 unsigned long long events;
20ed6177 46 unsigned long long first_reported_time;
e7c9f3ff 47 unsigned long long last_reported_time;
287fa3d6 48 unsigned long long last_read_time;
e7c9f3ff 49 struct io_stats io_stats;
cb2a1a62 50 unsigned long last_sequence;
dce0f678
AB
51 unsigned long skips, nskips;
52 unsigned long long seq_skips, seq_nskips;
e7c9f3ff 53
2a1b3424
JA
54 struct rb_root rb_last;
55 unsigned long rb_last_entries;
56
f7bd1a9b
JA
57 struct rb_root rb_track;
58
73877e12 59 int nfiles;
e7c9f3ff 60 int ncpus;
824c2b39
JA
61
62 unsigned long *cpu_map;
63 unsigned int cpu_map_max;
64
e7c9f3ff
NS
65 struct per_cpu_info *cpus;
66};
67
152f6476
JA
68struct per_process_info {
69 char name[16];
70 __u32 pid;
71 struct io_stats io_stats;
72 struct per_process_info *hash_next, *list_next;
715d8021 73 int more_than_one;
50adc0ba
JA
74
75 /*
76 * individual io stats
77 */
b9d40d6f
JA
78 unsigned long long longest_allocation_wait[2];
79 unsigned long long longest_dispatch_wait[2];
80 unsigned long long longest_completion_wait[2];
d0ca268b
JA
81};
82
152f6476 83#define PPI_HASH_SHIFT (8)
bf0720af
JA
84#define PPI_HASH_SIZE (1 << PPI_HASH_SHIFT)
85#define PPI_HASH_MASK (PPI_HASH_SIZE - 1)
86static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
152f6476 87static struct per_process_info *ppi_list;
886ecf0e 88static int ppi_list_entries;
152f6476 89
d915dee6 90#define S_OPTS "a:A:i:o:b:stqw:f:F:vVhD:"
d5396421 91static struct option l_opts[] = {
98f8386b
AB
92 {
93 .name = "act-mask",
94 .has_arg = required_argument,
95 .flag = NULL,
96 .val = 'a'
97 },
98 {
99 .name = "set-mask",
100 .has_arg = required_argument,
101 .flag = NULL,
102 .val = 'A'
103 },
d5396421
JA
104 {
105 .name = "input",
428683db 106 .has_arg = required_argument,
d5396421
JA
107 .flag = NULL,
108 .val = 'i'
109 },
110 {
111 .name = "output",
428683db 112 .has_arg = required_argument,
d5396421
JA
113 .flag = NULL,
114 .val = 'o'
115 },
79f19470
JA
116 {
117 .name = "batch",
428683db 118 .has_arg = required_argument,
79f19470
JA
119 .flag = NULL,
120 .val = 'b'
121 },
152f6476 122 {
3f65c585 123 .name = "per-program-stats",
428683db 124 .has_arg = no_argument,
152f6476
JA
125 .flag = NULL,
126 .val = 's'
127 },
7997c5b0 128 {
3f65c585 129 .name = "track-ios",
428683db 130 .has_arg = no_argument,
7997c5b0
JA
131 .flag = NULL,
132 .val = 't'
133 },
1e1c60f1
NS
134 {
135 .name = "quiet",
428683db 136 .has_arg = no_argument,
1e1c60f1
NS
137 .flag = NULL,
138 .val = 'q'
139 },
46e6968b
NS
140 {
141 .name = "stopwatch",
428683db 142 .has_arg = required_argument,
46e6968b
NS
143 .flag = NULL,
144 .val = 'w'
145 },
ab197ca7
AB
146 {
147 .name = "format",
428683db 148 .has_arg = required_argument,
ab197ca7
AB
149 .flag = NULL,
150 .val = 'f'
151 },
152 {
153 .name = "format-spec",
428683db 154 .has_arg = required_argument,
ab197ca7
AB
155 .flag = NULL,
156 .val = 'F'
157 },
bf0720af 158 {
3f65c585 159 .name = "hash-by-name",
bf0720af
JA
160 .has_arg = no_argument,
161 .flag = NULL,
d915dee6 162 .val = 'h'
bf0720af 163 },
7d1c0411 164 {
57ea8602 165 .name = "verbose",
7d1c0411
JA
166 .has_arg = no_argument,
167 .flag = NULL,
57ea8602 168 .val = 'v'
7d1c0411 169 },
52724a0e
JA
170 {
171 .name = "version",
172 .has_arg = no_argument,
173 .flag = NULL,
57ea8602 174 .val = 'V'
52724a0e 175 },
d1d7f15f 176 {
3f65c585 177 .name = "input-directory",
d1d7f15f
JA
178 .has_arg = required_argument,
179 .flag = NULL,
180 .val = 'D'
181 },
71ef8b7c
JA
182 {
183 .name = NULL,
184 }
d5396421
JA
185};
186
7997c5b0
JA
187/*
188 * for sorting the displayed output
189 */
8fc0abbc
JA
190struct trace {
191 struct blk_io_trace *bit;
192 struct rb_node rb_node;
cb2a1a62 193 struct trace *next;
8fc0abbc
JA
194};
195
cb2a1a62 196static struct rb_root rb_sort_root;
a649216c
JA
197static unsigned long rb_sort_entries;
198
cb2a1a62
JA
199static struct trace *trace_list;
200
d36421e4
JA
201/*
202 * allocation cache
203 */
204static struct blk_io_trace *bit_alloc_list;
205static struct trace *t_alloc_list;
206
7997c5b0
JA
207/*
208 * for tracking individual ios
209 */
210struct io_track {
211 struct rb_node rb_node;
212
213 __u64 sector;
214 __u32 pid;
bf0720af 215 char comm[16];
95c15013 216 unsigned long long allocation_time;
7997c5b0
JA
217 unsigned long long queue_time;
218 unsigned long long dispatch_time;
219 unsigned long long completion_time;
220};
221
e7c9f3ff
NS
222static int ndevices;
223static struct per_dev_info *devices;
224static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 225
71d5d4c9 226FILE *ofp = NULL;
e7c9f3ff 227static char *output_name;
d1d7f15f 228static char *input_dir;
e7c9f3ff
NS
229
230static unsigned long long genesis_time;
287fa3d6 231static unsigned long long last_allowed_time;
1c7c54aa 232static unsigned int smallest_seq_read;
46e6968b 233static unsigned long long stopwatch_start; /* start from zero by default */
bc171579 234static unsigned long long stopwatch_end = -1ULL; /* "infinity" */
152f6476
JA
235
236static int per_process_stats;
cbc927b6 237static int per_device_and_cpu_stats = 1;
7997c5b0 238static int track_ios;
bf0720af 239static int ppi_hash_by_pid = 1;
57ea8602 240static int verbose;
98f8386b 241static unsigned int act_mask = -1U;
cbc927b6 242static int stats_printed;
d0ca268b 243
1d24fc14
JA
244static unsigned int t_alloc_cache;
245static unsigned int bit_alloc_cache;
246
7d747d22 247#define RB_BATCH_DEFAULT (512)
e820abd7 248static unsigned int rb_batch = RB_BATCH_DEFAULT;
79f19470 249
e7c9f3ff
NS
250static int pipeline;
251
412819ce
JA
252#define is_done() (*(volatile int *)(&done))
253static volatile int done;
254
bf0720af
JA
255#define JHASH_RANDOM (0x3af5f2ee)
256
824c2b39
JA
257#define CPUS_PER_LONG (8 * sizeof(unsigned long))
258#define CPU_IDX(cpu) ((cpu) / CPUS_PER_LONG)
259#define CPU_BIT(cpu) ((cpu) & (CPUS_PER_LONG - 1))
260
261static void cpu_mark_online(struct per_dev_info *pdi, unsigned int cpu)
262{
263 if (cpu >= pdi->cpu_map_max || !pdi->cpu_map) {
264 int new_max = (cpu + CPUS_PER_LONG) & ~(CPUS_PER_LONG - 1);
265 unsigned long *map = malloc(new_max / sizeof(long));
266
267 memset(map, 0, new_max / sizeof(long));
268
269 if (pdi->cpu_map) {
270 memcpy(map, pdi->cpu_map, pdi->cpu_map_max / sizeof(long));
271 free(pdi->cpu_map);
272 }
273
274 pdi->cpu_map = map;
275 pdi->cpu_map_max = new_max;
276 }
277
278 pdi->cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
279}
280
281static inline void cpu_mark_offline(struct per_dev_info *pdi, int cpu)
282{
283 pdi->cpu_map[CPU_IDX(cpu)] &= ~(1UL << CPU_BIT(cpu));
284}
285
286static inline int cpu_is_online(struct per_dev_info *pdi, int cpu)
287{
288 return (pdi->cpu_map[CPU_IDX(cpu)] & (1UL << CPU_BIT(cpu))) != 0;
289}
290
bf0720af
JA
291static inline int ppi_hash_pid(__u32 pid)
292{
293 return jhash_1word(pid, JHASH_RANDOM) & PPI_HASH_MASK;
294}
295
296static inline int ppi_hash_name(const char *name)
152f6476 297{
bf0720af
JA
298 return jhash(name, 16, JHASH_RANDOM) & PPI_HASH_MASK;
299}
300
301static inline int ppi_hash(struct per_process_info *ppi)
302{
303 if (ppi_hash_by_pid)
304 return ppi_hash_pid(ppi->pid);
305
bf0720af 306 return ppi_hash_name(ppi->name);
152f6476
JA
307}
308
309static inline void add_process_to_hash(struct per_process_info *ppi)
310{
bf0720af 311 const int hash_idx = ppi_hash(ppi);
152f6476 312
bf0720af
JA
313 ppi->hash_next = ppi_hash_table[hash_idx];
314 ppi_hash_table[hash_idx] = ppi;
152f6476
JA
315}
316
317static inline void add_process_to_list(struct per_process_info *ppi)
318{
319 ppi->list_next = ppi_list;
320 ppi_list = ppi;
886ecf0e 321 ppi_list_entries++;
152f6476
JA
322}
323
bf0720af
JA
324static struct per_process_info *find_process_by_name(char *name)
325{
326 const int hash_idx = ppi_hash_name(name);
327 struct per_process_info *ppi;
328
329 ppi = ppi_hash_table[hash_idx];
330 while (ppi) {
331 if (!strcmp(ppi->name, name))
332 return ppi;
333
334 ppi = ppi->hash_next;
335 }
336
337 return NULL;
338}
339
152f6476
JA
340static struct per_process_info *find_process_by_pid(__u32 pid)
341{
bf0720af 342 const int hash_idx = ppi_hash_pid(pid);
152f6476
JA
343 struct per_process_info *ppi;
344
bf0720af 345 ppi = ppi_hash_table[hash_idx];
152f6476
JA
346 while (ppi) {
347 if (ppi->pid == pid)
348 return ppi;
349
350 ppi = ppi->hash_next;
351 }
352
353 return NULL;
354}
355
bf0720af
JA
356static struct per_process_info *find_process(__u32 pid, char *name)
357{
715d8021
JA
358 struct per_process_info *ppi;
359
bf0720af
JA
360 if (ppi_hash_by_pid)
361 return find_process_by_pid(pid);
362
715d8021
JA
363 ppi = find_process_by_name(name);
364 if (ppi && ppi->pid != pid)
365 ppi->more_than_one = 1;
366
367 return ppi;
bf0720af
JA
368}
369
23211fdf
JA
370static inline int trace_rb_insert(struct trace *t, struct rb_root *root,
371 int check_time)
7997c5b0 372{
2a1b3424 373 struct rb_node **p = &root->rb_node;
7997c5b0
JA
374 struct rb_node *parent = NULL;
375 struct trace *__t;
376
377 while (*p) {
378 parent = *p;
2a1b3424 379
7997c5b0
JA
380 __t = rb_entry(parent, struct trace, rb_node);
381
23211fdf
JA
382 if (check_time) {
383 if (t->bit->time < __t->bit->time) {
384 p = &(*p)->rb_left;
385 continue;
386 } else if (t->bit->time > __t->bit->time) {
387 p = &(*p)->rb_right;
388 continue;
389 }
390 }
391 if (t->bit->device < __t->bit->device)
e7c9f3ff
NS
392 p = &(*p)->rb_left;
393 else if (t->bit->device > __t->bit->device)
394 p = &(*p)->rb_right;
dcf0f7ed
JA
395 else if (t->bit->sequence < __t->bit->sequence)
396 p = &(*p)->rb_left;
0b07f23e 397 else /* >= sequence */
dcf0f7ed 398 p = &(*p)->rb_right;
7997c5b0
JA
399 }
400
401 rb_link_node(&t->rb_node, parent, p);
2a1b3424 402 rb_insert_color(&t->rb_node, root);
7997c5b0
JA
403 return 0;
404}
405
2a1b3424 406static inline int trace_rb_insert_sort(struct trace *t)
e3556946 407{
23211fdf 408 if (!trace_rb_insert(t, &rb_sort_root, 1)) {
2a1b3424
JA
409 rb_sort_entries++;
410 return 0;
411 }
412
413 return 1;
414}
415
416static inline int trace_rb_insert_last(struct per_dev_info *pdi,struct trace *t)
417{
23211fdf 418 if (!trace_rb_insert(t, &pdi->rb_last, 1)) {
2a1b3424
JA
419 pdi->rb_last_entries++;
420 return 0;
421 }
422
423 return 1;
424}
425
426static struct trace *trace_rb_find(dev_t device, unsigned long sequence,
427 struct rb_root *root, int order)
428{
429 struct rb_node *n = root->rb_node;
430 struct rb_node *prev = NULL;
e3556946
JA
431 struct trace *__t;
432
2a1b3424
JA
433 while (n) {
434 __t = rb_entry(n, struct trace, rb_node);
435 prev = n;
e3556946 436
0583b6a2 437 if (device < __t->bit->device)
2a1b3424 438 n = n->rb_left;
0583b6a2 439 else if (device > __t->bit->device)
2a1b3424 440 n = n->rb_right;
0583b6a2 441 else if (sequence < __t->bit->sequence)
2a1b3424 442 n = n->rb_left;
e3556946 443 else if (sequence > __t->bit->sequence)
2a1b3424 444 n = n->rb_right;
e3556946
JA
445 else
446 return __t;
447 }
448
2a1b3424
JA
449 /*
450 * hack - the list may not be sequence ordered because some
451 * events don't have sequence and time matched. so we end up
452 * being a little off in the rb lookup here, because we don't
453 * know the time we are looking for. compensate by browsing
454 * a little ahead from the last entry to find the match
455 */
456 if (order && prev) {
457 int max = 5;
458
459 while (((n = rb_next(prev)) != NULL) && max--) {
460 __t = rb_entry(n, struct trace, rb_node);
461
462 if (__t->bit->device == device &&
463 __t->bit->sequence == sequence)
464 return __t;
465
466 prev = n;
467 }
468 }
469
e3556946
JA
470 return NULL;
471}
472
2a1b3424
JA
473static inline struct trace *trace_rb_find_sort(dev_t dev, unsigned long seq)
474{
475 return trace_rb_find(dev, seq, &rb_sort_root, 1);
476}
477
478static inline struct trace *trace_rb_find_last(struct per_dev_info *pdi,
479 unsigned long seq)
480{
f7bd1a9b 481 return trace_rb_find(pdi->dev, seq, &pdi->rb_last, 0);
2a1b3424
JA
482}
483
f7bd1a9b 484static inline int track_rb_insert(struct per_dev_info *pdi,struct io_track *iot)
7997c5b0 485{
f7bd1a9b 486 struct rb_node **p = &pdi->rb_track.rb_node;
7997c5b0
JA
487 struct rb_node *parent = NULL;
488 struct io_track *__iot;
489
490 while (*p) {
491 parent = *p;
7997c5b0
JA
492 __iot = rb_entry(parent, struct io_track, rb_node);
493
f7bd1a9b 494 if (iot->sector < __iot->sector)
7997c5b0
JA
495 p = &(*p)->rb_left;
496 else if (iot->sector > __iot->sector)
497 p = &(*p)->rb_right;
498 else {
e7c9f3ff 499 fprintf(stderr,
ab197ca7
AB
500 "sector alias (%Lu) on device %d,%d!\n",
501 (unsigned long long) iot->sector,
f7bd1a9b 502 MAJOR(pdi->dev), MINOR(pdi->dev));
7997c5b0
JA
503 return 1;
504 }
505 }
506
507 rb_link_node(&iot->rb_node, parent, p);
f7bd1a9b 508 rb_insert_color(&iot->rb_node, &pdi->rb_track);
7997c5b0
JA
509 return 0;
510}
511
f7bd1a9b 512static struct io_track *__find_track(struct per_dev_info *pdi, __u64 sector)
7997c5b0 513{
f7bd1a9b 514 struct rb_node *n = pdi->rb_track.rb_node;
7997c5b0
JA
515 struct io_track *__iot;
516
2a1b3424
JA
517 while (n) {
518 __iot = rb_entry(n, struct io_track, rb_node);
7997c5b0 519
f7bd1a9b 520 if (sector < __iot->sector)
2a1b3424 521 n = n->rb_left;
7997c5b0 522 else if (sector > __iot->sector)
2a1b3424 523 n = n->rb_right;
7997c5b0
JA
524 else
525 return __iot;
526 }
527
528 return NULL;
529}
530
f7bd1a9b
JA
531static struct io_track *find_track(struct per_dev_info *pdi, __u32 pid,
532 char *comm, __u64 sector)
7997c5b0 533{
916b5501 534 struct io_track *iot;
7997c5b0 535
f7bd1a9b 536 iot = __find_track(pdi, sector);
7997c5b0
JA
537 if (!iot) {
538 iot = malloc(sizeof(*iot));
50adc0ba 539 iot->pid = pid;
bf0720af 540 memcpy(iot->comm, comm, sizeof(iot->comm));
7997c5b0 541 iot->sector = sector;
f7bd1a9b 542 track_rb_insert(pdi, iot);
7997c5b0
JA
543 }
544
545 return iot;
546}
547
f7bd1a9b
JA
548static void log_track_frontmerge(struct per_dev_info *pdi,
549 struct blk_io_trace *t)
2e3e8ded
JA
550{
551 struct io_track *iot;
552
553 if (!track_ios)
554 return;
2e3e8ded 555
ae957cbc 556 iot = __find_track(pdi, t->sector + t_sec(t));
cb2a1a62 557 if (!iot) {
57ea8602
JA
558 if (verbose)
559 fprintf(stderr, "merge not found for (%d,%d): %llu\n",
560 MAJOR(pdi->dev), MINOR(pdi->dev),
561 (unsigned long long) t->sector + t_sec(t));
cb2a1a62 562 return;
2e3e8ded 563 }
cb2a1a62 564
f7bd1a9b 565 rb_erase(&iot->rb_node, &pdi->rb_track);
ae957cbc 566 iot->sector -= t_sec(t);
f7bd1a9b 567 track_rb_insert(pdi, iot);
2e3e8ded
JA
568}
569
f7bd1a9b 570static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t)
2e3e8ded
JA
571{
572 struct io_track *iot;
573
574 if (!track_ios)
575 return;
576
f7bd1a9b 577 iot = find_track(pdi, t->pid, t->comm, t->sector);
95c15013
JA
578 iot->allocation_time = t->time;
579}
580
95c15013 581/*
b6076a9b 582 * return time between rq allocation and insertion
95c15013 583 */
f7bd1a9b
JA
584static unsigned long long log_track_insert(struct per_dev_info *pdi,
585 struct blk_io_trace *t)
95c15013 586{
50adc0ba 587 unsigned long long elapsed;
95c15013
JA
588 struct io_track *iot;
589
590 if (!track_ios)
591 return -1;
592
f7bd1a9b 593 iot = find_track(pdi, t->pid, t->comm, t->sector);
2e3e8ded 594 iot->queue_time = t->time;
acd70d21
JA
595
596 if (!iot->allocation_time)
597 return -1;
598
50adc0ba
JA
599 elapsed = iot->queue_time - iot->allocation_time;
600
601 if (per_process_stats) {
bf0720af 602 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 603 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 604
b9d40d6f
JA
605 if (ppi && elapsed > ppi->longest_allocation_wait[w])
606 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
607 }
608
609 return elapsed;
2e3e8ded
JA
610}
611
612/*
613 * return time between queue and issue
614 */
f7bd1a9b
JA
615static unsigned long long log_track_issue(struct per_dev_info *pdi,
616 struct blk_io_trace *t)
2e3e8ded 617{
50adc0ba 618 unsigned long long elapsed;
2e3e8ded
JA
619 struct io_track *iot;
620
621 if (!track_ios)
622 return -1;
623 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
624 return -1;
625
f7bd1a9b 626 iot = __find_track(pdi, t->sector);
cb2a1a62 627 if (!iot) {
57ea8602
JA
628 if (verbose)
629 fprintf(stderr, "issue not found for (%d,%d): %llu\n",
630 MAJOR(pdi->dev), MINOR(pdi->dev),
631 (unsigned long long) t->sector);
2e3e8ded 632 return -1;
cb2a1a62 633 }
2e3e8ded
JA
634
635 iot->dispatch_time = t->time;
50adc0ba
JA
636 elapsed = iot->dispatch_time - iot->queue_time;
637
638 if (per_process_stats) {
bf0720af 639 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 640 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 641
b9d40d6f
JA
642 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
643 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
644 }
645
646 return elapsed;
2e3e8ded
JA
647}
648
649/*
650 * return time between dispatch and complete
651 */
f7bd1a9b
JA
652static unsigned long long log_track_complete(struct per_dev_info *pdi,
653 struct blk_io_trace *t)
2e3e8ded
JA
654{
655 unsigned long long elapsed;
656 struct io_track *iot;
657
658 if (!track_ios)
659 return -1;
660 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
661 return -1;
662
f7bd1a9b 663 iot = __find_track(pdi, t->sector);
cb2a1a62 664 if (!iot) {
57ea8602
JA
665 if (verbose)
666 fprintf(stderr,"complete not found for (%d,%d): %llu\n",
667 MAJOR(pdi->dev), MINOR(pdi->dev),
668 (unsigned long long) t->sector);
2e3e8ded 669 return -1;
cb2a1a62 670 }
2e3e8ded
JA
671
672 iot->completion_time = t->time;
673 elapsed = iot->completion_time - iot->dispatch_time;
674
50adc0ba 675 if (per_process_stats) {
bf0720af 676 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 677 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 678
b9d40d6f
JA
679 if (ppi && elapsed > ppi->longest_completion_wait[w])
680 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
681 }
682
2e3e8ded
JA
683 /*
684 * kill the trace, we don't need it after completion
685 */
f7bd1a9b 686 rb_erase(&iot->rb_node, &pdi->rb_track);
2e3e8ded
JA
687 free(iot);
688
689 return elapsed;
690}
691
692
152f6476
JA
693static struct io_stats *find_process_io_stats(__u32 pid, char *name)
694{
bf0720af 695 struct per_process_info *ppi = find_process(pid, name);
152f6476
JA
696
697 if (!ppi) {
698 ppi = malloc(sizeof(*ppi));
699 memset(ppi, 0, sizeof(*ppi));
bf0720af 700 memcpy(ppi->name, name, 16);
152f6476
JA
701 ppi->pid = pid;
702 add_process_to_hash(ppi);
703 add_process_to_list(ppi);
704 }
705
706 return &ppi->io_stats;
707}
708
e7c9f3ff 709static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 710{
e7c9f3ff
NS
711 struct per_cpu_info *cpus = pdi->cpus;
712 int ncpus = pdi->ncpus;
713 int new_count = cpu + 1;
714 int new_space, size;
a718bd37
NS
715 char *new_start;
716
e7c9f3ff
NS
717 size = new_count * sizeof(struct per_cpu_info);
718 cpus = realloc(cpus, size);
719 if (!cpus) {
720 char name[20];
721 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
722 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
723 exit(1);
724 }
725
e7c9f3ff
NS
726 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
727 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 728 memset(new_start, 0, new_space);
e7c9f3ff
NS
729
730 pdi->ncpus = new_count;
731 pdi->cpus = cpus;
732}
cb2a1a62 733
e7c9f3ff
NS
734static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
735{
cb2a1a62
JA
736 struct per_cpu_info *pci;
737
e7c9f3ff
NS
738 if (cpu >= pdi->ncpus)
739 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
740
741 pci = &pdi->cpus[cpu];
742 pci->cpu = cpu;
743 return pci;
a718bd37
NS
744}
745
e7c9f3ff
NS
746
747static int resize_devices(char *name)
a718bd37 748{
e7c9f3ff 749 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 750
e7c9f3ff
NS
751 devices = realloc(devices, size);
752 if (!devices) {
753 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
754 return 1;
755 }
756 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
757 devices[ndevices].name = name;
758 ndevices++;
759 return 0;
760}
a718bd37 761
f7bd1a9b 762static struct per_dev_info *get_dev_info(dev_t dev)
e7c9f3ff 763{
cb2a1a62 764 struct per_dev_info *pdi;
e7c9f3ff 765 int i;
c499bf38 766
7d747d22 767 for (i = 0; i < ndevices; i++) {
f7bd1a9b
JA
768 if (!devices[i].dev)
769 devices[i].dev = dev;
770 if (devices[i].dev == dev)
e7c9f3ff 771 return &devices[i];
7d747d22 772 }
cb2a1a62 773
2a1b3424 774 if (resize_devices(NULL))
e7c9f3ff 775 return NULL;
cb2a1a62
JA
776
777 pdi = &devices[ndevices - 1];
f7bd1a9b 778 pdi->dev = dev;
20ed6177 779 pdi->first_reported_time = 0;
d6222db8 780 pdi->last_sequence = -1;
287fa3d6 781 pdi->last_read_time = 0;
2a1b3424
JA
782 memset(&pdi->rb_last, 0, sizeof(pdi->rb_last));
783 pdi->rb_last_entries = 0;
cb2a1a62 784 return pdi;
a718bd37
NS
785}
786
e7c9f3ff
NS
787static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
788{
789 if (pdi->name)
790 snprintf(buffer, size, "%s", pdi->name);
791 else
f7bd1a9b 792 snprintf(buffer, size, "%d,%d",MAJOR(pdi->dev),MINOR(pdi->dev));
e7c9f3ff
NS
793 return buffer;
794}
795
e7c9f3ff 796static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
797{
798 unsigned long long this = bit->time;
e7c9f3ff 799 unsigned long long last = pdi->last_reported_time;
cfab07eb 800
e7c9f3ff
NS
801 pdi->backwards = (this < last) ? 'B' : ' ';
802 pdi->last_reported_time = this;
cfab07eb
AB
803}
804
152f6476
JA
805static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
806 int rw)
d0ca268b
JA
807{
808 if (rw) {
152f6476 809 ios->mwrites++;
ae957cbc 810 ios->qwrite_kb += t_kb(t);
d0ca268b 811 } else {
152f6476 812 ios->mreads++;
ae957cbc 813 ios->qread_kb += t_kb(t);
152f6476
JA
814 }
815}
816
817static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
818 int rw)
819{
820 __account_m(&pci->io_stats, t, rw);
821
822 if (per_process_stats) {
823 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
824
825 __account_m(ios, t, rw);
d0ca268b
JA
826 }
827}
828
b6076a9b
JA
829static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
830 int rw)
d0ca268b
JA
831{
832 if (rw) {
152f6476 833 ios->qwrites++;
ae957cbc 834 ios->qwrite_kb += t_kb(t);
d0ca268b 835 } else {
152f6476 836 ios->qreads++;
ae957cbc 837 ios->qread_kb += t_kb(t);
152f6476
JA
838 }
839}
840
b6076a9b
JA
841static inline void account_queue(struct blk_io_trace *t,
842 struct per_cpu_info *pci, int rw)
152f6476 843{
b6076a9b 844 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
845
846 if (per_process_stats) {
847 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
848
b6076a9b 849 __account_queue(ios, t, rw);
d0ca268b
JA
850 }
851}
852
e21dc4dd 853static inline void __account_c(struct io_stats *ios, int rw, int bytes)
d0ca268b
JA
854{
855 if (rw) {
152f6476
JA
856 ios->cwrites++;
857 ios->cwrite_kb += bytes >> 10;
d0ca268b 858 } else {
152f6476
JA
859 ios->creads++;
860 ios->cread_kb += bytes >> 10;
861 }
862}
863
864static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
865 int rw, int bytes)
866{
867 __account_c(&pci->io_stats, rw, bytes);
868
869 if (per_process_stats) {
870 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
871
872 __account_c(ios, rw, bytes);
d0ca268b
JA
873 }
874}
875
b6076a9b
JA
876static inline void __account_issue(struct io_stats *ios, int rw,
877 unsigned int bytes)
afd2d7ad 878{
879 if (rw) {
152f6476
JA
880 ios->iwrites++;
881 ios->iwrite_kb += bytes >> 10;
afd2d7ad 882 } else {
152f6476
JA
883 ios->ireads++;
884 ios->iread_kb += bytes >> 10;
afd2d7ad 885 }
886}
887
b6076a9b
JA
888static inline void account_issue(struct blk_io_trace *t,
889 struct per_cpu_info *pci, int rw)
d0ca268b 890{
b6076a9b 891 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
892
893 if (per_process_stats) {
894 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 895
b6076a9b 896 __account_issue(ios, rw, t->bytes);
152f6476
JA
897 }
898}
899
06639b27
JA
900static inline void __account_unplug(struct io_stats *ios, int timer)
901{
902 if (timer)
903 ios->timer_unplugs++;
904 else
905 ios->io_unplugs++;
906}
907
908static inline void account_unplug(struct blk_io_trace *t,
909 struct per_cpu_info *pci, int timer)
910{
911 __account_unplug(&pci->io_stats, timer);
912
913 if (per_process_stats) {
914 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
915
916 __account_unplug(ios, timer);
917 }
918}
919
4054070a
JA
920static inline void __account_requeue(struct io_stats *ios,
921 struct blk_io_trace *t, int rw)
922{
923 if (rw) {
924 ios->wrqueue++;
925 ios->iwrite_kb -= t_kb(t);
926 } else {
927 ios->rrqueue++;
928 ios->iread_kb -= t_kb(t);
929 }
930}
931
932static inline void account_requeue(struct blk_io_trace *t,
933 struct per_cpu_info *pci, int rw)
934{
935 __account_requeue(&pci->io_stats, t, rw);
936
937 if (per_process_stats) {
938 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
939
940 __account_requeue(ios, t, rw);
941 }
942}
943
f7bd1a9b
JA
944static void log_complete(struct per_dev_info *pdi, struct per_cpu_info *pci,
945 struct blk_io_trace *t, char *act)
ab197ca7 946{
f7bd1a9b 947 process_fmt(act, pci, t, log_track_complete(pdi, t), 0, NULL);
ab197ca7
AB
948}
949
f7bd1a9b
JA
950static void log_insert(struct per_dev_info *pdi, struct per_cpu_info *pci,
951 struct blk_io_trace *t, char *act)
b6076a9b 952{
f7bd1a9b 953 process_fmt(act, pci, t, log_track_insert(pdi, t), 0, NULL);
b6076a9b
JA
954}
955
ab197ca7
AB
956static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
957 char *act)
958{
b6076a9b 959 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 960}
2e3e8ded 961
f7bd1a9b
JA
962static void log_issue(struct per_dev_info *pdi, struct per_cpu_info *pci,
963 struct blk_io_trace *t, char *act)
ab197ca7 964{
f7bd1a9b 965 process_fmt(act, pci, t, log_track_issue(pdi, t), 0, NULL);
d0ca268b
JA
966}
967
f7bd1a9b
JA
968static void log_merge(struct per_dev_info *pdi, struct per_cpu_info *pci,
969 struct blk_io_trace *t, char *act)
d0ca268b 970{
a01516de 971 if (act[0] == 'F')
f7bd1a9b 972 log_track_frontmerge(pdi, t);
2e3e8ded 973
ab197ca7 974 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
975}
976
dfe34da1 977static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 978 char *act)
dfe34da1 979{
ab197ca7 980 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
981}
982
d5396421 983static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 984 char *act)
d0ca268b 985{
ab197ca7 986 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
987}
988
ab197ca7 989static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 990 char *act)
67e14fdc 991{
ab197ca7 992 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
993}
994
93f1c611
JA
995static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
996 char *act)
997{
998 process_fmt(act, pci, t, -1ULL, 0, NULL);
999}
1000
ab197ca7 1001static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1002{
ab197ca7 1003 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1004
ab197ca7 1005 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1006}
1007
ff3a732c 1008static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 1009{
56f2af81
JA
1010 int act = t->action & 0xffff;
1011
1012 switch (act) {
d0ca268b 1013 case __BLK_TA_QUEUE:
3639a11e 1014 log_generic(pci, t, "Q");
d0ca268b
JA
1015 break;
1016 case __BLK_TA_GETRQ:
3639a11e 1017 log_generic(pci, t, "G");
d0ca268b
JA
1018 break;
1019 case __BLK_TA_SLEEPRQ:
3639a11e 1020 log_generic(pci, t, "S");
d0ca268b
JA
1021 break;
1022 case __BLK_TA_REQUEUE:
3639a11e 1023 log_generic(pci, t, "R");
d0ca268b
JA
1024 break;
1025 case __BLK_TA_ISSUE:
ab197ca7 1026 log_pc(pci, t, "D");
d0ca268b
JA
1027 break;
1028 case __BLK_TA_COMPLETE:
3639a11e 1029 log_pc(pci, t, "C");
d0ca268b 1030 break;
56f2af81
JA
1031 case __BLK_TA_INSERT:
1032 log_pc(pci, t, "I");
1033 break;
d0ca268b 1034 default:
56f2af81 1035 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 1036 break;
d0ca268b 1037 }
d0ca268b
JA
1038}
1039
f7bd1a9b
JA
1040static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
1041 struct per_cpu_info *pci)
d0ca268b
JA
1042{
1043 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 1044 int act = t->action & 0xffff;
d0ca268b 1045
7997c5b0 1046 switch (act) {
d0ca268b 1047 case __BLK_TA_QUEUE:
b6076a9b 1048 account_queue(t, pci, w);
3639a11e 1049 log_queue(pci, t, "Q");
d0ca268b 1050 break;
b6076a9b 1051 case __BLK_TA_INSERT:
f7bd1a9b 1052 log_insert(pdi, pci, t, "I");
b6076a9b 1053 break;
d0ca268b 1054 case __BLK_TA_BACKMERGE:
152f6476 1055 account_m(t, pci, w);
f7bd1a9b 1056 log_merge(pdi, pci, t, "M");
d0ca268b
JA
1057 break;
1058 case __BLK_TA_FRONTMERGE:
152f6476 1059 account_m(t, pci, w);
f7bd1a9b 1060 log_merge(pdi, pci, t, "F");
d0ca268b
JA
1061 break;
1062 case __BLK_TA_GETRQ:
f7bd1a9b 1063 log_track_getrq(pdi, t);
3639a11e 1064 log_generic(pci, t, "G");
d0ca268b
JA
1065 break;
1066 case __BLK_TA_SLEEPRQ:
3639a11e 1067 log_generic(pci, t, "S");
d0ca268b
JA
1068 break;
1069 case __BLK_TA_REQUEUE:
4054070a 1070 account_requeue(t, pci, w);
3639a11e 1071 log_queue(pci, t, "R");
d0ca268b
JA
1072 break;
1073 case __BLK_TA_ISSUE:
b6076a9b 1074 account_issue(t, pci, w);
f7bd1a9b 1075 log_issue(pdi, pci, t, "D");
d0ca268b
JA
1076 break;
1077 case __BLK_TA_COMPLETE:
152f6476 1078 account_c(t, pci, w, t->bytes);
f7bd1a9b 1079 log_complete(pdi, pci, t, "C");
d0ca268b 1080 break;
88b1a526 1081 case __BLK_TA_PLUG:
3639a11e 1082 log_action(pci, t, "P");
88b1a526 1083 break;
3639a11e 1084 case __BLK_TA_UNPLUG_IO:
06639b27 1085 account_unplug(t, pci, 0);
3639a11e
JA
1086 log_unplug(pci, t, "U");
1087 break;
1088 case __BLK_TA_UNPLUG_TIMER:
06639b27 1089 account_unplug(t, pci, 1);
3639a11e 1090 log_unplug(pci, t, "UT");
88b1a526 1091 break;
93f1c611
JA
1092 case __BLK_TA_SPLIT:
1093 log_split(pci, t, "X");
1094 break;
1095 case __BLK_TA_BOUNCE:
1096 log_generic(pci, t, "B");
1097 break;
a8f30e64
JA
1098 case __BLK_TA_REMAP:
1099 log_generic(pci, t, "A");
1100 break;
d0ca268b
JA
1101 default:
1102 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1103 break;
d0ca268b 1104 }
d0ca268b
JA
1105}
1106
ff3a732c
JA
1107static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1108 struct per_dev_info *pdi)
d0ca268b
JA
1109{
1110 if (t->action & BLK_TC_ACT(BLK_TC_PC))
ff3a732c 1111 dump_trace_pc(t, pci);
d0ca268b 1112 else
f7bd1a9b 1113 dump_trace_fs(t, pdi, pci);
87b72777 1114
20ed6177
JA
1115 if (!pdi->events)
1116 pdi->first_reported_time = t->time;
1117
e7c9f3ff 1118 pdi->events++;
d0ca268b
JA
1119}
1120
4c523165
JA
1121/*
1122 * print in a proper way, not too small and not too big. if more than
1123 * 1000,000K, turn into M and so on
1124 */
1125static char *size_cnv(char *dst, unsigned long long num, int in_kb)
1126{
da19e768 1127 char suff[] = { '\0', 'K', 'M', 'G', 'P' };
0dc3602c 1128 unsigned int i = 0;
4c523165
JA
1129
1130 if (in_kb)
1131 i++;
1132
0dc3602c 1133 while (num > 1000 * 1000ULL && (i < sizeof(suff) - 1)) {
4c523165
JA
1134 i++;
1135 num /= 1000;
1136 }
1137
1138 sprintf(dst, "%'8Lu%c", num, suff[i]);
1139 return dst;
1140}
1141
152f6476 1142static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 1143{
4c523165
JA
1144 static char x[256], y[256];
1145
152f6476
JA
1146 fprintf(ofp, "%s\n", msg);
1147
4c523165
JA
1148 fprintf(ofp, " Reads Queued: %s, %siB\t", size_cnv(x, ios->qreads, 0), size_cnv(y, ios->qread_kb, 1));
1149 fprintf(ofp, " Writes Queued: %s, %siB\n", size_cnv(x, ios->qwrites, 0), size_cnv(y, ios->qwrite_kb, 1));
0a6b8fc4 1150
4c523165
JA
1151 fprintf(ofp, " Read Dispatches: %s, %siB\t", size_cnv(x, ios->ireads, 0), size_cnv(y, ios->iread_kb, 1));
1152 fprintf(ofp, " Write Dispatches: %s, %siB\n", size_cnv(x, ios->iwrites, 0), size_cnv(y, ios->iwrite_kb, 1));
4054070a
JA
1153 fprintf(ofp, " Reads Requeued: %s\t\t", size_cnv(x, ios->rrqueue, 0));
1154 fprintf(ofp, " Writes Requeued: %s\n", size_cnv(x, ios->wrqueue, 0));
4c523165
JA
1155 fprintf(ofp, " Reads Completed: %s, %siB\t", size_cnv(x, ios->creads, 0), size_cnv(y, ios->cread_kb, 1));
1156 fprintf(ofp, " Writes Completed: %s, %siB\n", size_cnv(x, ios->cwrites, 0), size_cnv(y, ios->cwrite_kb, 1));
152f6476 1157 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 1158 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
1159 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1160 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1161}
1162
50adc0ba
JA
1163static void dump_wait_stats(struct per_process_info *ppi)
1164{
b9d40d6f
JA
1165 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1166 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1167 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1168 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1169 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1170 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1171
1172 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1173 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1174 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1175 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1176 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1177 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1178}
1179
886ecf0e
JA
1180static int ppi_name_compare(const void *p1, const void *p2)
1181{
1182 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1183 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1184 int res;
1185
1186 res = strverscmp(ppi1->name, ppi2->name);
1187 if (!res)
06e6f286 1188 res = ppi1->pid > ppi2->pid;
886ecf0e
JA
1189
1190 return res;
1191}
1192
1193static void sort_process_list(void)
1194{
1195 struct per_process_info **ppis;
1196 struct per_process_info *ppi;
1197 int i = 0;
1198
1199 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1200
1201 ppi = ppi_list;
1202 while (ppi) {
06e6f286 1203 ppis[i++] = ppi;
886ecf0e
JA
1204 ppi = ppi->list_next;
1205 }
1206
06e6f286 1207 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
886ecf0e
JA
1208
1209 i = ppi_list_entries - 1;
1210 ppi_list = NULL;
1211 while (i >= 0) {
1212 ppi = ppis[i];
1213
1214 ppi->list_next = ppi_list;
1215 ppi_list = ppi;
1216 i--;
1217 }
50c38702
JA
1218
1219 free(ppis);
886ecf0e
JA
1220}
1221
152f6476
JA
1222static void show_process_stats(void)
1223{
1224 struct per_process_info *ppi;
1225
886ecf0e
JA
1226 sort_process_list();
1227
152f6476
JA
1228 ppi = ppi_list;
1229 while (ppi) {
ce8b6b4f
JA
1230 char name[64];
1231
715d8021 1232 if (ppi->more_than_one)
bf0720af 1233 sprintf(name, "%s (%u, ...)", ppi->name, ppi->pid);
715d8021
JA
1234 else
1235 sprintf(name, "%s (%u)", ppi->name, ppi->pid);
bf0720af 1236
ce8b6b4f 1237 dump_io_stats(&ppi->io_stats, name);
50adc0ba 1238 dump_wait_stats(ppi);
152f6476
JA
1239 ppi = ppi->list_next;
1240 }
1241
1242 fprintf(ofp, "\n");
1243}
1244
e7c9f3ff 1245static void show_device_and_cpu_stats(void)
d0ca268b 1246{
e7c9f3ff
NS
1247 struct per_dev_info *pdi;
1248 struct per_cpu_info *pci;
1249 struct io_stats total, *ios;
20ed6177 1250 unsigned long long rrate, wrate, msec;
e7c9f3ff
NS
1251 int i, j, pci_events;
1252 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1253 char name[32];
1254
1255 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1256
1257 memset(&total, 0, sizeof(total));
1258 pci_events = 0;
1259
1260 if (i > 0)
1261 fprintf(ofp, "\n");
1262
1263 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1264 if (!pci->nelems)
1265 continue;
1266
1267 ios = &pci->io_stats;
1268 total.qreads += ios->qreads;
1269 total.qwrites += ios->qwrites;
1270 total.creads += ios->creads;
1271 total.cwrites += ios->cwrites;
1272 total.mreads += ios->mreads;
1273 total.mwrites += ios->mwrites;
1274 total.ireads += ios->ireads;
1275 total.iwrites += ios->iwrites;
4054070a
JA
1276 total.rrqueue += ios->rrqueue;
1277 total.wrqueue += ios->wrqueue;
e7c9f3ff
NS
1278 total.qread_kb += ios->qread_kb;
1279 total.qwrite_kb += ios->qwrite_kb;
1280 total.cread_kb += ios->cread_kb;
1281 total.cwrite_kb += ios->cwrite_kb;
1282 total.iread_kb += ios->iread_kb;
1283 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1284 total.timer_unplugs += ios->timer_unplugs;
1285 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1286
1287 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1288 j, get_dev_name(pdi, name, sizeof(name)));
1289 dump_io_stats(ios, line);
1290 pci_events++;
1291 }
5c017e4b 1292
e7c9f3ff
NS
1293 if (pci_events > 1) {
1294 fprintf(ofp, "\n");
1295 snprintf(line, sizeof(line) - 1, "Total (%s):",
1296 get_dev_name(pdi, name, sizeof(name)));
1297 dump_io_stats(&total, line);
1298 }
d0ca268b 1299
20ed6177 1300 wrate = rrate = 0;
20ed6177
JA
1301 msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000;
1302 if (msec) {
1303 rrate = 1000 * total.cread_kb / msec;
1304 wrate = 1000 * total.cwrite_kb / msec;
1305 }
1306
dce0f678
AB
1307 fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n",
1308 rrate, wrate);
1309 fprintf(ofp, "Events (%s): %'Lu entries\n",
1310 get_dev_name(pdi, line, sizeof(line)), pdi->events);
1311 fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%) %'lu backward (%'llu - %5.1lf%%)\n",
1312 pdi->skips,pdi->seq_skips,
1313 100.0 * ((double)pdi->seq_skips /
1314 (double)(pdi->events + pdi->seq_skips)),
1315 pdi->nskips,pdi->seq_nskips,
1316 100.0 * ((double)pdi->seq_nskips /
1317 (double)(pdi->events + pdi->seq_skips)));
e7c9f3ff 1318 }
d0ca268b
JA
1319}
1320
d36421e4
JA
1321/*
1322 * struct trace and blktrace allocation cache, we do potentially
1323 * millions of mallocs for these structures while only using at most
1324 * a few thousand at the time
1325 */
1326static inline void t_free(struct trace *t)
1327{
3c8413cb 1328 if (t_alloc_cache < 1024) {
1d24fc14
JA
1329 t->next = t_alloc_list;
1330 t_alloc_list = t;
3c8413cb
JA
1331 t_alloc_cache++;
1332 } else
1333 free(t);
d36421e4
JA
1334}
1335
1336static inline struct trace *t_alloc(void)
1337{
1338 struct trace *t = t_alloc_list;
1339
1340 if (t) {
1341 t_alloc_list = t->next;
3c8413cb 1342 t_alloc_cache--;
d36421e4
JA
1343 return t;
1344 }
1345
1346 return malloc(sizeof(*t));
1347}
1348
1349static inline void bit_free(struct blk_io_trace *bit)
1350{
eb9bd4e9 1351 if (bit_alloc_cache < 1024 && !bit->pdu_len) {
1d24fc14
JA
1352 /*
1353 * abuse a 64-bit field for a next pointer for the free item
1354 */
1355 bit->time = (__u64) (unsigned long) bit_alloc_list;
1356 bit_alloc_list = (struct blk_io_trace *) bit;
3c8413cb
JA
1357 bit_alloc_cache++;
1358 } else
1359 free(bit);
d36421e4
JA
1360}
1361
1362static inline struct blk_io_trace *bit_alloc(void)
1363{
1364 struct blk_io_trace *bit = bit_alloc_list;
1365
1366 if (bit) {
0c39425c
NS
1367 bit_alloc_list = (struct blk_io_trace *) (unsigned long) \
1368 bit->time;
3c8413cb 1369 bit_alloc_cache--;
d36421e4
JA
1370 return bit;
1371 }
1372
1373 return malloc(sizeof(*bit));
1374}
1375
4f0ae44f
JA
1376static void find_genesis(void)
1377{
1378 struct trace *t = trace_list;
1379
1380 genesis_time = -1ULL;
1381 while (t != NULL) {
1382 if (t->bit->time < genesis_time)
1383 genesis_time = t->bit->time;
1384
1385 t = t->next;
1386 }
1387}
1388
7f4d89e6 1389static inline int check_stopwatch(struct blk_io_trace *bit)
4f0ae44f 1390{
7f4d89e6
JA
1391 if (bit->time < stopwatch_end &&
1392 bit->time >= stopwatch_start)
4f0ae44f
JA
1393 return 0;
1394
1395 return 1;
1396}
1397
53c68c88
JA
1398/*
1399 * return youngest entry read
1400 */
1401static int sort_entries(unsigned long long *youngest)
4f0ae44f
JA
1402{
1403 struct trace *t;
4f0ae44f
JA
1404
1405 if (!genesis_time)
1406 find_genesis();
1407
d6222db8 1408 *youngest = 0;
4f0ae44f
JA
1409 while ((t = trace_list) != NULL) {
1410 struct blk_io_trace *bit = t->bit;
1411
1412 trace_list = t->next;
1413
7f4d89e6 1414 bit->time -= genesis_time;
4f0ae44f 1415
d6222db8
JA
1416 if (bit->time < *youngest || !*youngest)
1417 *youngest = bit->time;
1418
774a1a10
JA
1419 if (bit->sequence < smallest_seq_read)
1420 smallest_seq_read = bit->sequence;
1421
7f4d89e6 1422 if (check_stopwatch(bit)) {
4f0ae44f
JA
1423 bit_free(bit);
1424 t_free(t);
1425 continue;
1426 }
1427
2a1b3424 1428 if (trace_rb_insert_sort(t))
53c68c88 1429 return -1;
4f0ae44f
JA
1430 }
1431
53c68c88 1432 return 0;
4f0ae44f
JA
1433}
1434
1c24add6
JA
1435static inline void __put_trace_last(struct per_dev_info *pdi, struct trace *t)
1436{
1437 rb_erase(&t->rb_node, &pdi->rb_last);
1438 pdi->rb_last_entries--;
1439
1440 bit_free(t->bit);
1441 t_free(t);
1442}
1443
1444static void put_trace(struct per_dev_info *pdi, struct trace *t)
2a1b3424
JA
1445{
1446 rb_erase(&t->rb_node, &rb_sort_root);
1447 rb_sort_entries--;
1448
1449 trace_rb_insert_last(pdi, t);
1450
1c24add6 1451 if (pdi->rb_last_entries > rb_batch * pdi->nfiles) {
2a1b3424
JA
1452 struct rb_node *n = rb_first(&pdi->rb_last);
1453
1454 t = rb_entry(n, struct trace, rb_node);
1c24add6 1455 __put_trace_last(pdi, t);
2a1b3424
JA
1456 }
1457}
1458
824c2b39
JA
1459/*
1460 * to continue, we must have traces from all online cpus in the tree
1461 */
1462static int check_cpu_map(struct per_dev_info *pdi)
1463{
1464 unsigned long *cpu_map;
1465 struct rb_node *n;
1466 struct trace *__t;
1467 unsigned int i;
1468 int ret, cpu;
1469
1470 /*
1471 * create a map of the cpus we have traces for
1472 */
1473 cpu_map = malloc(pdi->cpu_map_max / sizeof(long));
1474 n = rb_first(&rb_sort_root);
1475 while (n) {
1476 __t = rb_entry(n, struct trace, rb_node);
1477 cpu = __t->bit->cpu;
1478
1479 cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
1480 n = rb_next(n);
1481 }
1482
1483 /*
b1c8e614
JA
1484 * we can't continue if pdi->cpu_map has entries set that we don't
1485 * have in the sort rbtree. the opposite is not a problem, though
824c2b39
JA
1486 */
1487 ret = 0;
1488 for (i = 0; i < pdi->cpu_map_max / CPUS_PER_LONG; i++) {
1489 if (pdi->cpu_map[i] & ~(cpu_map[i])) {
1490 ret = 1;
1491 break;
1492 }
1493 }
1494
1495 free(cpu_map);
1496 return ret;
1497}
1498
a141a7cd 1499static int check_sequence(struct per_dev_info *pdi, struct trace *t, int force)
2a1b3424
JA
1500{
1501 unsigned long expected_sequence = pdi->last_sequence + 1;
1ca323a5
JA
1502 struct blk_io_trace *bit = t->bit;
1503 struct trace *__t;
d6222db8 1504
774a1a10 1505 if (!expected_sequence) {
774a1a10
JA
1506 /*
1507 * 1 should be the first entry, just allow it
1508 */
1509 if (bit->sequence == 1)
1510 return 0;
1511
824c2b39 1512 return check_cpu_map(pdi);
774a1a10 1513 }
2a1b3424
JA
1514
1515 if (bit->sequence == expected_sequence)
1516 return 0;
1517
2a1b3424 1518 /*
1c7c54aa
JA
1519 * we may not have seen that sequence yet. if we are not doing
1520 * the final run, break and wait for more entries.
1c24add6 1521 */
1c7c54aa 1522 if (expected_sequence < smallest_seq_read) {
1ca323a5
JA
1523 __t = trace_rb_find_last(pdi, expected_sequence);
1524 if (!__t)
1c7c54aa 1525 goto skip;
2a1b3424 1526
1ca323a5 1527 __put_trace_last(pdi, __t);
2a1b3424 1528 return 0;
a141a7cd
JA
1529 } else if (!force) {
1530 return 1;
0b07f23e 1531 } else {
1c7c54aa 1532skip:
57ea8602 1533 if (verbose) {
7d1c0411
JA
1534 fprintf(stderr, "(%d,%d): skipping %lu -> %u\n",
1535 MAJOR(pdi->dev), MINOR(pdi->dev),
1536 pdi->last_sequence, bit->sequence);
1537 }
dce0f678
AB
1538 if (bit->sequence > pdi->last_sequence) {
1539 pdi->skips++;
1540 pdi->seq_skips += (bit->sequence - pdi->last_sequence);
1541 } else {
1542 pdi->nskips++;
1543 pdi->seq_nskips += (pdi->last_sequence - bit->sequence);
1544 }
1c7c54aa
JA
1545 return 0;
1546 }
2a1b3424
JA
1547}
1548
a649216c 1549static void show_entries_rb(int force)
8fc0abbc 1550{
1f7afa72
JA
1551 struct per_dev_info *pdi = NULL;
1552 struct per_cpu_info *pci = NULL;
8fc0abbc 1553 struct blk_io_trace *bit;
3aabcd89 1554 struct rb_node *n;
8fc0abbc 1555 struct trace *t;
1f7afa72 1556
7d747d22 1557 while ((n = rb_first(&rb_sort_root)) != NULL) {
dd90748f 1558 if (is_done() && !force && !pipeline)
1f7afa72 1559 break;
8fc0abbc
JA
1560
1561 t = rb_entry(n, struct trace, rb_node);
1562 bit = t->bit;
1563
f7bd1a9b 1564 if (!pdi || pdi->dev != bit->device)
287fa3d6 1565 pdi = get_dev_info(bit->device);
1f7afa72 1566
e7c9f3ff
NS
1567 if (!pdi) {
1568 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1569 MAJOR(bit->device), MINOR(bit->device));
1570 break;
1571 }
1f7afa72 1572
a141a7cd
JA
1573 if (check_sequence(pdi, t, force))
1574 break;
cb2a1a62 1575
a141a7cd
JA
1576 if (!force && bit->time > last_allowed_time)
1577 break;
8fc0abbc 1578
be925321
JA
1579 pdi->last_sequence = bit->sequence;
1580
4f0ae44f 1581 check_time(pdi, bit);
8fc0abbc 1582
4f0ae44f
JA
1583 if (!pci || pci->cpu != bit->cpu)
1584 pci = get_cpu_info(pdi, bit->cpu);
287fa3d6 1585
cbc927b6
JA
1586 pci->nelems++;
1587
98f8386b
AB
1588 if (bit->action & (act_mask << BLK_TC_SHIFT))
1589 dump_trace(bit, pci, pdi);
87b72777 1590
2a1b3424 1591 put_trace(pdi, t);
cb2a1a62 1592 }
8fc0abbc
JA
1593}
1594
1f79c4a0
JA
1595static int read_data(int fd, void *buffer, int bytes, int block)
1596{
1597 int ret, bytes_left, fl;
1598 void *p;
1599
1600 fl = fcntl(fd, F_GETFL);
1601
1602 if (!block)
1603 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1604 else
1605 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1606
1607 bytes_left = bytes;
1608 p = buffer;
1609 while (bytes_left > 0) {
1610 ret = read(fd, p, bytes_left);
1611 if (!ret)
1612 return 1;
1613 else if (ret < 0) {
1614 if (errno != EAGAIN)
1615 perror("read");
a649216c 1616
1f79c4a0
JA
1617 return -1;
1618 } else {
1619 p += ret;
1620 bytes_left -= ret;
1621 }
1622 }
1623
1624 return 0;
1625}
1626
a649216c 1627static int read_events(int fd, int always_block)
cb2a1a62 1628{
287fa3d6 1629 struct per_dev_info *pdi = NULL;
e820abd7 1630 unsigned int events = 0;
7d747d22
JA
1631
1632 while (!is_done() && events < rb_batch) {
1633 struct blk_io_trace *bit;
1634 struct trace *t;
1635 int pdu_len;
1636 __u32 magic;
1637
d36421e4 1638 bit = bit_alloc();
cb2a1a62 1639
eb9bd4e9
JA
1640 if (read_data(fd, bit, sizeof(*bit), !events || always_block)) {
1641 bit_free(bit);
cb2a1a62 1642 break;
eb9bd4e9 1643 }
cb2a1a62 1644
7d747d22
JA
1645 magic = be32_to_cpu(bit->magic);
1646 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1647 fprintf(stderr, "Bad magic %x\n", magic);
1648 break;
1649 }
1650
1651 pdu_len = be16_to_cpu(bit->pdu_len);
1652 if (pdu_len) {
1653 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
1654
eb9bd4e9
JA
1655 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1)) {
1656 bit_free(ptr);
7d747d22 1657 break;
eb9bd4e9 1658 }
7d747d22
JA
1659
1660 bit = ptr;
1661 }
1662
d6222db8
JA
1663 trace_to_cpu(bit);
1664
1665 if (verify_trace(bit)) {
1666 bit_free(bit);
1667 continue;
1668 }
1669
d36421e4 1670 t = t_alloc();
cb2a1a62
JA
1671 memset(t, 0, sizeof(*t));
1672 t->bit = bit;
1673
7d747d22
JA
1674 t->next = trace_list;
1675 trace_list = t;
1f7afa72 1676
f7bd1a9b 1677 if (!pdi || pdi->dev != bit->device)
287fa3d6
JA
1678 pdi = get_dev_info(bit->device);
1679
1680 if (bit->time > pdi->last_read_time)
1681 pdi->last_read_time = bit->time;
1682
7d747d22 1683 events++;
cb2a1a62
JA
1684 }
1685
7d747d22 1686 return events;
cb2a1a62
JA
1687}
1688
d5396421 1689static int do_file(void)
d0ca268b 1690{
7d747d22 1691 struct per_cpu_info *pci;
73877e12
NS
1692 struct per_dev_info *pdi;
1693 int i, j, events, events_added;
d0ca268b 1694
7d747d22
JA
1695 /*
1696 * first prepare all files for reading
1697 */
e8741a4a 1698 for (i = 0; i < ndevices; i++) {
73877e12
NS
1699 pdi = &devices[i];
1700 pdi->nfiles = 0;
d6222db8 1701 pdi->last_sequence = -1;
73877e12 1702
74feaeb5 1703 for (j = 0;; j++) {
e7c9f3ff 1704 struct stat st;
d1d7f15f 1705 int len = 0;
6e0073ed 1706 char *p, *dname;
87b72777 1707
e7c9f3ff
NS
1708 pci = get_cpu_info(pdi, j);
1709 pci->cpu = j;
7d747d22 1710 pci->fd = -1;
6e0073ed
JA
1711
1712 p = strdup(pdi->name);
1713 dname = dirname(p);
1714 if (strcmp(dname, ".")) {
1715 input_dir = dname;
1716 p = strdup(pdi->name);
1717 strcpy(pdi->name, basename(p));
1718 }
1719 free(p);
d0ca268b 1720
d1d7f15f
JA
1721 if (input_dir)
1722 len = sprintf(pci->fname, "%s/", input_dir);
1723
1724 snprintf(pci->fname + len, sizeof(pci->fname)-1-len,
7d747d22 1725 "%s.blktrace.%d", pdi->name, pci->cpu);
e7c9f3ff
NS
1726 if (stat(pci->fname, &st) < 0)
1727 break;
74feaeb5
AB
1728 if (st.st_size) {
1729 pci->fd = open(pci->fname, O_RDONLY);
1730 if (pci->fd < 0) {
1731 perror(pci->fname);
1732 continue;
1733 }
e7c9f3ff
NS
1734 }
1735
7d747d22 1736 printf("Input file %s added\n", pci->fname);
73877e12 1737 pdi->nfiles++;
824c2b39 1738 cpu_mark_online(pdi, pci->cpu);
d0ca268b 1739 }
d5396421
JA
1740 }
1741
7d747d22
JA
1742 /*
1743 * now loop over the files reading in the data
1744 */
412819ce 1745 do {
53c68c88
JA
1746 unsigned long long youngest;
1747
7d747d22 1748 events_added = 0;
287fa3d6 1749 last_allowed_time = -1ULL;
1c7c54aa 1750 smallest_seq_read = -1U;
d5396421 1751
7d747d22 1752 for (i = 0; i < ndevices; i++) {
73877e12
NS
1753 pdi = &devices[i];
1754
1755 for (j = 0; j < pdi->nfiles; j++) {
d5396421 1756
73877e12 1757 pci = get_cpu_info(pdi, j);
d5396421 1758
7d747d22
JA
1759 if (pci->fd == -1)
1760 continue;
51128a28 1761
a649216c 1762 events = read_events(pci->fd, 1);
7d747d22 1763 if (!events) {
824c2b39 1764 cpu_mark_offline(pdi, pci->cpu);
7d747d22
JA
1765 close(pci->fd);
1766 pci->fd = -1;
1767 continue;
1768 }
d5396421 1769
287fa3d6
JA
1770 if (pdi->last_read_time < last_allowed_time)
1771 last_allowed_time = pdi->last_read_time;
d5396421 1772
7d747d22
JA
1773 events_added += events;
1774 }
2ff323b0 1775 }
d5396421 1776
53c68c88
JA
1777 if (sort_entries(&youngest))
1778 break;
1779
1780 if (youngest > stopwatch_end)
287fa3d6
JA
1781 break;
1782
a649216c 1783 show_entries_rb(0);
cb2a1a62 1784
7d747d22 1785 } while (events_added);
d5396421 1786
a649216c
JA
1787 if (rb_sort_entries)
1788 show_entries_rb(1);
1789
7d747d22 1790 return 0;
412819ce 1791}
d5396421 1792
412819ce
JA
1793static int do_stdin(void)
1794{
53c68c88 1795 unsigned long long youngest;
763d936e 1796 int fd, events;
d5396421 1797
be925321 1798 last_allowed_time = -1ULL;
1f79c4a0 1799 fd = dup(STDIN_FILENO);
0b07f23e
JA
1800 if (fd == -1) {
1801 perror("dup stdin");
1802 return -1;
1803 }
d5396421 1804
0b07f23e 1805 while ((events = read_events(fd, 0)) != 0) {
412819ce 1806
0b07f23e
JA
1807 smallest_seq_read = -1U;
1808
53c68c88
JA
1809 if (sort_entries(&youngest))
1810 break;
1811
1812 if (youngest > stopwatch_end)
2ff323b0
JA
1813 break;
1814
763d936e 1815 show_entries_rb(0);
0b07f23e 1816 }
d5396421 1817
a649216c
JA
1818 if (rb_sort_entries)
1819 show_entries_rb(1);
1820
d5396421 1821 close(fd);
d5396421
JA
1822 return 0;
1823}
d0ca268b 1824
cbc927b6 1825static void show_stats(void)
412819ce 1826{
cbc927b6
JA
1827 if (!ofp)
1828 return;
1829 if (stats_printed)
1830 return;
1831
1832 stats_printed = 1;
1833
1834 if (per_process_stats)
1835 show_process_stats();
1836
1837 if (per_device_and_cpu_stats)
1838 show_device_and_cpu_stats();
1839
152f6476 1840 fflush(ofp);
412819ce
JA
1841}
1842
e820abd7 1843static void handle_sigint(__attribute__((__unused__)) int sig)
412819ce
JA
1844{
1845 done = 1;
cbc927b6 1846 show_stats();
412819ce
JA
1847}
1848
46e6968b
NS
1849/*
1850 * Extract start and duration times from a string, allowing
1851 * us to specify a time interval of interest within a trace.
1852 * Format: "duration" (start is zero) or "start:duration".
1853 */
1854static int find_stopwatch_interval(char *string)
1855{
1856 double value;
1857 char *sp;
1858
1859 value = strtod(string, &sp);
1860 if (sp == string) {
1861 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1862 return 1;
1863 }
1864 if (*sp == ':') {
1865 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1866 string = sp + 1;
1867 value = strtod(string, &sp);
1868 if (sp == string || *sp != '\0') {
1869 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1870 string);
1871 return 1;
1872 }
1873 } else if (*sp != '\0') {
1874 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1875 return 1;
1876 }
1b928247
JA
1877 stopwatch_end = DOUBLE_TO_NANO_ULL(value);
1878 if (stopwatch_end <= stopwatch_start) {
1879 fprintf(stderr, "Invalid stopwatch interval: %Lu -> %Lu\n",
1880 stopwatch_start, stopwatch_end);
1881 return 1;
1882 }
1883
46e6968b
NS
1884 return 0;
1885}
1886
52724a0e
JA
1887static char usage_str[] = \
1888 "[ -i <input name> ] [-o <output name> [ -s ] [ -t ] [ -q ]\n" \
1889 "[ -w start:stop ] [ -f output format ] [ -F format spec ] [ -v] \n\n" \
1890 "\t-i Input file containing trace data, or '-' for stdin\n" \
d1d7f15f 1891 "\t-D Directory to prepend to input file names\n" \
52724a0e
JA
1892 "\t-o Output file. If not given, output is stdout\n" \
1893 "\t-b stdin read batching\n" \
1894 "\t-s Show per-program io statistics\n" \
bf0720af 1895 "\t-n Hash processes by name, not pid\n" \
52724a0e
JA
1896 "\t-t Track individual ios. Will tell you the time a request took\n" \
1897 "\t to get queued, to get dispatched, and to get completed\n" \
1898 "\t-q Quiet. Don't display any stats at the end of the trace\n" \
1899 "\t-w Only parse data between the given time interval in seconds.\n" \
1900 "\t If 'start' isn't given, blkparse defaults the start time to 0\n" \
d915dee6
JA
1901 "\t-f Output format. Customize the output format. The format field\n" \
1902 "\t identifies can be found in the documentation\n" \
52724a0e 1903 "\t-F Format specification. Can be found in the documentation\n" \
57ea8602
JA
1904 "\t-v More verbose for marginal errors\n" \
1905 "\t-V Print program version info\n\n";
52724a0e 1906
1f79c4a0
JA
1907static void usage(char *prog)
1908{
52724a0e 1909 fprintf(stderr, "Usage: %s %s %s", prog, blkparse_version, usage_str);
1f79c4a0
JA
1910}
1911
d5396421
JA
1912int main(int argc, char *argv[])
1913{
152f6476 1914 char *ofp_buffer;
98f8386b 1915 int i, c, ret, mode;
98f8386b 1916 int act_mask_tmp = 0;
d5396421
JA
1917
1918 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1919 switch (c) {
98f8386b
AB
1920 case 'a':
1921 i = find_mask_map(optarg);
1922 if (i < 0) {
1923 fprintf(stderr,"Invalid action mask %s\n",
1924 optarg);
1925 return 1;
1926 }
1927 act_mask_tmp |= i;
1928 break;
1929
1930 case 'A':
1931 if ((sscanf(optarg, "%x", &i) != 1) ||
1932 !valid_act_opt(i)) {
1933 fprintf(stderr,
1934 "Invalid set action mask %s/0x%x\n",
1935 optarg, i);
1936 return 1;
1937 }
1938 act_mask_tmp = i;
1939 break;
d5396421 1940 case 'i':
e7c9f3ff
NS
1941 if (!strcmp(optarg, "-") && !pipeline)
1942 pipeline = 1;
1943 else if (resize_devices(optarg) != 0)
1944 return 1;
d5396421 1945 break;
d1d7f15f
JA
1946 case 'D':
1947 input_dir = optarg;
1948 break;
d5396421 1949 case 'o':
66efebf8 1950 output_name = optarg;
d5396421 1951 break;
79f19470
JA
1952 case 'b':
1953 rb_batch = atoi(optarg);
1954 if (rb_batch <= 0)
1955 rb_batch = RB_BATCH_DEFAULT;
1956 break;
152f6476
JA
1957 case 's':
1958 per_process_stats = 1;
1959 break;
7997c5b0
JA
1960 case 't':
1961 track_ios = 1;
1962 break;
1e1c60f1
NS
1963 case 'q':
1964 per_device_and_cpu_stats = 0;
1965 break;
46e6968b
NS
1966 case 'w':
1967 if (find_stopwatch_interval(optarg) != 0)
1968 return 1;
1969 break;
ab197ca7
AB
1970 case 'f':
1971 set_all_format_specs(optarg);
1972 break;
1973 case 'F':
1974 if (add_format_spec(optarg) != 0)
1975 return 1;
1976 break;
d915dee6 1977 case 'h':
715d8021 1978 ppi_hash_by_pid = 0;
bf0720af 1979 break;
52724a0e 1980 case 'v':
57ea8602
JA
1981 verbose++;
1982 break;
1983 case 'V':
52724a0e
JA
1984 printf("%s version %s\n", argv[0], blkparse_version);
1985 return 0;
d5396421 1986 default:
1f79c4a0 1987 usage(argv[0]);
d5396421
JA
1988 return 1;
1989 }
d0ca268b
JA
1990 }
1991
e7c9f3ff
NS
1992 while (optind < argc) {
1993 if (!strcmp(argv[optind], "-") && !pipeline)
1994 pipeline = 1;
1995 else if (resize_devices(argv[optind]) != 0)
1996 return 1;
1997 optind++;
1998 }
1999
2000 if (!pipeline && !ndevices) {
1f79c4a0 2001 usage(argv[0]);
d5396421
JA
2002 return 1;
2003 }
2004
98f8386b
AB
2005 if (act_mask_tmp != 0)
2006 act_mask = act_mask_tmp;
2007
7997c5b0 2008 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
412819ce
JA
2009
2010 signal(SIGINT, handle_sigint);
2011 signal(SIGHUP, handle_sigint);
2012 signal(SIGTERM, handle_sigint);
d5396421 2013
d69db225
JA
2014 setlocale(LC_NUMERIC, "en_US");
2015
a66877e6 2016 if (!output_name) {
152f6476 2017 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
2018 mode = _IOLBF;
2019 } else {
152f6476
JA
2020 char ofname[128];
2021
7a9690c0 2022 snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
152f6476 2023 ofp = fopen(ofname, "w");
a66877e6 2024 mode = _IOFBF;
152f6476
JA
2025 }
2026
2027 if (!ofp) {
2028 perror("fopen");
2029 return 1;
2030 }
2031
2032 ofp_buffer = malloc(4096);
a66877e6 2033 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
2034 perror("setvbuf");
2035 return 1;
2036 }
2037
e7c9f3ff 2038 if (pipeline)
d5396421
JA
2039 ret = do_stdin();
2040 else
2041 ret = do_file();
2042
cbc927b6 2043 show_stats();
eb9bd4e9 2044 free(ofp_buffer);
d5396421 2045 return ret;
d0ca268b 2046}