[PATCH] blkparse: improve skip detection
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
d0ca268b 33
8fc0abbc
JA
34#include "blktrace.h"
35#include "rbtree.h"
bf0720af 36#include "jhash.h"
d0ca268b 37
52724a0e
JA
38static char blkparse_version[] = "0.90";
39
e7c9f3ff
NS
40struct per_dev_info {
41 dev_t id;
42 char *name;
43
44 int backwards;
45 unsigned long long events;
46 unsigned long long last_reported_time;
287fa3d6 47 unsigned long long last_read_time;
e7c9f3ff 48 struct io_stats io_stats;
cb2a1a62 49 unsigned long last_sequence;
dcf0f7ed 50 unsigned long skips;
e7c9f3ff 51
73877e12 52 int nfiles;
e7c9f3ff
NS
53 int ncpus;
54 struct per_cpu_info *cpus;
55};
56
152f6476
JA
57struct per_process_info {
58 char name[16];
59 __u32 pid;
60 struct io_stats io_stats;
61 struct per_process_info *hash_next, *list_next;
715d8021 62 int more_than_one;
50adc0ba
JA
63
64 /*
65 * individual io stats
66 */
b9d40d6f
JA
67 unsigned long long longest_allocation_wait[2];
68 unsigned long long longest_dispatch_wait[2];
69 unsigned long long longest_completion_wait[2];
d0ca268b
JA
70};
71
152f6476 72#define PPI_HASH_SHIFT (8)
bf0720af
JA
73#define PPI_HASH_SIZE (1 << PPI_HASH_SHIFT)
74#define PPI_HASH_MASK (PPI_HASH_SIZE - 1)
75static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
152f6476 76static struct per_process_info *ppi_list;
886ecf0e 77static int ppi_list_entries;
152f6476 78
bf0720af 79#define S_OPTS "i:o:b:stqw:f:F:vn"
d5396421
JA
80static struct option l_opts[] = {
81 {
82 .name = "input",
428683db 83 .has_arg = required_argument,
d5396421
JA
84 .flag = NULL,
85 .val = 'i'
86 },
87 {
88 .name = "output",
428683db 89 .has_arg = required_argument,
d5396421
JA
90 .flag = NULL,
91 .val = 'o'
92 },
79f19470
JA
93 {
94 .name = "batch",
428683db 95 .has_arg = required_argument,
79f19470
JA
96 .flag = NULL,
97 .val = 'b'
98 },
152f6476
JA
99 {
100 .name = "per program stats",
428683db 101 .has_arg = no_argument,
152f6476
JA
102 .flag = NULL,
103 .val = 's'
104 },
7997c5b0
JA
105 {
106 .name = "track ios",
428683db 107 .has_arg = no_argument,
7997c5b0
JA
108 .flag = NULL,
109 .val = 't'
110 },
1e1c60f1
NS
111 {
112 .name = "quiet",
428683db 113 .has_arg = no_argument,
1e1c60f1
NS
114 .flag = NULL,
115 .val = 'q'
116 },
46e6968b
NS
117 {
118 .name = "stopwatch",
428683db 119 .has_arg = required_argument,
46e6968b
NS
120 .flag = NULL,
121 .val = 'w'
122 },
ab197ca7
AB
123 {
124 .name = "format",
428683db 125 .has_arg = required_argument,
ab197ca7
AB
126 .flag = NULL,
127 .val = 'f'
128 },
129 {
130 .name = "format-spec",
428683db 131 .has_arg = required_argument,
ab197ca7
AB
132 .flag = NULL,
133 .val = 'F'
134 },
bf0720af
JA
135 {
136 .name = "hash by name",
137 .has_arg = no_argument,
138 .flag = NULL,
139 .val = 'n'
140 },
52724a0e
JA
141 {
142 .name = "version",
143 .has_arg = no_argument,
144 .flag = NULL,
145 .val = 'v'
146 },
d5396421
JA
147};
148
7997c5b0
JA
149/*
150 * for sorting the displayed output
151 */
8fc0abbc
JA
152struct trace {
153 struct blk_io_trace *bit;
154 struct rb_node rb_node;
cb2a1a62 155 struct trace *next;
8fc0abbc
JA
156};
157
cb2a1a62 158static struct rb_root rb_sort_root;
a649216c
JA
159static unsigned long rb_sort_entries;
160
cb2a1a62
JA
161static struct rb_root rb_track_root;
162
163static struct trace *trace_list;
164
d36421e4
JA
165/*
166 * allocation cache
167 */
168static struct blk_io_trace *bit_alloc_list;
169static struct trace *t_alloc_list;
170
7997c5b0
JA
171/*
172 * for tracking individual ios
173 */
174struct io_track {
175 struct rb_node rb_node;
176
e7c9f3ff 177 dev_t device;
7997c5b0
JA
178 __u64 sector;
179 __u32 pid;
bf0720af 180 char comm[16];
95c15013 181 unsigned long long allocation_time;
7997c5b0
JA
182 unsigned long long queue_time;
183 unsigned long long dispatch_time;
184 unsigned long long completion_time;
185};
186
e7c9f3ff
NS
187static int ndevices;
188static struct per_dev_info *devices;
189static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 190
71d5d4c9 191FILE *ofp = NULL;
e7c9f3ff
NS
192static char *output_name;
193
194static unsigned long long genesis_time;
287fa3d6 195static unsigned long long last_allowed_time;
46e6968b
NS
196static unsigned long long stopwatch_start; /* start from zero by default */
197static unsigned long long stopwatch_end = ULONG_LONG_MAX; /* "infinity" */
152f6476
JA
198
199static int per_process_stats;
7997c5b0 200static int track_ios;
bf0720af 201static int ppi_hash_by_pid = 1;
d0ca268b 202
7d747d22 203#define RB_BATCH_DEFAULT (512)
79f19470
JA
204static int rb_batch = RB_BATCH_DEFAULT;
205
e7c9f3ff
NS
206static int pipeline;
207
412819ce
JA
208#define is_done() (*(volatile int *)(&done))
209static volatile int done;
210
bf0720af
JA
211#define JHASH_RANDOM (0x3af5f2ee)
212
213static inline int ppi_hash_pid(__u32 pid)
214{
215 return jhash_1word(pid, JHASH_RANDOM) & PPI_HASH_MASK;
216}
217
218static inline int ppi_hash_name(const char *name)
152f6476 219{
bf0720af
JA
220 return jhash(name, 16, JHASH_RANDOM) & PPI_HASH_MASK;
221}
222
223static inline int ppi_hash(struct per_process_info *ppi)
224{
225 if (ppi_hash_by_pid)
226 return ppi_hash_pid(ppi->pid);
227
bf0720af 228 return ppi_hash_name(ppi->name);
152f6476
JA
229}
230
231static inline void add_process_to_hash(struct per_process_info *ppi)
232{
bf0720af 233 const int hash_idx = ppi_hash(ppi);
152f6476 234
bf0720af
JA
235 ppi->hash_next = ppi_hash_table[hash_idx];
236 ppi_hash_table[hash_idx] = ppi;
152f6476
JA
237}
238
239static inline void add_process_to_list(struct per_process_info *ppi)
240{
241 ppi->list_next = ppi_list;
242 ppi_list = ppi;
886ecf0e 243 ppi_list_entries++;
152f6476
JA
244}
245
bf0720af
JA
246static struct per_process_info *find_process_by_name(char *name)
247{
248 const int hash_idx = ppi_hash_name(name);
249 struct per_process_info *ppi;
250
251 ppi = ppi_hash_table[hash_idx];
252 while (ppi) {
253 if (!strcmp(ppi->name, name))
254 return ppi;
255
256 ppi = ppi->hash_next;
257 }
258
259 return NULL;
260}
261
152f6476
JA
262static struct per_process_info *find_process_by_pid(__u32 pid)
263{
bf0720af 264 const int hash_idx = ppi_hash_pid(pid);
152f6476
JA
265 struct per_process_info *ppi;
266
bf0720af 267 ppi = ppi_hash_table[hash_idx];
152f6476
JA
268 while (ppi) {
269 if (ppi->pid == pid)
270 return ppi;
271
272 ppi = ppi->hash_next;
273 }
274
275 return NULL;
276}
277
bf0720af
JA
278static struct per_process_info *find_process(__u32 pid, char *name)
279{
715d8021
JA
280 struct per_process_info *ppi;
281
bf0720af
JA
282 if (ppi_hash_by_pid)
283 return find_process_by_pid(pid);
284
715d8021
JA
285 ppi = find_process_by_name(name);
286 if (ppi && ppi->pid != pid)
287 ppi->more_than_one = 1;
288
289 return ppi;
bf0720af
JA
290}
291
7997c5b0
JA
292static inline int trace_rb_insert(struct trace *t)
293{
294 struct rb_node **p = &rb_sort_root.rb_node;
295 struct rb_node *parent = NULL;
296 struct trace *__t;
297
298 while (*p) {
299 parent = *p;
300 __t = rb_entry(parent, struct trace, rb_node);
301
dcf0f7ed 302 if (t->bit->time < __t->bit->time)
e7c9f3ff
NS
303 p = &(*p)->rb_left;
304 else if (t->bit->time > __t->bit->time)
305 p = &(*p)->rb_right;
306 else if (t->bit->device < __t->bit->device)
307 p = &(*p)->rb_left;
308 else if (t->bit->device > __t->bit->device)
309 p = &(*p)->rb_right;
dcf0f7ed
JA
310 else if (t->bit->sequence < __t->bit->sequence)
311 p = &(*p)->rb_left;
312 else if (t->bit->sequence > __t->bit->sequence)
313 p = &(*p)->rb_right;
e7c9f3ff
NS
314 else if (t->bit->device == __t->bit->device) {
315 fprintf(stderr,
316 "sequence alias (%d) on device %d,%d!\n",
317 t->bit->sequence,
318 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
319 return 1;
320 }
321 }
322
a649216c 323 rb_sort_entries++;
7997c5b0
JA
324 rb_link_node(&t->rb_node, parent, p);
325 rb_insert_color(&t->rb_node, &rb_sort_root);
326 return 0;
327}
328
0583b6a2 329static struct trace *trace_rb_find(dev_t device, unsigned long sequence)
e3556946
JA
330{
331 struct rb_node **p = &rb_sort_root.rb_node;
332 struct rb_node *parent = NULL;
333 struct trace *__t;
334
335 while (*p) {
336 parent = *p;
337 __t = rb_entry(parent, struct trace, rb_node);
338
0583b6a2
JA
339 if (device < __t->bit->device)
340 p = &(*p)->rb_left;
341 else if (device > __t->bit->device)
342 p = &(*p)->rb_right;
343 else if (sequence < __t->bit->sequence)
e3556946
JA
344 p = &(*p)->rb_left;
345 else if (sequence > __t->bit->sequence)
346 p = &(*p)->rb_right;
347 else
348 return __t;
349 }
350
351 return NULL;
352}
353
7997c5b0
JA
354static inline int track_rb_insert(struct io_track *iot)
355{
356 struct rb_node **p = &rb_track_root.rb_node;
357 struct rb_node *parent = NULL;
358 struct io_track *__iot;
359
360 while (*p) {
361 parent = *p;
e7c9f3ff 362
7997c5b0
JA
363 __iot = rb_entry(parent, struct io_track, rb_node);
364
e7c9f3ff
NS
365 if (iot->device < __iot->device)
366 p = &(*p)->rb_left;
367 else if (iot->device > __iot->device)
368 p = &(*p)->rb_right;
369 else if (iot->sector < __iot->sector)
7997c5b0
JA
370 p = &(*p)->rb_left;
371 else if (iot->sector > __iot->sector)
372 p = &(*p)->rb_right;
373 else {
e7c9f3ff 374 fprintf(stderr,
ab197ca7
AB
375 "sector alias (%Lu) on device %d,%d!\n",
376 (unsigned long long) iot->sector,
e7c9f3ff 377 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
378 return 1;
379 }
380 }
381
382 rb_link_node(&iot->rb_node, parent, p);
383 rb_insert_color(&iot->rb_node, &rb_track_root);
384 return 0;
385}
386
e7c9f3ff 387static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
388{
389 struct rb_node **p = &rb_track_root.rb_node;
390 struct rb_node *parent = NULL;
391 struct io_track *__iot;
392
393 while (*p) {
394 parent = *p;
395
396 __iot = rb_entry(parent, struct io_track, rb_node);
397
e7c9f3ff
NS
398 if (device < __iot->device)
399 p = &(*p)->rb_left;
400 else if (device > __iot->device)
401 p = &(*p)->rb_right;
402 else if (sector < __iot->sector)
7997c5b0
JA
403 p = &(*p)->rb_left;
404 else if (sector > __iot->sector)
405 p = &(*p)->rb_right;
406 else
407 return __iot;
408 }
409
410 return NULL;
411}
412
bf0720af
JA
413static struct io_track *find_track(__u32 pid, char *comm, dev_t device,
414 __u64 sector)
7997c5b0 415{
916b5501 416 struct io_track *iot;
7997c5b0 417
e7c9f3ff 418 iot = __find_track(device, sector);
7997c5b0
JA
419 if (!iot) {
420 iot = malloc(sizeof(*iot));
50adc0ba 421 iot->pid = pid;
bf0720af 422 memcpy(iot->comm, comm, sizeof(iot->comm));
e7c9f3ff 423 iot->device = device;
7997c5b0
JA
424 iot->sector = sector;
425 track_rb_insert(iot);
426 }
427
428 return iot;
429}
430
a01516de 431static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
432{
433 struct io_track *iot;
434
435 if (!track_ios)
436 return;
2e3e8ded 437
a01516de 438 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62 439 if (!iot) {
acd70d21
JA
440 fprintf(stderr, "merge not found for (%d,%d): %llu\n",
441 MAJOR(t->device), MINOR(t->device),
442 t->sector + (t->bytes >> 9));
cb2a1a62 443 return;
2e3e8ded 444 }
cb2a1a62
JA
445
446 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 447 iot->sector -= t->bytes >> 9;
cb2a1a62 448 track_rb_insert(iot);
2e3e8ded
JA
449}
450
95c15013 451static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
452{
453 struct io_track *iot;
454
455 if (!track_ios)
456 return;
457
bf0720af 458 iot = find_track(t->pid, t->comm, t->device, t->sector);
95c15013
JA
459 iot->allocation_time = t->time;
460}
461
95c15013 462/*
b6076a9b 463 * return time between rq allocation and insertion
95c15013 464 */
b6076a9b 465static unsigned long long log_track_insert(struct blk_io_trace *t)
95c15013 466{
50adc0ba 467 unsigned long long elapsed;
95c15013
JA
468 struct io_track *iot;
469
470 if (!track_ios)
471 return -1;
472
bf0720af 473 iot = find_track(t->pid, t->comm, t->device, t->sector);
2e3e8ded 474 iot->queue_time = t->time;
acd70d21
JA
475
476 if (!iot->allocation_time)
477 return -1;
478
50adc0ba
JA
479 elapsed = iot->queue_time - iot->allocation_time;
480
481 if (per_process_stats) {
bf0720af 482 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 483 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 484
b9d40d6f
JA
485 if (ppi && elapsed > ppi->longest_allocation_wait[w])
486 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
487 }
488
489 return elapsed;
2e3e8ded
JA
490}
491
492/*
493 * return time between queue and issue
494 */
495static unsigned long long log_track_issue(struct blk_io_trace *t)
496{
50adc0ba 497 unsigned long long elapsed;
2e3e8ded
JA
498 struct io_track *iot;
499
500 if (!track_ios)
501 return -1;
502 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
503 return -1;
504
e7c9f3ff 505 iot = __find_track(t->device, t->sector);
cb2a1a62 506 if (!iot) {
acd70d21
JA
507 fprintf(stderr, "issue not found for (%d,%d): %llu\n",
508 MAJOR(t->device), MINOR(t->device), t->sector);
2e3e8ded 509 return -1;
cb2a1a62 510 }
2e3e8ded
JA
511
512 iot->dispatch_time = t->time;
50adc0ba
JA
513 elapsed = iot->dispatch_time - iot->queue_time;
514
515 if (per_process_stats) {
bf0720af 516 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 517 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 518
b9d40d6f
JA
519 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
520 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
521 }
522
523 return elapsed;
2e3e8ded
JA
524}
525
526/*
527 * return time between dispatch and complete
528 */
529static unsigned long long log_track_complete(struct blk_io_trace *t)
530{
531 unsigned long long elapsed;
532 struct io_track *iot;
533
534 if (!track_ios)
535 return -1;
536 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
537 return -1;
538
e7c9f3ff 539 iot = __find_track(t->device, t->sector);
cb2a1a62 540 if (!iot) {
acd70d21
JA
541 fprintf(stderr, "complete not found for (%d,%d): %llu\n",
542 MAJOR(t->device), MINOR(t->device), t->sector);
2e3e8ded 543 return -1;
cb2a1a62 544 }
2e3e8ded
JA
545
546 iot->completion_time = t->time;
547 elapsed = iot->completion_time - iot->dispatch_time;
548
50adc0ba 549 if (per_process_stats) {
bf0720af 550 struct per_process_info *ppi = find_process(iot->pid,iot->comm);
b9d40d6f 551 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 552
b9d40d6f
JA
553 if (ppi && elapsed > ppi->longest_completion_wait[w])
554 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
555 }
556
2e3e8ded
JA
557 /*
558 * kill the trace, we don't need it after completion
559 */
560 rb_erase(&iot->rb_node, &rb_track_root);
561 free(iot);
562
563 return elapsed;
564}
565
566
152f6476
JA
567static struct io_stats *find_process_io_stats(__u32 pid, char *name)
568{
bf0720af 569 struct per_process_info *ppi = find_process(pid, name);
152f6476
JA
570
571 if (!ppi) {
572 ppi = malloc(sizeof(*ppi));
573 memset(ppi, 0, sizeof(*ppi));
bf0720af 574 memcpy(ppi->name, name, 16);
152f6476
JA
575 ppi->pid = pid;
576 add_process_to_hash(ppi);
577 add_process_to_list(ppi);
578 }
579
580 return &ppi->io_stats;
581}
582
e7c9f3ff 583static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 584{
e7c9f3ff
NS
585 struct per_cpu_info *cpus = pdi->cpus;
586 int ncpus = pdi->ncpus;
587 int new_count = cpu + 1;
588 int new_space, size;
a718bd37
NS
589 char *new_start;
590
e7c9f3ff
NS
591 size = new_count * sizeof(struct per_cpu_info);
592 cpus = realloc(cpus, size);
593 if (!cpus) {
594 char name[20];
595 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
596 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
597 exit(1);
598 }
599
e7c9f3ff
NS
600 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
601 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 602 memset(new_start, 0, new_space);
e7c9f3ff
NS
603
604 pdi->ncpus = new_count;
605 pdi->cpus = cpus;
606}
cb2a1a62 607
e7c9f3ff
NS
608static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
609{
cb2a1a62
JA
610 struct per_cpu_info *pci;
611
e7c9f3ff
NS
612 if (cpu >= pdi->ncpus)
613 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
614
615 pci = &pdi->cpus[cpu];
616 pci->cpu = cpu;
617 return pci;
a718bd37
NS
618}
619
e7c9f3ff
NS
620
621static int resize_devices(char *name)
a718bd37 622{
e7c9f3ff 623 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 624
e7c9f3ff
NS
625 devices = realloc(devices, size);
626 if (!devices) {
627 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
628 return 1;
629 }
630 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
631 devices[ndevices].name = name;
632 ndevices++;
633 return 0;
634}
a718bd37 635
cb2a1a62 636static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 637{
cb2a1a62 638 struct per_dev_info *pdi;
e7c9f3ff 639 int i;
c499bf38 640
7d747d22
JA
641 for (i = 0; i < ndevices; i++) {
642 if (!devices[i].id)
643 devices[i].id = id;
e7c9f3ff
NS
644 if (devices[i].id == id)
645 return &devices[i];
7d747d22 646 }
cb2a1a62 647
e7c9f3ff
NS
648 if (resize_devices(NULL) != 0)
649 return NULL;
cb2a1a62
JA
650
651 pdi = &devices[ndevices - 1];
652 pdi->id = id;
0583b6a2 653 pdi->last_sequence = 0;
287fa3d6 654 pdi->last_read_time = 0;
cb2a1a62 655 return pdi;
a718bd37
NS
656}
657
e7c9f3ff
NS
658static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
659{
660 if (pdi->name)
661 snprintf(buffer, size, "%s", pdi->name);
662 else
663 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
664 return buffer;
665}
666
e7c9f3ff 667static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
668{
669 unsigned long long this = bit->time;
e7c9f3ff 670 unsigned long long last = pdi->last_reported_time;
cfab07eb 671
e7c9f3ff
NS
672 pdi->backwards = (this < last) ? 'B' : ' ';
673 pdi->last_reported_time = this;
cfab07eb
AB
674}
675
152f6476
JA
676static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
677 int rw)
d0ca268b
JA
678{
679 if (rw) {
152f6476
JA
680 ios->mwrites++;
681 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 682 } else {
152f6476
JA
683 ios->mreads++;
684 ios->qread_kb += t->bytes >> 10;
685 }
686}
687
688static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
689 int rw)
690{
691 __account_m(&pci->io_stats, t, rw);
692
693 if (per_process_stats) {
694 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
695
696 __account_m(ios, t, rw);
d0ca268b
JA
697 }
698}
699
b6076a9b
JA
700static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
701 int rw)
d0ca268b
JA
702{
703 if (rw) {
152f6476
JA
704 ios->qwrites++;
705 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 706 } else {
152f6476
JA
707 ios->qreads++;
708 ios->qread_kb += t->bytes >> 10;
709 }
710}
711
b6076a9b
JA
712static inline void account_queue(struct blk_io_trace *t,
713 struct per_cpu_info *pci, int rw)
152f6476 714{
b6076a9b 715 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
716
717 if (per_process_stats) {
718 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
719
b6076a9b 720 __account_queue(ios, t, rw);
d0ca268b
JA
721 }
722}
723
152f6476 724static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
725{
726 if (rw) {
152f6476
JA
727 ios->cwrites++;
728 ios->cwrite_kb += bytes >> 10;
d0ca268b 729 } else {
152f6476
JA
730 ios->creads++;
731 ios->cread_kb += bytes >> 10;
732 }
733}
734
735static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
736 int rw, int bytes)
737{
738 __account_c(&pci->io_stats, rw, bytes);
739
740 if (per_process_stats) {
741 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
742
743 __account_c(ios, rw, bytes);
d0ca268b
JA
744 }
745}
746
b6076a9b
JA
747static inline void __account_issue(struct io_stats *ios, int rw,
748 unsigned int bytes)
afd2d7ad 749{
750 if (rw) {
152f6476
JA
751 ios->iwrites++;
752 ios->iwrite_kb += bytes >> 10;
afd2d7ad 753 } else {
152f6476
JA
754 ios->ireads++;
755 ios->iread_kb += bytes >> 10;
afd2d7ad 756 }
757}
758
b6076a9b
JA
759static inline void account_issue(struct blk_io_trace *t,
760 struct per_cpu_info *pci, int rw)
d0ca268b 761{
b6076a9b 762 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
763
764 if (per_process_stats) {
765 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 766
b6076a9b 767 __account_issue(ios, rw, t->bytes);
152f6476
JA
768 }
769}
770
06639b27
JA
771static inline void __account_unplug(struct io_stats *ios, int timer)
772{
773 if (timer)
774 ios->timer_unplugs++;
775 else
776 ios->io_unplugs++;
777}
778
779static inline void account_unplug(struct blk_io_trace *t,
780 struct per_cpu_info *pci, int timer)
781{
782 __account_unplug(&pci->io_stats, timer);
783
784 if (per_process_stats) {
785 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
786
787 __account_unplug(ios, timer);
788 }
789}
790
ab197ca7
AB
791static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
792 char *act)
793{
794 process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
795}
796
b6076a9b
JA
797static void log_insert(struct per_cpu_info *pci, struct blk_io_trace *t,
798 char *act)
799{
800 process_fmt(act, pci, t, log_track_insert(t), 0, NULL);
801}
802
ab197ca7
AB
803static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
804 char *act)
805{
b6076a9b 806 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 807}
2e3e8ded 808
ab197ca7
AB
809static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
810 char *act)
811{
812 process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
d0ca268b
JA
813}
814
d5396421 815static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 816 char *act)
d0ca268b 817{
a01516de
JA
818 if (act[0] == 'F')
819 log_track_frontmerge(t);
2e3e8ded 820
ab197ca7 821 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
822}
823
dfe34da1 824static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 825 char *act)
dfe34da1 826{
ab197ca7 827 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
828}
829
d5396421 830static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 831 char *act)
d0ca268b 832{
ab197ca7 833 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
834}
835
ab197ca7 836static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 837 char *act)
67e14fdc 838{
ab197ca7 839 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
840}
841
93f1c611
JA
842static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
843 char *act)
844{
845 process_fmt(act, pci, t, -1ULL, 0, NULL);
846}
847
ab197ca7 848static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 849{
ab197ca7 850 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 851
ab197ca7 852 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
853}
854
ff3a732c 855static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 856{
56f2af81
JA
857 int act = t->action & 0xffff;
858
859 switch (act) {
d0ca268b 860 case __BLK_TA_QUEUE:
3639a11e 861 log_generic(pci, t, "Q");
d0ca268b
JA
862 break;
863 case __BLK_TA_GETRQ:
3639a11e 864 log_generic(pci, t, "G");
d0ca268b
JA
865 break;
866 case __BLK_TA_SLEEPRQ:
3639a11e 867 log_generic(pci, t, "S");
d0ca268b
JA
868 break;
869 case __BLK_TA_REQUEUE:
3639a11e 870 log_generic(pci, t, "R");
d0ca268b
JA
871 break;
872 case __BLK_TA_ISSUE:
ab197ca7 873 log_pc(pci, t, "D");
d0ca268b
JA
874 break;
875 case __BLK_TA_COMPLETE:
3639a11e 876 log_pc(pci, t, "C");
d0ca268b 877 break;
56f2af81
JA
878 case __BLK_TA_INSERT:
879 log_pc(pci, t, "I");
880 break;
d0ca268b 881 default:
56f2af81 882 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 883 break;
d0ca268b 884 }
d0ca268b
JA
885}
886
d5396421 887static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
888{
889 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 890 int act = t->action & 0xffff;
d0ca268b 891
7997c5b0 892 switch (act) {
d0ca268b 893 case __BLK_TA_QUEUE:
b6076a9b 894 account_queue(t, pci, w);
3639a11e 895 log_queue(pci, t, "Q");
d0ca268b 896 break;
b6076a9b
JA
897 case __BLK_TA_INSERT:
898 log_insert(pci, t, "I");
899 break;
d0ca268b 900 case __BLK_TA_BACKMERGE:
152f6476 901 account_m(t, pci, w);
3639a11e 902 log_merge(pci, t, "M");
d0ca268b
JA
903 break;
904 case __BLK_TA_FRONTMERGE:
152f6476 905 account_m(t, pci, w);
3639a11e 906 log_merge(pci, t, "F");
d0ca268b
JA
907 break;
908 case __BLK_TA_GETRQ:
95c15013 909 log_track_getrq(t);
3639a11e 910 log_generic(pci, t, "G");
d0ca268b
JA
911 break;
912 case __BLK_TA_SLEEPRQ:
3639a11e 913 log_generic(pci, t, "S");
d0ca268b
JA
914 break;
915 case __BLK_TA_REQUEUE:
152f6476 916 account_c(t, pci, w, -t->bytes);
3639a11e 917 log_queue(pci, t, "R");
d0ca268b
JA
918 break;
919 case __BLK_TA_ISSUE:
b6076a9b 920 account_issue(t, pci, w);
3639a11e 921 log_issue(pci, t, "D");
d0ca268b
JA
922 break;
923 case __BLK_TA_COMPLETE:
152f6476 924 account_c(t, pci, w, t->bytes);
3639a11e 925 log_complete(pci, t, "C");
d0ca268b 926 break;
88b1a526 927 case __BLK_TA_PLUG:
3639a11e 928 log_action(pci, t, "P");
88b1a526 929 break;
3639a11e 930 case __BLK_TA_UNPLUG_IO:
06639b27 931 account_unplug(t, pci, 0);
3639a11e
JA
932 log_unplug(pci, t, "U");
933 break;
934 case __BLK_TA_UNPLUG_TIMER:
06639b27 935 account_unplug(t, pci, 1);
3639a11e 936 log_unplug(pci, t, "UT");
88b1a526 937 break;
93f1c611
JA
938 case __BLK_TA_SPLIT:
939 log_split(pci, t, "X");
940 break;
941 case __BLK_TA_BOUNCE:
942 log_generic(pci, t, "B");
943 break;
d0ca268b
JA
944 default:
945 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 946 break;
d0ca268b 947 }
d0ca268b
JA
948}
949
ff3a732c
JA
950static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
951 struct per_dev_info *pdi)
d0ca268b
JA
952{
953 if (t->action & BLK_TC_ACT(BLK_TC_PC))
ff3a732c 954 dump_trace_pc(t, pci);
d0ca268b 955 else
d5396421 956 dump_trace_fs(t, pci);
87b72777 957
e7c9f3ff 958 pdi->events++;
d0ca268b
JA
959}
960
152f6476 961static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 962{
152f6476
JA
963 fprintf(ofp, "%s\n", msg);
964
965 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
966 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 967
152f6476
JA
968 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
969 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
970 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
971 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
972 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 973 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
974 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
975 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
976}
977
50adc0ba
JA
978static void dump_wait_stats(struct per_process_info *ppi)
979{
b9d40d6f
JA
980 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
981 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
982 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
983 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
984 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
985 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
986
987 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
988 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
989 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
990 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
991 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
992 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
993}
994
886ecf0e
JA
995static int ppi_name_compare(const void *p1, const void *p2)
996{
997 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
998 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
999 int res;
1000
1001 res = strverscmp(ppi1->name, ppi2->name);
1002 if (!res)
06e6f286 1003 res = ppi1->pid > ppi2->pid;
886ecf0e
JA
1004
1005 return res;
1006}
1007
1008static void sort_process_list(void)
1009{
1010 struct per_process_info **ppis;
1011 struct per_process_info *ppi;
1012 int i = 0;
1013
1014 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1015
1016 ppi = ppi_list;
1017 while (ppi) {
06e6f286 1018 ppis[i++] = ppi;
886ecf0e
JA
1019 ppi = ppi->list_next;
1020 }
1021
06e6f286 1022 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
886ecf0e
JA
1023
1024 i = ppi_list_entries - 1;
1025 ppi_list = NULL;
1026 while (i >= 0) {
1027 ppi = ppis[i];
1028
1029 ppi->list_next = ppi_list;
1030 ppi_list = ppi;
1031 i--;
1032 }
50c38702
JA
1033
1034 free(ppis);
886ecf0e
JA
1035}
1036
152f6476
JA
1037static void show_process_stats(void)
1038{
1039 struct per_process_info *ppi;
1040
886ecf0e
JA
1041 sort_process_list();
1042
152f6476
JA
1043 ppi = ppi_list;
1044 while (ppi) {
ce8b6b4f
JA
1045 char name[64];
1046
715d8021 1047 if (ppi->more_than_one)
bf0720af 1048 sprintf(name, "%s (%u, ...)", ppi->name, ppi->pid);
715d8021
JA
1049 else
1050 sprintf(name, "%s (%u)", ppi->name, ppi->pid);
bf0720af 1051
ce8b6b4f 1052 dump_io_stats(&ppi->io_stats, name);
50adc0ba 1053 dump_wait_stats(ppi);
152f6476
JA
1054 ppi = ppi->list_next;
1055 }
1056
1057 fprintf(ofp, "\n");
1058}
1059
e7c9f3ff 1060static void show_device_and_cpu_stats(void)
d0ca268b 1061{
e7c9f3ff
NS
1062 struct per_dev_info *pdi;
1063 struct per_cpu_info *pci;
1064 struct io_stats total, *ios;
1065 int i, j, pci_events;
1066 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1067 char name[32];
1068
1069 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1070
1071 memset(&total, 0, sizeof(total));
1072 pci_events = 0;
1073
1074 if (i > 0)
1075 fprintf(ofp, "\n");
1076
1077 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1078 if (!pci->nelems)
1079 continue;
1080
1081 ios = &pci->io_stats;
1082 total.qreads += ios->qreads;
1083 total.qwrites += ios->qwrites;
1084 total.creads += ios->creads;
1085 total.cwrites += ios->cwrites;
1086 total.mreads += ios->mreads;
1087 total.mwrites += ios->mwrites;
1088 total.ireads += ios->ireads;
1089 total.iwrites += ios->iwrites;
1090 total.qread_kb += ios->qread_kb;
1091 total.qwrite_kb += ios->qwrite_kb;
1092 total.cread_kb += ios->cread_kb;
1093 total.cwrite_kb += ios->cwrite_kb;
1094 total.iread_kb += ios->iread_kb;
1095 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1096 total.timer_unplugs += ios->timer_unplugs;
1097 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1098
1099 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1100 j, get_dev_name(pdi, name, sizeof(name)));
1101 dump_io_stats(ios, line);
1102 pci_events++;
1103 }
5c017e4b 1104
e7c9f3ff
NS
1105 if (pci_events > 1) {
1106 fprintf(ofp, "\n");
1107 snprintf(line, sizeof(line) - 1, "Total (%s):",
1108 get_dev_name(pdi, name, sizeof(name)));
1109 dump_io_stats(&total, line);
1110 }
d0ca268b 1111
dcf0f7ed 1112 fprintf(ofp, "\nEvents (%s): %'Lu entries, %'lu skips\n",
0aa8a62d 1113 get_dev_name(pdi, line, sizeof(line)), pdi->events,
dcf0f7ed 1114 pdi->skips);
e7c9f3ff 1115 }
d0ca268b
JA
1116}
1117
d36421e4
JA
1118/*
1119 * struct trace and blktrace allocation cache, we do potentially
1120 * millions of mallocs for these structures while only using at most
1121 * a few thousand at the time
1122 */
1123static inline void t_free(struct trace *t)
1124{
1125 t->next = t_alloc_list;
1126 t_alloc_list = t;
1127}
1128
1129static inline struct trace *t_alloc(void)
1130{
1131 struct trace *t = t_alloc_list;
1132
1133 if (t) {
1134 t_alloc_list = t->next;
1135 return t;
1136 }
1137
1138 return malloc(sizeof(*t));
1139}
1140
1141static inline void bit_free(struct blk_io_trace *bit)
1142{
1143 /*
1144 * abuse a 64-bit field for a next pointer for the free item
1145 */
0c39425c 1146 bit->time = (__u64) (unsigned long) bit_alloc_list;
d36421e4
JA
1147 bit_alloc_list = (struct blk_io_trace *) bit;
1148}
1149
1150static inline struct blk_io_trace *bit_alloc(void)
1151{
1152 struct blk_io_trace *bit = bit_alloc_list;
1153
1154 if (bit) {
0c39425c
NS
1155 bit_alloc_list = (struct blk_io_trace *) (unsigned long) \
1156 bit->time;
d36421e4
JA
1157 return bit;
1158 }
1159
1160 return malloc(sizeof(*bit));
1161}
1162
4f0ae44f
JA
1163static void find_genesis(void)
1164{
1165 struct trace *t = trace_list;
1166
1167 genesis_time = -1ULL;
1168 while (t != NULL) {
1169 if (t->bit->time < genesis_time)
1170 genesis_time = t->bit->time;
1171
1172 t = t->next;
1173 }
1174}
1175
7f4d89e6 1176static inline int check_stopwatch(struct blk_io_trace *bit)
4f0ae44f 1177{
7f4d89e6
JA
1178 if (bit->time < stopwatch_end &&
1179 bit->time >= stopwatch_start)
4f0ae44f
JA
1180 return 0;
1181
1182 return 1;
1183}
1184
53c68c88
JA
1185/*
1186 * return youngest entry read
1187 */
1188static int sort_entries(unsigned long long *youngest)
4f0ae44f
JA
1189{
1190 struct trace *t;
4f0ae44f
JA
1191
1192 if (!genesis_time)
1193 find_genesis();
1194
53c68c88 1195 *youngest = -1ULL;
4f0ae44f
JA
1196 while ((t = trace_list) != NULL) {
1197 struct blk_io_trace *bit = t->bit;
1198
1199 trace_list = t->next;
1200
1201 if (verify_trace(bit))
1202 continue;
1203
7f4d89e6 1204 bit->time -= genesis_time;
4f0ae44f 1205
7f4d89e6 1206 if (check_stopwatch(bit)) {
4f0ae44f
JA
1207 bit_free(bit);
1208 t_free(t);
1209 continue;
1210 }
1211
1212 if (trace_rb_insert(t))
53c68c88 1213 return -1;
4f0ae44f 1214
53c68c88
JA
1215 if (bit->time < *youngest)
1216 *youngest = bit->time;
4f0ae44f
JA
1217 }
1218
53c68c88 1219 return 0;
4f0ae44f
JA
1220}
1221
a649216c 1222static void show_entries_rb(int force)
8fc0abbc 1223{
1f7afa72
JA
1224 struct per_dev_info *pdi = NULL;
1225 struct per_cpu_info *pci = NULL;
8fc0abbc 1226 struct blk_io_trace *bit;
3aabcd89 1227 struct rb_node *n;
8fc0abbc 1228 struct trace *t;
1f7afa72 1229
7d747d22 1230 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc 1231
1f7afa72
JA
1232 if (done)
1233 break;
8fc0abbc
JA
1234
1235 t = rb_entry(n, struct trace, rb_node);
1236 bit = t->bit;
1237
287fa3d6
JA
1238 if (!pdi || pdi->id != bit->device)
1239 pdi = get_dev_info(bit->device);
1f7afa72 1240
e7c9f3ff
NS
1241 if (!pdi) {
1242 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1243 MAJOR(bit->device), MINOR(bit->device));
1244 break;
1245 }
1f7afa72
JA
1246
1247 if (bit->cpu > pdi->ncpus) {
e7c9f3ff 1248 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
7d747d22
JA
1249 bit->cpu, MAJOR(bit->device),
1250 MINOR(bit->device));
87b72777 1251 break;
8fc0abbc
JA
1252 }
1253
cb2a1a62
JA
1254 /*
1255 * back off displaying more info if we are out of sync
1256 * on SMP systems. to prevent stalling on lost events,
287fa3d6 1257 * only allow an event to skip us a few times
cb2a1a62 1258 */
acd70d21
JA
1259 if (bit->sequence != (pdi->last_sequence + 1)) {
1260 if (!force) {
1261 struct trace *__t;
1262
1263 /*
1264 * the wanted sequence is really there,
1265 * continue because this means that the
1266 * log time is earlier on the trace we have
1267 * now
1268 */
1269 __t = trace_rb_find(pdi->id,
1270 pdi->last_sequence + 1);
1271 if (__t == NULL)
1272 break;
1273 } else {
1274 fprintf(stderr, "(%d,%d): skipping %lu -> %u\n",
1275 MAJOR(pdi->id), MINOR(pdi->id),
1276 pdi->last_sequence, bit->sequence);
0aa8a62d 1277 pdi->skips++;
acd70d21 1278 }
cb2a1a62
JA
1279 }
1280
a649216c 1281 if (!force && bit->time > last_allowed_time)
46e6968b 1282 break;
8fc0abbc 1283
be925321
JA
1284 pdi->last_sequence = bit->sequence;
1285
4f0ae44f 1286 check_time(pdi, bit);
8fc0abbc 1287
4f0ae44f
JA
1288 if (!pci || pci->cpu != bit->cpu)
1289 pci = get_cpu_info(pdi, bit->cpu);
287fa3d6 1290
4f0ae44f 1291 dump_trace(bit, pci, pdi);
87b72777 1292
7d747d22 1293 rb_erase(&t->rb_node, &rb_sort_root);
a649216c 1294 rb_sort_entries--;
d36421e4
JA
1295 bit_free(bit);
1296 t_free(t);
cb2a1a62 1297 }
8fc0abbc
JA
1298}
1299
1f79c4a0
JA
1300static int read_data(int fd, void *buffer, int bytes, int block)
1301{
1302 int ret, bytes_left, fl;
1303 void *p;
1304
1305 fl = fcntl(fd, F_GETFL);
1306
1307 if (!block)
1308 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1309 else
1310 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1311
1312 bytes_left = bytes;
1313 p = buffer;
1314 while (bytes_left > 0) {
1315 ret = read(fd, p, bytes_left);
1316 if (!ret)
1317 return 1;
1318 else if (ret < 0) {
1319 if (errno != EAGAIN)
1320 perror("read");
a649216c 1321
1f79c4a0
JA
1322 return -1;
1323 } else {
1324 p += ret;
1325 bytes_left -= ret;
1326 }
1327 }
1328
1329 return 0;
1330}
1331
a649216c 1332static int read_events(int fd, int always_block)
cb2a1a62 1333{
287fa3d6 1334 struct per_dev_info *pdi = NULL;
7d747d22
JA
1335 int events = 0;
1336
1337 while (!is_done() && events < rb_batch) {
1338 struct blk_io_trace *bit;
1339 struct trace *t;
1340 int pdu_len;
1341 __u32 magic;
1342
d36421e4 1343 bit = bit_alloc();
cb2a1a62 1344
a649216c 1345 if (read_data(fd, bit, sizeof(*bit), !events || always_block))
cb2a1a62
JA
1346 break;
1347
7d747d22
JA
1348 magic = be32_to_cpu(bit->magic);
1349 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1350 fprintf(stderr, "Bad magic %x\n", magic);
1351 break;
1352 }
1353
1354 pdu_len = be16_to_cpu(bit->pdu_len);
1355 if (pdu_len) {
1356 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
1357
1358 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
1359 break;
1360
1361 bit = ptr;
1362 }
1363
d36421e4 1364 t = t_alloc();
cb2a1a62
JA
1365 memset(t, 0, sizeof(*t));
1366 t->bit = bit;
1367
f1327cc5 1368 trace_to_cpu(bit);
4f0ae44f 1369
7d747d22
JA
1370 t->next = trace_list;
1371 trace_list = t;
1f7afa72 1372
287fa3d6
JA
1373 if (!pdi || pdi->id != bit->device)
1374 pdi = get_dev_info(bit->device);
1375
1376 if (bit->time > pdi->last_read_time)
1377 pdi->last_read_time = bit->time;
1378
7d747d22 1379 events++;
cb2a1a62
JA
1380 }
1381
7d747d22 1382 return events;
cb2a1a62
JA
1383}
1384
d5396421 1385static int do_file(void)
d0ca268b 1386{
7d747d22 1387 struct per_cpu_info *pci;
73877e12
NS
1388 struct per_dev_info *pdi;
1389 int i, j, events, events_added;
d0ca268b 1390
7d747d22
JA
1391 /*
1392 * first prepare all files for reading
1393 */
e8741a4a 1394 for (i = 0; i < ndevices; i++) {
73877e12
NS
1395 pdi = &devices[i];
1396 pdi->nfiles = 0;
0583b6a2 1397 pdi->last_sequence = 0;
73877e12 1398
74feaeb5 1399 for (j = 0;; j++) {
e7c9f3ff 1400 struct stat st;
87b72777 1401
e7c9f3ff
NS
1402 pci = get_cpu_info(pdi, j);
1403 pci->cpu = j;
7d747d22 1404 pci->fd = -1;
d0ca268b 1405
e7c9f3ff 1406 snprintf(pci->fname, sizeof(pci->fname)-1,
7d747d22 1407 "%s.blktrace.%d", pdi->name, pci->cpu);
e7c9f3ff
NS
1408 if (stat(pci->fname, &st) < 0)
1409 break;
74feaeb5
AB
1410 if (st.st_size) {
1411 pci->fd = open(pci->fname, O_RDONLY);
1412 if (pci->fd < 0) {
1413 perror(pci->fname);
1414 continue;
1415 }
e7c9f3ff
NS
1416 }
1417
7d747d22 1418 printf("Input file %s added\n", pci->fname);
73877e12 1419 pdi->nfiles++;
d0ca268b 1420 }
d5396421
JA
1421 }
1422
7d747d22
JA
1423 /*
1424 * now loop over the files reading in the data
1425 */
412819ce 1426 do {
53c68c88
JA
1427 unsigned long long youngest;
1428
7d747d22 1429 events_added = 0;
287fa3d6 1430 last_allowed_time = -1ULL;
d5396421 1431
7d747d22 1432 for (i = 0; i < ndevices; i++) {
73877e12
NS
1433 pdi = &devices[i];
1434
1435 for (j = 0; j < pdi->nfiles; j++) {
d5396421 1436
73877e12 1437 pci = get_cpu_info(pdi, j);
d5396421 1438
7d747d22
JA
1439 if (pci->fd == -1)
1440 continue;
51128a28 1441
a649216c 1442 events = read_events(pci->fd, 1);
7d747d22
JA
1443 if (!events) {
1444 close(pci->fd);
1445 pci->fd = -1;
1446 continue;
1447 }
d5396421 1448
287fa3d6
JA
1449 if (pdi->last_read_time < last_allowed_time)
1450 last_allowed_time = pdi->last_read_time;
d5396421 1451
7d747d22
JA
1452 events_added += events;
1453 }
2ff323b0 1454 }
d5396421 1455
53c68c88
JA
1456 if (sort_entries(&youngest))
1457 break;
1458
1459 if (youngest > stopwatch_end)
287fa3d6
JA
1460 break;
1461
a649216c 1462 show_entries_rb(0);
cb2a1a62 1463
7d747d22 1464 } while (events_added);
d5396421 1465
a649216c
JA
1466 if (rb_sort_entries)
1467 show_entries_rb(1);
1468
7d747d22 1469 return 0;
412819ce 1470}
d5396421 1471
412819ce
JA
1472static int do_stdin(void)
1473{
53c68c88 1474 unsigned long long youngest;
412819ce 1475 int fd;
d5396421 1476
be925321 1477 last_allowed_time = -1ULL;
1f79c4a0 1478 fd = dup(STDIN_FILENO);
412819ce
JA
1479 do {
1480 int events;
d5396421 1481
a649216c 1482 events = read_events(fd, 0);
412819ce
JA
1483 if (!events)
1484 break;
1485
53c68c88
JA
1486 if (sort_entries(&youngest))
1487 break;
1488
1489 if (youngest > stopwatch_end)
2ff323b0
JA
1490 break;
1491
a649216c 1492 show_entries_rb(0);
d5396421
JA
1493 } while (1);
1494
a649216c
JA
1495 if (rb_sort_entries)
1496 show_entries_rb(1);
1497
d5396421 1498 close(fd);
d5396421
JA
1499 return 0;
1500}
d0ca268b 1501
1f79c4a0 1502static void flush_output(void)
412819ce 1503{
152f6476 1504 fflush(ofp);
412819ce
JA
1505}
1506
1f79c4a0 1507static void handle_sigint(int sig)
412819ce
JA
1508{
1509 done = 1;
1510 flush_output();
1511}
1512
46e6968b
NS
1513/*
1514 * Extract start and duration times from a string, allowing
1515 * us to specify a time interval of interest within a trace.
1516 * Format: "duration" (start is zero) or "start:duration".
1517 */
1518static int find_stopwatch_interval(char *string)
1519{
1520 double value;
1521 char *sp;
1522
1523 value = strtod(string, &sp);
1524 if (sp == string) {
1525 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1526 return 1;
1527 }
1528 if (*sp == ':') {
1529 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1530 string = sp + 1;
1531 value = strtod(string, &sp);
1532 if (sp == string || *sp != '\0') {
1533 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1534 string);
1535 return 1;
1536 }
1537 } else if (*sp != '\0') {
1538 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1539 return 1;
1540 }
1b928247
JA
1541 stopwatch_end = DOUBLE_TO_NANO_ULL(value);
1542 if (stopwatch_end <= stopwatch_start) {
1543 fprintf(stderr, "Invalid stopwatch interval: %Lu -> %Lu\n",
1544 stopwatch_start, stopwatch_end);
1545 return 1;
1546 }
1547
46e6968b
NS
1548 return 0;
1549}
1550
52724a0e
JA
1551static char usage_str[] = \
1552 "[ -i <input name> ] [-o <output name> [ -s ] [ -t ] [ -q ]\n" \
1553 "[ -w start:stop ] [ -f output format ] [ -F format spec ] [ -v] \n\n" \
1554 "\t-i Input file containing trace data, or '-' for stdin\n" \
1555 "\t-o Output file. If not given, output is stdout\n" \
1556 "\t-b stdin read batching\n" \
1557 "\t-s Show per-program io statistics\n" \
bf0720af 1558 "\t-n Hash processes by name, not pid\n" \
52724a0e
JA
1559 "\t-t Track individual ios. Will tell you the time a request took\n" \
1560 "\t to get queued, to get dispatched, and to get completed\n" \
1561 "\t-q Quiet. Don't display any stats at the end of the trace\n" \
1562 "\t-w Only parse data between the given time interval in seconds.\n" \
1563 "\t If 'start' isn't given, blkparse defaults the start time to 0\n" \
1564 "\t -f Output format. Customize the output format. The format field\n" \
1565 "\t identifies can be found in the documentation\n" \
1566 "\t-F Format specification. Can be found in the documentation\n" \
1567 "\t-v Print program version info\n\n";
1568
1f79c4a0
JA
1569static void usage(char *prog)
1570{
52724a0e 1571 fprintf(stderr, "Usage: %s %s %s", prog, blkparse_version, usage_str);
1f79c4a0
JA
1572}
1573
d5396421
JA
1574int main(int argc, char *argv[])
1575{
152f6476 1576 char *ofp_buffer;
a66877e6 1577 int c, ret, mode;
1e1c60f1 1578 int per_device_and_cpu_stats = 1;
d5396421
JA
1579
1580 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1581 switch (c) {
1582 case 'i':
e7c9f3ff
NS
1583 if (!strcmp(optarg, "-") && !pipeline)
1584 pipeline = 1;
1585 else if (resize_devices(optarg) != 0)
1586 return 1;
d5396421
JA
1587 break;
1588 case 'o':
66efebf8 1589 output_name = optarg;
d5396421 1590 break;
79f19470
JA
1591 case 'b':
1592 rb_batch = atoi(optarg);
1593 if (rb_batch <= 0)
1594 rb_batch = RB_BATCH_DEFAULT;
1595 break;
152f6476
JA
1596 case 's':
1597 per_process_stats = 1;
1598 break;
7997c5b0
JA
1599 case 't':
1600 track_ios = 1;
1601 break;
1e1c60f1
NS
1602 case 'q':
1603 per_device_and_cpu_stats = 0;
1604 break;
46e6968b
NS
1605 case 'w':
1606 if (find_stopwatch_interval(optarg) != 0)
1607 return 1;
1608 break;
ab197ca7
AB
1609 case 'f':
1610 set_all_format_specs(optarg);
1611 break;
1612 case 'F':
1613 if (add_format_spec(optarg) != 0)
1614 return 1;
1615 break;
bf0720af 1616 case 'n':
715d8021 1617 ppi_hash_by_pid = 0;
bf0720af 1618 break;
52724a0e
JA
1619 case 'v':
1620 printf("%s version %s\n", argv[0], blkparse_version);
1621 return 0;
d5396421 1622 default:
1f79c4a0 1623 usage(argv[0]);
d5396421
JA
1624 return 1;
1625 }
d0ca268b
JA
1626 }
1627
e7c9f3ff
NS
1628 while (optind < argc) {
1629 if (!strcmp(argv[optind], "-") && !pipeline)
1630 pipeline = 1;
1631 else if (resize_devices(argv[optind]) != 0)
1632 return 1;
1633 optind++;
1634 }
1635
1636 if (!pipeline && !ndevices) {
1f79c4a0 1637 usage(argv[0]);
d5396421
JA
1638 return 1;
1639 }
1640
7997c5b0
JA
1641 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1642 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1643
1644 signal(SIGINT, handle_sigint);
1645 signal(SIGHUP, handle_sigint);
1646 signal(SIGTERM, handle_sigint);
d5396421 1647
d69db225
JA
1648 setlocale(LC_NUMERIC, "en_US");
1649
a66877e6 1650 if (!output_name) {
152f6476 1651 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1652 mode = _IOLBF;
1653 } else {
152f6476
JA
1654 char ofname[128];
1655
7a9690c0 1656 snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
152f6476 1657 ofp = fopen(ofname, "w");
a66877e6 1658 mode = _IOFBF;
152f6476
JA
1659 }
1660
1661 if (!ofp) {
1662 perror("fopen");
1663 return 1;
1664 }
1665
1666 ofp_buffer = malloc(4096);
a66877e6 1667 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1668 perror("setvbuf");
1669 return 1;
1670 }
1671
e7c9f3ff 1672 if (pipeline)
d5396421
JA
1673 ret = do_stdin();
1674 else
1675 ret = do_file();
1676
152f6476
JA
1677 if (per_process_stats)
1678 show_process_stats();
1679
1e1c60f1
NS
1680 if (per_device_and_cpu_stats)
1681 show_device_and_cpu_stats();
152f6476 1682
412819ce 1683 flush_output();
d5396421 1684 return ret;
d0ca268b 1685}