blkparse: Handle cgroup information
[blktrace.git] / blkparse.c
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
46e37c55 5 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <stdio.h>
26#include <fcntl.h>
27#include <stdlib.h>
a61c0677 28#include <stdarg.h>
8fc0abbc 29#include <string.h>
d5396421 30#include <getopt.h>
31#include <errno.h>
32#include <signal.h>
d69db225 33#include <locale.h>
6e0073ed 34#include <libgen.h>
d0ca268b 35
36#include "blktrace.h"
37#include "rbtree.h"
bf0720af 38#include "jhash.h"
d0ca268b 39
cca113f2 40static char blkparse_version[] = "1.2.0";
52724a0e 41
42struct skip_info {
43 unsigned long start, end;
44 struct skip_info *prev, *next;
45};
46
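/*
 * Per-device state: event counts, first/last reported times, aggregate
 * io_stats, skipped-sequence totals, queue depth high-water marks and
 * the per-cpu info array for that device.
 */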
e7c9f3ff 47struct per_dev_info {
f7bd1a9b 48 dev_t dev;
49 char *name;
50
51 int backwards;
52 unsigned long long events;
20ed6177 53 unsigned long long first_reported_time;
e7c9f3ff 54 unsigned long long last_reported_time;
287fa3d6 55 unsigned long long last_read_time;
e7c9f3ff 56 struct io_stats io_stats;
57 unsigned long skips;
58 unsigned long long seq_skips;
59 unsigned int max_depth[2];
60 unsigned int cur_depth[2];
e7c9f3ff 61
62 struct rb_root rb_track;
63
73877e12 64 int nfiles;
e7c9f3ff 65 int ncpus;
66
67 unsigned long *cpu_map;
68 unsigned int cpu_map_max;
69
70 struct per_cpu_info *cpus;
71};
72
73/*
74 * some duplicated effort here, we can unify this hash and the ppi hash later
75 */
76struct process_pid_map {
77 pid_t pid;
78 char comm[16];
79 struct process_pid_map *hash_next, *list_next;
80};
81
82#define PPM_HASH_SHIFT (8)
83#define PPM_HASH_SIZE (1 << PPM_HASH_SHIFT)
84#define PPM_HASH_MASK (PPM_HASH_SIZE - 1)
85static struct process_pid_map *ppm_hash_table[PPM_HASH_SIZE];
86
152f6476 87struct per_process_info {
2990e589 88 struct process_pid_map *ppm;
152f6476
JA
89 struct io_stats io_stats;
90 struct per_process_info *hash_next, *list_next;
715d8021 91 int more_than_one;
50adc0ba
JA
92
93 /*
94 * individual io stats
95 */
b9d40d6f
JA
96 unsigned long long longest_allocation_wait[2];
97 unsigned long long longest_dispatch_wait[2];
98 unsigned long long longest_completion_wait[2];
d0ca268b
JA
99};
100
152f6476 101#define PPI_HASH_SHIFT (8)
bf0720af
JA
102#define PPI_HASH_SIZE (1 << PPI_HASH_SHIFT)
103#define PPI_HASH_MASK (PPI_HASH_SIZE - 1)
a7263b8f
WZ
104
105enum {
106 SORT_PROG_EVENT_N, /* Program Name */
107 SORT_PROG_EVENT_QKB, /* KB: Queued read and write */
108 SORT_PROG_EVENT_RKB, /* KB: Queued Read */
109 SORT_PROG_EVENT_WKB, /* KB: Queued Write */
110 SORT_PROG_EVENT_CKB, /* KB: Complete */
111 SORT_PROG_EVENT_QIO, /* IO: Queued read and write */
112 SORT_PROG_EVENT_RIO, /* IO: Queued Read */
113 SORT_PROG_EVENT_WIO, /* IO: Queued Write */
114 SORT_PROG_EVENT_CIO, /* IO: Complete */
115};
116
bf0720af 117static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
152f6476 118static struct per_process_info *ppi_list;
886ecf0e 119static int ppi_list_entries;
152f6476 120
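/*
 * Long command line options for getopt_long(); each entry maps to the
 * equivalent single-character option through its .val field.
 */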
d5396421 121static struct option l_opts[] = {
98f8386b
AB
122 {
123 .name = "act-mask",
124 .has_arg = required_argument,
125 .flag = NULL,
126 .val = 'a'
127 },
128 {
129 .name = "set-mask",
130 .has_arg = required_argument,
131 .flag = NULL,
132 .val = 'A'
133 },
d5396421 134 {
234db09d 135 .name = "batch",
428683db 136 .has_arg = required_argument,
d5396421 137 .flag = NULL,
234db09d 138 .val = 'b'
d5396421
JA
139 },
140 {
234db09d 141 .name = "input-directory",
428683db 142 .has_arg = required_argument,
d5396421 143 .flag = NULL,
234db09d 144 .val = 'D'
d5396421 145 },
79f19470 146 {
234db09d 147 .name = "dump-binary",
428683db 148 .has_arg = required_argument,
79f19470 149 .flag = NULL,
234db09d 150 .val = 'd'
79f19470 151 },
152f6476 152 {
234db09d
AB
153 .name = "format",
154 .has_arg = required_argument,
152f6476 155 .flag = NULL,
234db09d 156 .val = 'f'
152f6476 157 },
7997c5b0 158 {
234db09d
AB
159 .name = "format-spec",
160 .has_arg = required_argument,
7997c5b0 161 .flag = NULL,
234db09d 162 .val = 'F'
7997c5b0 163 },
1e1c60f1 164 {
234db09d 165 .name = "hash-by-name",
428683db 166 .has_arg = no_argument,
1e1c60f1 167 .flag = NULL,
234db09d 168 .val = 'h'
1e1c60f1 169 },
46e6968b 170 {
234db09d 171 .name = "input",
428683db 172 .has_arg = required_argument,
46e6968b 173 .flag = NULL,
234db09d 174 .val = 'i'
46e6968b 175 },
19cfaf3f
AB
176 {
177 .name = "no-msgs",
178 .has_arg = no_argument,
179 .flag = NULL,
180 .val = 'M'
181 },
ab197ca7 182 {
234db09d 183 .name = "output",
428683db 184 .has_arg = required_argument,
ab197ca7 185 .flag = NULL,
234db09d 186 .val = 'o'
ab197ca7
AB
187 },
188 {
234db09d
AB
189 .name = "no-text-output",
190 .has_arg = no_argument,
ab197ca7 191 .flag = NULL,
234db09d 192 .val = 'O'
ab197ca7 193 },
bf0720af 194 {
234db09d 195 .name = "quiet",
bf0720af
JA
196 .has_arg = no_argument,
197 .flag = NULL,
234db09d 198 .val = 'q'
bf0720af 199 },
7d1c0411 200 {
234db09d 201 .name = "per-program-stats",
7d1c0411
JA
202 .has_arg = no_argument,
203 .flag = NULL,
234db09d 204 .val = 's'
7d1c0411 205 },
a7263b8f
WZ
206 {
207 .name = "sort-program-stats",
208 .has_arg = required_argument,
209 .flag = NULL,
210 .val = 'S'
211 },
52724a0e 212 {
234db09d 213 .name = "track-ios",
52724a0e
JA
214 .has_arg = no_argument,
215 .flag = NULL,
234db09d 216 .val = 't'
52724a0e 217 },
d1d7f15f 218 {
234db09d 219 .name = "stopwatch",
d1d7f15f
JA
220 .has_arg = required_argument,
221 .flag = NULL,
234db09d 222 .val = 'w'
d1d7f15f 223 },
a2594911 224 {
234db09d
AB
225 .name = "verbose",
226 .has_arg = no_argument,
a2594911 227 .flag = NULL,
234db09d
AB
228 .val = 'v'
229 },
230 {
231 .name = "version",
232 .has_arg = no_argument,
233 .flag = NULL,
234 .val = 'V'
a2594911 235 },
71ef8b7c
JA
236 {
237 .name = NULL,
238 }
d5396421
JA
239};
240
7997c5b0
JA
241/*
242 * for sorting the displayed output
243 */
8fc0abbc
JA
244struct trace {
245 struct blk_io_trace *bit;
246 struct rb_node rb_node;
cb2a1a62 247 struct trace *next;
a43c1c17 248 unsigned long read_sequence;
8fc0abbc
JA
249};
250
cb2a1a62 251static struct rb_root rb_sort_root;
a649216c
JA
252static unsigned long rb_sort_entries;
253
cb2a1a62
JA
254static struct trace *trace_list;
255
d36421e4
JA
256/*
257 * allocation cache
258 */
259static struct blk_io_trace *bit_alloc_list;
260static struct trace *t_alloc_list;
261
7997c5b0
JA
262/*
263 * for tracking individual ios
264 */
e81829a5 265struct io_track_req {
2990e589 266 struct process_pid_map *ppm;
95c15013 267 unsigned long long allocation_time;
7997c5b0
JA
268 unsigned long long queue_time;
269 unsigned long long dispatch_time;
270 unsigned long long completion_time;
271};
272
e81829a5
AG
273struct io_track {
274 struct rb_node rb_node;
275 struct io_track_req *req;
276 struct io_track *next;
277 __u64 sector;
278};
279
e7c9f3ff
NS
280static int ndevices;
281static struct per_dev_info *devices;
282static char *get_dev_name(struct per_dev_info *, char *, int);
210824c3 283static int trace_rb_insert_last(struct per_dev_info *, struct trace *);
d0ca268b 284
71d5d4c9 285FILE *ofp = NULL;
e7c9f3ff 286static char *output_name;
d1d7f15f 287static char *input_dir;
e7c9f3ff
NS
288
289static unsigned long long genesis_time;
287fa3d6 290static unsigned long long last_allowed_time;
46e6968b 291static unsigned long long stopwatch_start; /* start from zero by default */
bc171579 292static unsigned long long stopwatch_end = -1ULL; /* "infinity" */
a43c1c17 293static unsigned long read_sequence;
152f6476
JA
294
295static int per_process_stats;
a7263b8f 296static int per_process_stats_event = SORT_PROG_EVENT_N;
cbc927b6 297static int per_device_and_cpu_stats = 1;
7997c5b0 298static int track_ios;
bf0720af 299static int ppi_hash_by_pid = 1;
57ea8602 300static int verbose;
98f8386b 301static unsigned int act_mask = -1U;
cbc927b6 302static int stats_printed;
19cfaf3f 303static int bin_output_msgs = 1;
86368eb5 304int data_is_native = -1;
d0ca268b 305
346d8a74 306static FILE *dump_fp;
a2594911
AB
307static char *dump_binary;
308
1d24fc14
JA
309static unsigned int t_alloc_cache;
310static unsigned int bit_alloc_cache;
311
7d747d22 312#define RB_BATCH_DEFAULT (512)
e820abd7 313static unsigned int rb_batch = RB_BATCH_DEFAULT;
79f19470 314
e7c9f3ff 315static int pipeline;
67076cbc 316static char *pipename;
e7c9f3ff 317
234db09d
AB
318static int text_output = 1;
319
412819ce
JA
320#define is_done() (*(volatile int *)(&done))
321static volatile int done;
322
7bd4fd0a
OK
323struct timespec abs_start_time;
324static unsigned long long start_timestamp;
325
c701176c
MP
326static int have_drv_data = 0;
327
bf0720af
JA
328#define JHASH_RANDOM (0x3af5f2ee)
329
824c2b39
JA
330#define CPUS_PER_LONG (8 * sizeof(unsigned long))
331#define CPU_IDX(cpu) ((cpu) / CPUS_PER_LONG)
332#define CPU_BIT(cpu) ((cpu) & (CPUS_PER_LONG - 1))
333
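/*
 * Warn (without aborting) when a tracked request is seen in an
 * unexpected state, e.g. a second allocation/dispatch/completion event
 * for the same request; the offending device, sector and size are
 * printed before the formatted message.
 */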
334static void io_warn_unless(struct blk_io_trace *t, int condition,
335 const char *fmt, ...)
336{
337 va_list ap;
338
339 if (condition)
340 return;
341 va_start(ap, fmt);
342	fprintf(stderr, "(%d,%d) request %llu + %u: ",
343 MAJOR(t->device), MINOR(t->device),
344 t->sector, t->bytes);
345 vfprintf(stderr, fmt, ap);
346 va_end(ap);
347}
348
a2594911
AB
349static void output_binary(void *buf, int len)
350{
351 if (dump_binary) {
346d8a74
AB
352 size_t n = fwrite(buf, len, 1, dump_fp);
353 if (n != 1) {
a2594911 354 perror(dump_binary);
346d8a74 355 fclose(dump_fp);
a2594911
AB
356 dump_binary = NULL;
357 }
358 }
359}
360
210824c3
JA
361static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
362{
363 struct per_cpu_info *cpus = pdi->cpus;
364 int ncpus = pdi->ncpus;
365 int new_count = cpu + 1;
366 int new_space, size;
367 char *new_start;
368
369 size = new_count * sizeof(struct per_cpu_info);
370 cpus = realloc(cpus, size);
371 if (!cpus) {
372 char name[20];
373 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
374 get_dev_name(pdi, name, sizeof(name)), size);
375 exit(1);
376 }
377
378 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
379 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
380 memset(new_start, 0, new_space);
381
382 pdi->ncpus = new_count;
383 pdi->cpus = cpus;
384
385 for (new_count = 0; new_count < pdi->ncpus; new_count++) {
386 struct per_cpu_info *pci = &pdi->cpus[new_count];
387
388 if (!pci->fd) {
389 pci->fd = -1;
390 memset(&pci->rb_last, 0, sizeof(pci->rb_last));
391 pci->rb_last_entries = 0;
392 pci->last_sequence = -1;
393 }
394 }
395}
396
397static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
398{
399 struct per_cpu_info *pci;
400
401 if (cpu >= pdi->ncpus)
402 resize_cpu_info(pdi, cpu);
403
404 pci = &pdi->cpus[cpu];
405 pci->cpu = cpu;
406 return pci;
407}
408
409
410static int resize_devices(char *name)
411{
412 int size = (ndevices + 1) * sizeof(struct per_dev_info);
413
414 devices = realloc(devices, size);
415 if (!devices) {
416 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
417 return 1;
418 }
419 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
420 devices[ndevices].name = name;
421 ndevices++;
422 return 0;
423}
424
425static struct per_dev_info *get_dev_info(dev_t dev)
426{
427 struct per_dev_info *pdi;
428 int i;
429
430 for (i = 0; i < ndevices; i++) {
431 if (!devices[i].dev)
432 devices[i].dev = dev;
433 if (devices[i].dev == dev)
434 return &devices[i];
435 }
436
437 if (resize_devices(NULL))
438 return NULL;
439
440 pdi = &devices[ndevices - 1];
441 pdi->dev = dev;
442 pdi->first_reported_time = 0;
443 pdi->last_read_time = 0;
210824c3
JA
444
445 return pdi;
446}
447
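/*
 * Skipped-sequence tracking: each per-cpu list of skip_info ranges
 * remembers trace sequence numbers that have not been seen yet, so
 * later arrivals can be crossed off and whatever remains is reported
 * as genuinely missing events.
 */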
66930177 448static void insert_skip(struct per_cpu_info *pci, unsigned long start,
492da111
AB
449 unsigned long end)
450{
451 struct skip_info *sip;
452
66930177 453 for (sip = pci->skips_tail; sip != NULL; sip = sip->prev) {
492da111
AB
454 if (end == (sip->start - 1)) {
455 sip->start = start;
456 return;
457 } else if (start == (sip->end + 1)) {
458 sip->end = end;
459 return;
460 }
461 }
462
463 sip = malloc(sizeof(struct skip_info));
464 sip->start = start;
465 sip->end = end;
466 sip->prev = sip->next = NULL;
66930177
JA
467 if (pci->skips_tail == NULL)
468 pci->skips_head = pci->skips_tail = sip;
492da111 469 else {
66930177
JA
470 sip->prev = pci->skips_tail;
471 pci->skips_tail->next = sip;
472 pci->skips_tail = sip;
492da111
AB
473 }
474}
475
66930177 476static void remove_sip(struct per_cpu_info *pci, struct skip_info *sip)
492da111
AB
477{
478 if (sip->prev == NULL) {
479 if (sip->next == NULL)
66930177 480 pci->skips_head = pci->skips_tail = NULL;
492da111 481 else {
66930177 482 pci->skips_head = sip->next;
492da111
AB
483 sip->next->prev = NULL;
484 }
485 } else if (sip->next == NULL) {
66930177 486 pci->skips_tail = sip->prev;
492da111
AB
487 sip->prev->next = NULL;
488 } else {
489 sip->prev->next = sip->next;
490 sip->next->prev = sip->prev;
491 }
492
493 sip->prev = sip->next = NULL;
494 free(sip);
495}
496
497#define IN_SKIP(sip,seq) (((sip)->start <= (seq)) && ((seq) <= sip->end))
66930177 498static int check_current_skips(struct per_cpu_info *pci, unsigned long seq)
492da111
AB
499{
500 struct skip_info *sip;
501
66930177
JA
502 for (sip = pci->skips_tail; sip != NULL; sip = sip->prev) {
503 if (IN_SKIP(sip, seq)) {
492da111
AB
504 if (sip->start == seq) {
505 if (sip->end == seq)
66930177 506 remove_sip(pci, sip);
492da111
AB
507 else
508 sip->start += 1;
509 } else if (sip->end == seq)
510 sip->end -= 1;
511 else {
512				insert_skip(pci, seq + 1, sip->end);
513				sip->end = seq - 1;
492da111
AB
514 }
515 return 1;
516 }
517 }
66930177 518
492da111
AB
519 return 0;
520}
521
522static void collect_pdi_skips(struct per_dev_info *pdi)
523{
524 struct skip_info *sip;
66930177 525 int cpu;
492da111
AB
526
527 pdi->skips = 0;
528 pdi->seq_skips = 0;
66930177
JA
529
530 for (cpu = 0; cpu < pdi->ncpus; cpu++) {
531 struct per_cpu_info *pci = &pdi->cpus[cpu];
532
533 for (sip = pci->skips_head; sip != NULL; sip = sip->next) {
534 pdi->skips++;
535 pdi->seq_skips += (sip->end - sip->start + 1);
536 if (verbose)
537 fprintf(stderr,"(%d,%d): skipping %lu -> %lu\n",
538 MAJOR(pdi->dev), MINOR(pdi->dev),
539 sip->start, sip->end);
540 }
492da111
AB
541 }
542}
543
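/*
 * Per-device bitmap of CPUs that contributed traces, one bit per CPU
 * and grown in CPUS_PER_LONG chunks; e.g. on a 64-bit host CPU 70 maps
 * to word 1, bit 6 via CPU_IDX()/CPU_BIT().
 */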
544static void cpu_mark_online(struct per_dev_info *pdi, unsigned int cpu)
545{
546 if (cpu >= pdi->cpu_map_max || !pdi->cpu_map) {
547 int new_max = (cpu + CPUS_PER_LONG) & ~(CPUS_PER_LONG - 1);
548 unsigned long *map = malloc(new_max / sizeof(long));
549
550 memset(map, 0, new_max / sizeof(long));
551
552 if (pdi->cpu_map) {
553 memcpy(map, pdi->cpu_map, pdi->cpu_map_max / sizeof(long));
554 free(pdi->cpu_map);
555 }
556
557 pdi->cpu_map = map;
558 pdi->cpu_map_max = new_max;
559 }
560
561 pdi->cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
562}
563
564static inline void cpu_mark_offline(struct per_dev_info *pdi, int cpu)
565{
566 pdi->cpu_map[CPU_IDX(cpu)] &= ~(1UL << CPU_BIT(cpu));
567}
568
569static inline int cpu_is_online(struct per_dev_info *pdi, int cpu)
570{
571 return (pdi->cpu_map[CPU_IDX(cpu)] & (1UL << CPU_BIT(cpu))) != 0;
572}
573
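/*
 * pid -> command name map, filled in from BLK_TN_PROCESS notify events
 * and hashed with jhash_1word() into one of PPM_HASH_SIZE buckets, so
 * later traces can be labelled with the name of the submitting process.
 */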
574static inline int ppm_hash_pid(pid_t pid)
575{
576 return jhash_1word(pid, JHASH_RANDOM) & PPM_HASH_MASK;
577}
578
579static struct process_pid_map *find_ppm(pid_t pid)
580{
581 const int hash_idx = ppm_hash_pid(pid);
582 struct process_pid_map *ppm;
583
584 ppm = ppm_hash_table[hash_idx];
585 while (ppm) {
586 if (ppm->pid == pid)
587 return ppm;
588
589 ppm = ppm->hash_next;
590 }
591
592 return NULL;
593}
594
ebe2d1aa 595static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name)
bfc70ad5
JA
596{
597 const int hash_idx = ppm_hash_pid(pid);
598 struct process_pid_map *ppm;
599
600 ppm = find_ppm(pid);
248eac8f
JA
601 if (!ppm) {
602 ppm = malloc(sizeof(*ppm));
603 memset(ppm, 0, sizeof(*ppm));
604 ppm->pid = pid;
d324757e
ES
605 memset(ppm->comm, 0, sizeof(ppm->comm));
606 strncpy(ppm->comm, name, sizeof(ppm->comm));
607 ppm->comm[sizeof(ppm->comm) - 1] = '\0';
248eac8f
JA
608 ppm->hash_next = ppm_hash_table[hash_idx];
609 ppm_hash_table[hash_idx] = ppm;
bfc70ad5 610 }
ebe2d1aa
JA
611
612 return ppm;
bfc70ad5
JA
613}
614
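/*
 * Decode notify events: process name, absolute start timestamp and
 * free-form messages. If __BLK_TN_CGROUP is set, the payload starts
 * with a blk_io_cgroup_payload whose ino,gen pair is printed ahead of
 * the message text, giving (illustrative) output along the lines of
 * "8,0 3 0 1.234567890 0 1a,3 m N some message".
 */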
615static void handle_notify(struct blk_io_trace *bit)
616{
617 void *payload = (caddr_t) bit + sizeof(*bit);
618 __u32 two32[2];
619
7238673f 620 switch (bit->action & ~__BLK_TN_CGROUP) {
7bd4fd0a
OK
621 case BLK_TN_PROCESS:
622 add_ppm_hash(bit->pid, payload);
623 break;
624
625 case BLK_TN_TIMESTAMP:
626 if (bit->pdu_len != sizeof(two32))
627 return;
628 memcpy(two32, payload, sizeof(two32));
629 if (!data_is_native) {
630 two32[0] = be32_to_cpu(two32[0]);
631 two32[1] = be32_to_cpu(two32[1]);
632 }
633 start_timestamp = bit->time;
634 abs_start_time.tv_sec = two32[0];
635 abs_start_time.tv_nsec = two32[1];
636 if (abs_start_time.tv_nsec < 0) {
637 abs_start_time.tv_sec--;
638 abs_start_time.tv_nsec += 1000000000;
639 }
640
641 break;
642
1a15f6a8
AB
643 case BLK_TN_MESSAGE:
644 if (bit->pdu_len > 0) {
645 char msg[bit->pdu_len+1];
7238673f
JK
646 int len = bit->pdu_len;
647 char cgidstr[24];
1a15f6a8 648
7238673f
JK
649 cgidstr[0] = 0;
650 if (bit->action & __BLK_TN_CGROUP) {
651 struct blk_io_cgroup_payload *cgid = payload;
652
653 sprintf(cgidstr, "%x,%x ", cgid->ino,
654 cgid->gen);
655 payload += sizeof(struct blk_io_cgroup_payload);
656 len -= sizeof(struct blk_io_cgroup_payload);
657 }
658 memcpy(msg, (char *)payload, len);
659 msg[len] = '\0';
1a15f6a8
AB
660
661 fprintf(ofp,
7238673f 662 "%3d,%-3d %2d %8s %5d.%09lu %5u %s%2s %3s %s\n",
1a15f6a8 663 MAJOR(bit->device), MINOR(bit->device),
7238673f
JK
664 bit->cpu, "0", (int)SECONDS(bit->time),
665 (unsigned long)NANO_SECONDS(bit->time),
666 0, cgidstr, "m", "N", msg);
1a15f6a8
AB
667 }
668 break;
669
7bd4fd0a
OK
670 default:
671 /* Ignore unknown notify events */
672 ;
673 }
674}
675
bfc70ad5
JA
676char *find_process_name(pid_t pid)
677{
678 struct process_pid_map *ppm = find_ppm(pid);
679
680 if (ppm)
681 return ppm->comm;
682
683 return NULL;
684}
685
9e4cd1b8 686static inline int ppi_hash_pid(pid_t pid)
bf0720af
JA
687{
688 return jhash_1word(pid, JHASH_RANDOM) & PPI_HASH_MASK;
689}
690
691static inline int ppi_hash_name(const char *name)
152f6476 692{
bf0720af
JA
693 return jhash(name, 16, JHASH_RANDOM) & PPI_HASH_MASK;
694}
695
696static inline int ppi_hash(struct per_process_info *ppi)
697{
2990e589
JA
698 struct process_pid_map *ppm = ppi->ppm;
699
bf0720af 700 if (ppi_hash_by_pid)
2990e589 701 return ppi_hash_pid(ppm->pid);
bf0720af 702
2990e589 703 return ppi_hash_name(ppm->comm);
152f6476
JA
704}
705
bfc70ad5 706static inline void add_ppi_to_hash(struct per_process_info *ppi)
152f6476 707{
bf0720af 708 const int hash_idx = ppi_hash(ppi);
152f6476 709
bf0720af
JA
710 ppi->hash_next = ppi_hash_table[hash_idx];
711 ppi_hash_table[hash_idx] = ppi;
152f6476
JA
712}
713
bfc70ad5 714static inline void add_ppi_to_list(struct per_process_info *ppi)
152f6476
JA
715{
716 ppi->list_next = ppi_list;
717 ppi_list = ppi;
886ecf0e 718 ppi_list_entries++;
152f6476
JA
719}
720
bfc70ad5 721static struct per_process_info *find_ppi_by_name(char *name)
bf0720af
JA
722{
723 const int hash_idx = ppi_hash_name(name);
724 struct per_process_info *ppi;
725
726 ppi = ppi_hash_table[hash_idx];
727 while (ppi) {
2990e589
JA
728 struct process_pid_map *ppm = ppi->ppm;
729
730 if (!strcmp(ppm->comm, name))
bf0720af
JA
731 return ppi;
732
733 ppi = ppi->hash_next;
734 }
735
736 return NULL;
737}
738
9e4cd1b8 739static struct per_process_info *find_ppi_by_pid(pid_t pid)
152f6476 740{
bf0720af 741 const int hash_idx = ppi_hash_pid(pid);
152f6476
JA
742 struct per_process_info *ppi;
743
bf0720af 744 ppi = ppi_hash_table[hash_idx];
152f6476 745 while (ppi) {
2990e589
JA
746 struct process_pid_map *ppm = ppi->ppm;
747
748 if (ppm->pid == pid)
152f6476
JA
749 return ppi;
750
751 ppi = ppi->hash_next;
752 }
753
754 return NULL;
755}
756
9e4cd1b8 757static struct per_process_info *find_ppi(pid_t pid)
bf0720af 758{
715d8021 759 struct per_process_info *ppi;
bfc70ad5 760 char *name;
715d8021 761
bf0720af 762 if (ppi_hash_by_pid)
bfc70ad5
JA
763 return find_ppi_by_pid(pid);
764
765 name = find_process_name(pid);
766 if (!name)
767 return NULL;
bf0720af 768
bfc70ad5 769 ppi = find_ppi_by_name(name);
2990e589 770 if (ppi && ppi->ppm->pid != pid)
715d8021
JA
771 ppi->more_than_one = 1;
772
773 return ppi;
bf0720af
JA
774}
775
210824c3
JA
776/*
777 * struct trace and blktrace allocation cache, we do potentially
778 * millions of mallocs for these structures while only using at most
779 * a few thousand at a time
780 */
781static inline void t_free(struct trace *t)
782{
783 if (t_alloc_cache < 1024) {
784 t->next = t_alloc_list;
785 t_alloc_list = t;
786 t_alloc_cache++;
787 } else
788 free(t);
789}
790
791static inline struct trace *t_alloc(void)
792{
793 struct trace *t = t_alloc_list;
794
795 if (t) {
796 t_alloc_list = t->next;
797 t_alloc_cache--;
798 return t;
799 }
800
801 return malloc(sizeof(*t));
802}
803
804static inline void bit_free(struct blk_io_trace *bit)
805{
806 if (bit_alloc_cache < 1024 && !bit->pdu_len) {
807 /*
808 * abuse a 64-bit field for a next pointer for the free item
809 */
810 bit->time = (__u64) (unsigned long) bit_alloc_list;
811 bit_alloc_list = (struct blk_io_trace *) bit;
812 bit_alloc_cache++;
813 } else
814 free(bit);
815}
816
817static inline struct blk_io_trace *bit_alloc(void)
818{
819 struct blk_io_trace *bit = bit_alloc_list;
820
821 if (bit) {
822 bit_alloc_list = (struct blk_io_trace *) (unsigned long) \
823 bit->time;
824 bit_alloc_cache--;
825 return bit;
826 }
827
828 return malloc(sizeof(*bit));
829}
830
831static inline void __put_trace_last(struct per_dev_info *pdi, struct trace *t)
832{
833 struct per_cpu_info *pci = get_cpu_info(pdi, t->bit->cpu);
834
835 rb_erase(&t->rb_node, &pci->rb_last);
836 pci->rb_last_entries--;
837
838 bit_free(t->bit);
839 t_free(t);
840}
841
842static void put_trace(struct per_dev_info *pdi, struct trace *t)
843{
844 rb_erase(&t->rb_node, &rb_sort_root);
845 rb_sort_entries--;
846
847 trace_rb_insert_last(pdi, t);
848}
849
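/*
 * Insert a trace into an rb tree keyed by time, then device, then
 * sequence number, so entries with identical timestamps still get a
 * stable ordering.
 */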
89482da6 850static inline int trace_rb_insert(struct trace *t, struct rb_root *root)
7997c5b0 851{
2a1b3424 852 struct rb_node **p = &root->rb_node;
7997c5b0
JA
853 struct rb_node *parent = NULL;
854 struct trace *__t;
855
856 while (*p) {
857 parent = *p;
2a1b3424 858
7997c5b0
JA
859 __t = rb_entry(parent, struct trace, rb_node);
860
89482da6
JA
861 if (t->bit->time < __t->bit->time)
862 p = &(*p)->rb_left;
863 else if (t->bit->time > __t->bit->time)
864 p = &(*p)->rb_right;
865 else if (t->bit->device < __t->bit->device)
e7c9f3ff
NS
866 p = &(*p)->rb_left;
867 else if (t->bit->device > __t->bit->device)
868 p = &(*p)->rb_right;
dcf0f7ed
JA
869 else if (t->bit->sequence < __t->bit->sequence)
870 p = &(*p)->rb_left;
0b07f23e 871 else /* >= sequence */
dcf0f7ed 872 p = &(*p)->rb_right;
7997c5b0
JA
873 }
874
875 rb_link_node(&t->rb_node, parent, p);
2a1b3424 876 rb_insert_color(&t->rb_node, root);
7997c5b0
JA
877 return 0;
878}
879
2a1b3424 880static inline int trace_rb_insert_sort(struct trace *t)
e3556946 881{
89482da6 882 if (!trace_rb_insert(t, &rb_sort_root)) {
2a1b3424
JA
883 rb_sort_entries++;
884 return 0;
885 }
886
887 return 1;
888}
889
210824c3 890static int trace_rb_insert_last(struct per_dev_info *pdi, struct trace *t)
2a1b3424 891{
210824c3
JA
892 struct per_cpu_info *pci = get_cpu_info(pdi, t->bit->cpu);
893
894 if (trace_rb_insert(t, &pci->rb_last))
895 return 1;
896
897 pci->rb_last_entries++;
898
899 if (pci->rb_last_entries > rb_batch * pdi->nfiles) {
900 struct rb_node *n = rb_first(&pci->rb_last);
901
902 t = rb_entry(n, struct trace, rb_node);
903 __put_trace_last(pdi, t);
2a1b3424
JA
904 }
905
210824c3 906 return 0;
2a1b3424
JA
907}
908
909static struct trace *trace_rb_find(dev_t device, unsigned long sequence,
910 struct rb_root *root, int order)
911{
912 struct rb_node *n = root->rb_node;
913 struct rb_node *prev = NULL;
e3556946
JA
914 struct trace *__t;
915
2a1b3424
JA
916 while (n) {
917 __t = rb_entry(n, struct trace, rb_node);
918 prev = n;
e3556946 919
0583b6a2 920 if (device < __t->bit->device)
2a1b3424 921 n = n->rb_left;
0583b6a2 922 else if (device > __t->bit->device)
2a1b3424 923 n = n->rb_right;
0583b6a2 924 else if (sequence < __t->bit->sequence)
2a1b3424 925 n = n->rb_left;
e3556946 926 else if (sequence > __t->bit->sequence)
2a1b3424 927 n = n->rb_right;
e3556946
JA
928 else
929 return __t;
930 }
931
2a1b3424
JA
932 /*
933 * hack - the list may not be sequence ordered because some
934 * events don't have sequence and time matched. so we end up
935 * being a little off in the rb lookup here, because we don't
936 * know the time we are looking for. compensate by browsing
937 * a little ahead from the last entry to find the match
938 */
939 if (order && prev) {
940 int max = 5;
941
942 while (((n = rb_next(prev)) != NULL) && max--) {
943 __t = rb_entry(n, struct trace, rb_node);
492da111 944
2a1b3424
JA
945 if (__t->bit->device == device &&
946 __t->bit->sequence == sequence)
947 return __t;
948
949 prev = n;
950 }
951 }
492da111 952
e3556946
JA
953 return NULL;
954}
955
2a1b3424 956static inline struct trace *trace_rb_find_last(struct per_dev_info *pdi,
210824c3 957 struct per_cpu_info *pci,
2a1b3424
JA
958 unsigned long seq)
959{
210824c3 960 return trace_rb_find(pdi->dev, seq, &pci->rb_last, 0);
2a1b3424
JA
961}
962
f7bd1a9b 963static inline int track_rb_insert(struct per_dev_info *pdi,struct io_track *iot)
7997c5b0 964{
f7bd1a9b 965 struct rb_node **p = &pdi->rb_track.rb_node;
7997c5b0
JA
966 struct rb_node *parent = NULL;
967 struct io_track *__iot;
968
969 while (*p) {
970 parent = *p;
7997c5b0
JA
971 __iot = rb_entry(parent, struct io_track, rb_node);
972
f7bd1a9b 973 if (iot->sector < __iot->sector)
7997c5b0
JA
974 p = &(*p)->rb_left;
975 else if (iot->sector > __iot->sector)
976 p = &(*p)->rb_right;
977 else {
e7c9f3ff 978 fprintf(stderr,
ab197ca7
AB
979 "sector alias (%Lu) on device %d,%d!\n",
980 (unsigned long long) iot->sector,
f7bd1a9b 981 MAJOR(pdi->dev), MINOR(pdi->dev));
7997c5b0
JA
982 return 1;
983 }
984 }
985
986 rb_link_node(&iot->rb_node, parent, p);
f7bd1a9b 987 rb_insert_color(&iot->rb_node, &pdi->rb_track);
7997c5b0
JA
988 return 0;
989}
990
f7bd1a9b 991static struct io_track *__find_track(struct per_dev_info *pdi, __u64 sector)
7997c5b0 992{
f7bd1a9b 993 struct rb_node *n = pdi->rb_track.rb_node;
7997c5b0
JA
994 struct io_track *__iot;
995
2a1b3424
JA
996 while (n) {
997 __iot = rb_entry(n, struct io_track, rb_node);
7997c5b0 998
f7bd1a9b 999 if (sector < __iot->sector)
2a1b3424 1000 n = n->rb_left;
7997c5b0 1001 else if (sector > __iot->sector)
2a1b3424 1002 n = n->rb_right;
7997c5b0
JA
1003 else
1004 return __iot;
1005 }
1006
1007 return NULL;
1008}
1009
e81829a5
AG
1010static inline struct io_track *first_iot(struct io_track_req *req)
1011{
1012 return (struct io_track *)(req + 1);
1013}
1014
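/*
 * Look up the io_track for a sector; if the sector is not tracked yet,
 * allocate an io_track_req with its embedded first io_track, mark all
 * timestamps as unset (-1ULL) and insert it into the per-device rb
 * tree.
 */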
9e4cd1b8 1015static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid,
bfc70ad5 1016 __u64 sector)
7997c5b0 1017{
916b5501 1018 struct io_track *iot;
7997c5b0 1019
f7bd1a9b 1020 iot = __find_track(pdi, sector);
7997c5b0 1021 if (!iot) {
e81829a5
AG
1022 struct io_track_req *req;
1023
1024 req = malloc(sizeof(*req) + sizeof(*iot));
1025 req->ppm = find_ppm(pid);
1026 if (!req->ppm)
1027 req->ppm = add_ppm_hash(pid, "unknown");
1028 req->allocation_time = -1ULL;
1029 req->queue_time = -1ULL;
1030 req->dispatch_time = -1ULL;
1031 req->completion_time = -1ULL;
1032 iot = first_iot(req);
1033 iot->req = req;
1034 iot->next = NULL;
7997c5b0 1035 iot->sector = sector;
f7bd1a9b 1036 track_rb_insert(pdi, iot);
7997c5b0
JA
1037 }
1038
1039 return iot;
1040}
1041
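/*
 * A front merge moves the start of the tracked request back by
 * t_sec(t) sectors, so re-key the io_track in the rb tree at its new
 * starting sector.
 */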
1042static void log_track_frontmerge(struct per_dev_info *pdi,
1043 struct blk_io_trace *t)
2e3e8ded
JA
1044{
1045 struct io_track *iot;
1046
1047 if (!track_ios)
1048 return;
2e3e8ded 1049
ae957cbc 1050 iot = __find_track(pdi, t->sector + t_sec(t));
cb2a1a62 1051 if (!iot) {
57ea8602
JA
1052 if (verbose)
1053 fprintf(stderr, "merge not found for (%d,%d): %llu\n",
3c667f3c 1054 MAJOR(t->device), MINOR(t->device),
57ea8602 1055 (unsigned long long) t->sector + t_sec(t));
cb2a1a62 1056 return;
2e3e8ded 1057 }
cb2a1a62 1058
f7bd1a9b 1059 rb_erase(&iot->rb_node, &pdi->rb_track);
ae957cbc 1060 iot->sector -= t_sec(t);
f7bd1a9b 1061 track_rb_insert(pdi, iot);
2e3e8ded
JA
1062}
1063
f7bd1a9b 1064static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t)
2e3e8ded
JA
1065{
1066 struct io_track *iot;
e81829a5 1067 struct io_track_req *req;
2e3e8ded
JA
1068
1069 if (!track_ios)
1070 return;
1071
bfc70ad5 1072 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1073 req = iot->req;
1074 io_warn_unless(t, req->allocation_time == -1ULL,
a61c0677 1075 "confused about %s time", "allocation");
e81829a5 1076 req->allocation_time = t->time;
95c15013
JA
1077}
1078
753f9091
JA
1079/*
1080 * for md/dm setups, the interesting cycle is Q -> C. So track queueing
1081 * time here, as dispatch time
1082 */
1083static void log_track_queue(struct per_dev_info *pdi, struct blk_io_trace *t)
1084{
1085 struct io_track *iot;
e81829a5 1086 struct io_track_req *req;
753f9091
JA
1087
1088 if (!track_ios)
1089 return;
753f9091 1090
bfc70ad5 1091 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1092 req = iot->req;
1093 io_warn_unless(t, req->dispatch_time == -1ULL,
a61c0677 1094 "confused about %s time", "dispatch");
e81829a5
AG
1095 req->dispatch_time = t->time;
1096}
1097
1098static void log_track_split(struct per_dev_info *pdi, struct blk_io_trace *t)
1099{
1100 struct io_track *iot, *split;
1101
1102 /*
1103 * With a split request, the completion event will refer to the last
1104 * part of the original request, but other events might refer to other
1105 * parts.
1106 */
1107 iot = find_track(pdi, t->pid, t->sector);
1108 split = malloc(sizeof(*iot));
1109 split->req = iot->req;
1110 split->next = iot->next;
1111 iot->next = split;
1112 split->sector = iot->sector + t_sec(t);
1113 track_rb_insert(pdi, split);
753f9091
JA
1114}
1115
95c15013 1116/*
b6076a9b 1117 * return time between rq allocation and insertion
95c15013 1118 */
f7bd1a9b
JA
1119static unsigned long long log_track_insert(struct per_dev_info *pdi,
1120 struct blk_io_trace *t)
95c15013 1121{
50adc0ba 1122 unsigned long long elapsed;
95c15013 1123 struct io_track *iot;
e81829a5 1124 struct io_track_req *req;
95c15013
JA
1125
1126 if (!track_ios)
1127 return -1;
1128
bfc70ad5 1129 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1130 req = iot->req;
1131 io_warn_unless(t, req->queue_time == -1ULL,
a61c0677 1132 "confused about %s time", "queue");
e81829a5 1133 req->queue_time = t->time;
acd70d21 1134
e81829a5 1135 if (req->allocation_time == -1ULL)
acd70d21
JA
1136 return -1;
1137
e81829a5 1138 elapsed = req->queue_time - req->allocation_time;
50adc0ba
JA
1139
1140 if (per_process_stats) {
e81829a5 1141 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1142 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1143
b9d40d6f
JA
1144 if (ppi && elapsed > ppi->longest_allocation_wait[w])
1145 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
1146 }
1147
1148 return elapsed;
2e3e8ded
JA
1149}
1150
1151/*
1152 * return time between queue and issue
1153 */
f7bd1a9b
JA
1154static unsigned long long log_track_issue(struct per_dev_info *pdi,
1155 struct blk_io_trace *t)
2e3e8ded 1156{
a61c0677 1157 unsigned long long elapsed = -1ULL;
2e3e8ded 1158 struct io_track *iot;
e81829a5 1159 struct io_track_req *req;
2e3e8ded
JA
1160
1161 if (!track_ios)
1162 return -1;
1163 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
1164 return -1;
1165
f7bd1a9b 1166 iot = __find_track(pdi, t->sector);
cb2a1a62 1167 if (!iot) {
57ea8602
JA
1168 if (verbose)
1169 fprintf(stderr, "issue not found for (%d,%d): %llu\n",
3c667f3c 1170 MAJOR(t->device), MINOR(t->device),
57ea8602 1171 (unsigned long long) t->sector);
2e3e8ded 1172 return -1;
cb2a1a62 1173 }
2e3e8ded 1174
e81829a5
AG
1175 req = iot->req;
1176 io_warn_unless(t, req->dispatch_time == -1ULL,
a61c0677 1177 "confused about %s time", "dispatch");
e81829a5
AG
1178 req->dispatch_time = t->time;
1179 if (req->queue_time != -1ULL)
1180 elapsed = req->dispatch_time - req->queue_time;
50adc0ba 1181
a61c0677 1182 if (elapsed != -1ULL && per_process_stats) {
e81829a5 1183 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1184 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1185
b9d40d6f
JA
1186 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
1187 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
1188 }
1189
1190 return elapsed;
2e3e8ded
JA
1191}
1192
e81829a5
AG
1193static void fixup_complete(struct per_dev_info *pdi, struct blk_io_trace *t)
1194{
1195 struct io_track *iot;
1196 __u64 start_sector;
1197
1198 iot = __find_track(pdi, t->sector);
1199 if (!iot)
1200 return;
1201
1202 /*
1203 * When a split io completes, the sector and length of the completion
1204 * refer to the last part of the original request. Fix the sector and
1205 * length of the complete event to match the original request.
1206 */
1207 start_sector = first_iot(iot->req)->sector;
1208 t->bytes += (t->sector - start_sector) << 9;
1209 t->sector = start_sector;
1210}
1211
2e3e8ded
JA
1212/*
1213 * return time between dispatch and complete
1214 */
f7bd1a9b
JA
1215static unsigned long long log_track_complete(struct per_dev_info *pdi,
1216 struct blk_io_trace *t)
2e3e8ded 1217{
a61c0677 1218 unsigned long long elapsed = -1ULL;
e81829a5
AG
1219 struct io_track *iot, *next;
1220 struct io_track_req *req;
2e3e8ded
JA
1221
1222 if (!track_ios)
1223 return -1;
2e3e8ded 1224
f7bd1a9b 1225 iot = __find_track(pdi, t->sector);
cb2a1a62 1226 if (!iot) {
57ea8602
JA
1227 if (verbose)
1228 fprintf(stderr,"complete not found for (%d,%d): %llu\n",
3c667f3c 1229 MAJOR(t->device), MINOR(t->device),
57ea8602 1230 (unsigned long long) t->sector);
2e3e8ded 1231 return -1;
cb2a1a62 1232 }
2e3e8ded 1233
e81829a5
AG
1234 req = iot->req;
1235 io_warn_unless(t, req->completion_time == -1ULL,
a61c0677 1236 "confused about %s time", "completion");
e81829a5
AG
1237 req->completion_time = t->time;
1238 if (req->dispatch_time != -1ULL)
1239 elapsed = req->completion_time - req->dispatch_time;
2e3e8ded 1240
a61c0677 1241 if (elapsed != -1ULL && per_process_stats) {
e81829a5 1242 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1243 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1244
b9d40d6f
JA
1245 if (ppi && elapsed > ppi->longest_completion_wait[w])
1246 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
1247 }
1248
2e3e8ded
JA
1249 /*
1250 * kill the trace, we don't need it after completion
1251 */
e81829a5
AG
1252 for (iot = first_iot(req); iot; iot = next) {
1253 next = iot->next;
1254 rb_erase(&iot->rb_node, &pdi->rb_track);
1255 if (iot != first_iot(req))
1256 free(iot);
1257 }
1258 free(req);
2e3e8ded
JA
1259
1260 return elapsed;
1261}
1262
1263
9e4cd1b8 1264static struct io_stats *find_process_io_stats(pid_t pid)
152f6476 1265{
bfc70ad5 1266 struct per_process_info *ppi = find_ppi(pid);
152f6476
JA
1267
1268 if (!ppi) {
1269 ppi = malloc(sizeof(*ppi));
1270 memset(ppi, 0, sizeof(*ppi));
2990e589 1271 ppi->ppm = find_ppm(pid);
ebe2d1aa
JA
1272 if (!ppi->ppm)
1273 ppi->ppm = add_ppm_hash(pid, "unknown");
bfc70ad5
JA
1274 add_ppi_to_hash(ppi);
1275 add_ppi_to_list(ppi);
152f6476
JA
1276 }
1277
1278 return &ppi->io_stats;
1279}
1280
e7c9f3ff
NS
1281static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
1282{
1283 if (pdi->name)
1284 snprintf(buffer, size, "%s", pdi->name);
1285 else
f7bd1a9b 1286 snprintf(buffer, size, "%d,%d",MAJOR(pdi->dev),MINOR(pdi->dev));
e7c9f3ff
NS
1287 return buffer;
1288}
1289
e7c9f3ff 1290static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
1291{
1292 unsigned long long this = bit->time;
e7c9f3ff 1293 unsigned long long last = pdi->last_reported_time;
cfab07eb 1294
e7c9f3ff
NS
1295 pdi->backwards = (this < last) ? 'B' : ' ';
1296 pdi->last_reported_time = this;
cfab07eb
AB
1297}
1298
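/*
 * Accounting helpers: each __account_*() bumps one set of io_stats
 * counters (merges, queued, issued, completed, requeued, unplugs) and
 * the corresponding account_*() wrapper applies it to the per-cpu
 * stats and, when per-process stats are enabled (-s), to the stats of
 * the submitting pid as well.
 */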
1299static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
1300 int rw)
d0ca268b 1301{
fb2ec796 1302 if (rw) {
152f6476 1303 ios->mwrites++;
fb2ec796 1304 ios->mwrite_kb += t_kb(t);
cd0ae0f6 1305 ios->mwrite_b += t_b(t);
fb2ec796 1306 } else {
152f6476 1307 ios->mreads++;
fb2ec796 1308 ios->mread_kb += t_kb(t);
cd0ae0f6 1309 ios->mread_b += t_b(t);
fb2ec796 1310 }
152f6476
JA
1311}
1312
1313static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
1314 int rw)
1315{
fb2ec796 1316 __account_m(&pci->io_stats, t, rw);
152f6476
JA
1317
1318 if (per_process_stats) {
bfc70ad5 1319 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476 1320
fb2ec796 1321 __account_m(ios, t, rw);
d0ca268b
JA
1322 }
1323}
1324
801646d6
CS
1325static inline void __account_pc_queue(struct io_stats *ios,
1326 struct blk_io_trace *t, int rw)
1327{
1328 if (rw) {
1329 ios->qwrites_pc++;
1330 ios->qwrite_kb_pc += t_kb(t);
cd0ae0f6 1331 ios->qwrite_b_pc += t_b(t);
801646d6
CS
1332 } else {
1333 ios->qreads_pc++;
1334		ios->qread_kb_pc += t_kb(t);
cd0ae0f6 1335 ios->qread_b_pc += t_b(t);
801646d6
CS
1336 }
1337}
1338
1339static inline void account_pc_queue(struct blk_io_trace *t,
1340 struct per_cpu_info *pci, int rw)
1341{
1342 __account_pc_queue(&pci->io_stats, t, rw);
1343
1344 if (per_process_stats) {
1345 struct io_stats *ios = find_process_io_stats(t->pid);
1346
1347 __account_pc_queue(ios, t, rw);
1348 }
1349}
1350
1351static inline void __account_pc_issue(struct io_stats *ios, int rw,
1352 unsigned int bytes)
1353{
1354 if (rw) {
1355 ios->iwrites_pc++;
1356 ios->iwrite_kb_pc += bytes >> 10;
cd0ae0f6 1357 ios->iwrite_b_pc += bytes & 1023;
801646d6
CS
1358 } else {
1359 ios->ireads_pc++;
1360 ios->iread_kb_pc += bytes >> 10;
cd0ae0f6 1361 ios->iread_b_pc += bytes & 1023;
801646d6
CS
1362 }
1363}
1364
1365static inline void account_pc_issue(struct blk_io_trace *t,
1366 struct per_cpu_info *pci, int rw)
1367{
1368 __account_pc_issue(&pci->io_stats, rw, t->bytes);
1369
1370 if (per_process_stats) {
1371 struct io_stats *ios = find_process_io_stats(t->pid);
1372
1373 __account_pc_issue(ios, rw, t->bytes);
1374 }
1375}
1376
1377static inline void __account_pc_requeue(struct io_stats *ios,
1378 struct blk_io_trace *t, int rw)
1379{
1380 if (rw) {
1381 ios->wrqueue_pc++;
1382 ios->iwrite_kb_pc -= t_kb(t);
cd0ae0f6 1383 ios->iwrite_b_pc -= t_b(t);
801646d6
CS
1384 } else {
1385 ios->rrqueue_pc++;
1386 ios->iread_kb_pc -= t_kb(t);
cd0ae0f6 1387 ios->iread_b_pc -= t_b(t);
801646d6
CS
1388 }
1389}
1390
1391static inline void account_pc_requeue(struct blk_io_trace *t,
1392 struct per_cpu_info *pci, int rw)
1393{
1394 __account_pc_requeue(&pci->io_stats, t, rw);
1395
1396 if (per_process_stats) {
1397 struct io_stats *ios = find_process_io_stats(t->pid);
1398
1399 __account_pc_requeue(ios, t, rw);
1400 }
1401}
1402
1403static inline void __account_pc_c(struct io_stats *ios, int rw)
1404{
1405 if (rw)
1406 ios->cwrites_pc++;
1407 else
1408 ios->creads_pc++;
1409}
1410
1411static inline void account_pc_c(struct blk_io_trace *t,
1412 struct per_cpu_info *pci, int rw)
1413{
1414 __account_pc_c(&pci->io_stats, rw);
1415
1416 if (per_process_stats) {
1417 struct io_stats *ios = find_process_io_stats(t->pid);
1418
1419 __account_pc_c(ios, rw);
1420 }
1421}
1422
b6076a9b
JA
1423static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
1424 int rw)
d0ca268b
JA
1425{
1426 if (rw) {
152f6476 1427 ios->qwrites++;
ae957cbc 1428 ios->qwrite_kb += t_kb(t);
cd0ae0f6 1429 ios->qwrite_b += t_b(t);
d0ca268b 1430 } else {
152f6476 1431 ios->qreads++;
ae957cbc 1432 ios->qread_kb += t_kb(t);
cd0ae0f6 1433 ios->qread_b += t_b(t);
152f6476
JA
1434 }
1435}
1436
b6076a9b
JA
1437static inline void account_queue(struct blk_io_trace *t,
1438 struct per_cpu_info *pci, int rw)
152f6476 1439{
b6076a9b 1440 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
1441
1442 if (per_process_stats) {
bfc70ad5 1443 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476 1444
b6076a9b 1445 __account_queue(ios, t, rw);
d0ca268b
JA
1446 }
1447}
1448
e21dc4dd 1449static inline void __account_c(struct io_stats *ios, int rw, int bytes)
d0ca268b
JA
1450{
1451 if (rw) {
152f6476
JA
1452 ios->cwrites++;
1453 ios->cwrite_kb += bytes >> 10;
cd0ae0f6 1454 ios->cwrite_b += bytes & 1023;
d0ca268b 1455 } else {
152f6476
JA
1456 ios->creads++;
1457 ios->cread_kb += bytes >> 10;
cd0ae0f6 1458 ios->cread_b += bytes & 1023;
152f6476
JA
1459 }
1460}
1461
1462static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
1463 int rw, int bytes)
1464{
1465 __account_c(&pci->io_stats, rw, bytes);
1466
1467 if (per_process_stats) {
bfc70ad5 1468 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476
JA
1469
1470 __account_c(ios, rw, bytes);
d0ca268b
JA
1471 }
1472}
1473
b6076a9b
JA
1474static inline void __account_issue(struct io_stats *ios, int rw,
1475 unsigned int bytes)
afd2d7ad 1476{
1477 if (rw) {
152f6476
JA
1478 ios->iwrites++;
1479 ios->iwrite_kb += bytes >> 10;
cd0ae0f6 1480 ios->iwrite_b += bytes & 1023;
afd2d7ad 1481 } else {
152f6476
JA
1482 ios->ireads++;
1483 ios->iread_kb += bytes >> 10;
cd0ae0f6 1484 ios->iread_b += bytes & 1023;
afd2d7ad 1485 }
1486}
1487
b6076a9b
JA
1488static inline void account_issue(struct blk_io_trace *t,
1489 struct per_cpu_info *pci, int rw)
d0ca268b 1490{
b6076a9b 1491 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
1492
1493 if (per_process_stats) {
bfc70ad5 1494 struct io_stats *ios = find_process_io_stats(t->pid);
d5396421 1495
b6076a9b 1496 __account_issue(ios, rw, t->bytes);
152f6476
JA
1497 }
1498}
1499
06639b27
JA
1500static inline void __account_unplug(struct io_stats *ios, int timer)
1501{
1502 if (timer)
1503 ios->timer_unplugs++;
1504 else
1505 ios->io_unplugs++;
1506}
1507
1508static inline void account_unplug(struct blk_io_trace *t,
1509 struct per_cpu_info *pci, int timer)
1510{
1511 __account_unplug(&pci->io_stats, timer);
1512
1513 if (per_process_stats) {
bfc70ad5 1514 struct io_stats *ios = find_process_io_stats(t->pid);
06639b27
JA
1515
1516 __account_unplug(ios, timer);
1517 }
1518}
1519
4054070a
JA
1520static inline void __account_requeue(struct io_stats *ios,
1521 struct blk_io_trace *t, int rw)
1522{
1523 if (rw) {
1524 ios->wrqueue++;
1525 ios->iwrite_kb -= t_kb(t);
cd0ae0f6 1526 ios->iwrite_b -= t_b(t);
4054070a
JA
1527 } else {
1528 ios->rrqueue++;
1529 ios->iread_kb -= t_kb(t);
cd0ae0f6 1530 ios->iread_b -= t_b(t);
4054070a
JA
1531 }
1532}
1533
1534static inline void account_requeue(struct blk_io_trace *t,
1535 struct per_cpu_info *pci, int rw)
1536{
1537 __account_requeue(&pci->io_stats, t, rw);
1538
1539 if (per_process_stats) {
bfc70ad5 1540 struct io_stats *ios = find_process_io_stats(t->pid);
4054070a
JA
1541
1542 __account_requeue(ios, t, rw);
1543 }
1544}
1545
f7bd1a9b
JA
1546static void log_complete(struct per_dev_info *pdi, struct per_cpu_info *pci,
1547 struct blk_io_trace *t, char *act)
ab197ca7 1548{
f7bd1a9b 1549 process_fmt(act, pci, t, log_track_complete(pdi, t), 0, NULL);
ab197ca7
AB
1550}
1551
f7bd1a9b
JA
1552static void log_insert(struct per_dev_info *pdi, struct per_cpu_info *pci,
1553 struct blk_io_trace *t, char *act)
b6076a9b 1554{
f7bd1a9b 1555 process_fmt(act, pci, t, log_track_insert(pdi, t), 0, NULL);
b6076a9b
JA
1556}
1557
ab197ca7
AB
1558static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1559 char *act)
1560{
b6076a9b 1561 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 1562}
2e3e8ded 1563
f7bd1a9b
JA
1564static void log_issue(struct per_dev_info *pdi, struct per_cpu_info *pci,
1565 struct blk_io_trace *t, char *act)
ab197ca7 1566{
f7bd1a9b 1567 process_fmt(act, pci, t, log_track_issue(pdi, t), 0, NULL);
d0ca268b
JA
1568}
1569
f7bd1a9b
JA
1570static void log_merge(struct per_dev_info *pdi, struct per_cpu_info *pci,
1571 struct blk_io_trace *t, char *act)
d0ca268b 1572{
a01516de 1573 if (act[0] == 'F')
f7bd1a9b 1574 log_track_frontmerge(pdi, t);
2e3e8ded 1575
ab197ca7 1576 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1577}
1578
dfe34da1 1579static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1580 char *act)
dfe34da1 1581{
ab197ca7 1582 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1583}
1584
d5396421 1585static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1586 char *act)
d0ca268b 1587{
ab197ca7 1588 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1589}
1590
ab197ca7 1591static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1592 char *act)
67e14fdc 1593{
ab197ca7 1594 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1595}
1596
93f1c611
JA
1597static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
1598 char *act)
1599{
1600 process_fmt(act, pci, t, -1ULL, 0, NULL);
1601}
1602
ab197ca7 1603static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1604{
ab197ca7 1605 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1606
ab197ca7 1607 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1608}
1609
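/*
 * Dispatch a SCSI/pc command trace: mask the cgroup flag off the low
 * 16 action bits, emit the single-letter action (Q/G/S/R/D/C/I) and
 * update the pc accounting and queue depth counters.
 */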
1610static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
1611 struct per_cpu_info *pci)
d0ca268b 1612{
c82a8c9d 1613 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
7238673f 1614 int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
56f2af81
JA
1615
1616 switch (act) {
d0ca268b 1617 case __BLK_TA_QUEUE:
3639a11e 1618 log_generic(pci, t, "Q");
801646d6 1619 account_pc_queue(t, pci, w);
d0ca268b
JA
1620 break;
1621 case __BLK_TA_GETRQ:
3639a11e 1622 log_generic(pci, t, "G");
d0ca268b
JA
1623 break;
1624 case __BLK_TA_SLEEPRQ:
3639a11e 1625 log_generic(pci, t, "S");
d0ca268b
JA
1626 break;
1627 case __BLK_TA_REQUEUE:
c82a8c9d
CS
1628 /*
1629 * can happen if we miss traces, don't let it go
1630 * below zero
1631 */
1632 if (pdi->cur_depth[w])
1633 pdi->cur_depth[w]--;
801646d6 1634 account_pc_requeue(t, pci, w);
3639a11e 1635 log_generic(pci, t, "R");
d0ca268b
JA
1636 break;
1637 case __BLK_TA_ISSUE:
801646d6 1638 account_pc_issue(t, pci, w);
c82a8c9d
CS
1639 pdi->cur_depth[w]++;
1640 if (pdi->cur_depth[w] > pdi->max_depth[w])
1641 pdi->max_depth[w] = pdi->cur_depth[w];
ab197ca7 1642 log_pc(pci, t, "D");
d0ca268b
JA
1643 break;
1644 case __BLK_TA_COMPLETE:
c82a8c9d
CS
1645 if (pdi->cur_depth[w])
1646 pdi->cur_depth[w]--;
3639a11e 1647 log_pc(pci, t, "C");
801646d6 1648 account_pc_c(t, pci, w);
d0ca268b 1649 break;
56f2af81
JA
1650 case __BLK_TA_INSERT:
1651 log_pc(pci, t, "I");
1652 break;
d0ca268b 1653 default:
56f2af81 1654 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 1655 break;
d0ca268b 1656 }
d0ca268b
JA
1657}
1658
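/*
 * Dispatch a normal fs request trace: handles queue, insert, merge,
 * getrq, requeue, issue and complete plus plug, unplug, split, bounce
 * and remap events, tracking the per-device queue depth on issue and
 * completion (clamped at zero in case traces were lost).
 */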
1659static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
1660 struct per_cpu_info *pci)
d0ca268b 1661{
649c7b66 1662 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
7238673f 1663 int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
d0ca268b 1664
7997c5b0 1665 switch (act) {
d0ca268b 1666 case __BLK_TA_QUEUE:
753f9091 1667 log_track_queue(pdi, t);
b6076a9b 1668 account_queue(t, pci, w);
3639a11e 1669 log_queue(pci, t, "Q");
d0ca268b 1670 break;
b6076a9b 1671 case __BLK_TA_INSERT:
f7bd1a9b 1672 log_insert(pdi, pci, t, "I");
b6076a9b 1673 break;
d0ca268b 1674 case __BLK_TA_BACKMERGE:
152f6476 1675 account_m(t, pci, w);
f7bd1a9b 1676 log_merge(pdi, pci, t, "M");
d0ca268b
JA
1677 break;
1678 case __BLK_TA_FRONTMERGE:
152f6476 1679 account_m(t, pci, w);
f7bd1a9b 1680 log_merge(pdi, pci, t, "F");
d0ca268b
JA
1681 break;
1682 case __BLK_TA_GETRQ:
f7bd1a9b 1683 log_track_getrq(pdi, t);
3639a11e 1684 log_generic(pci, t, "G");
d0ca268b
JA
1685 break;
1686 case __BLK_TA_SLEEPRQ:
3639a11e 1687 log_generic(pci, t, "S");
d0ca268b
JA
1688 break;
1689 case __BLK_TA_REQUEUE:
65f2deb5
JA
1690 /*
1691 * can happen if we miss traces, don't let it go
1692 * below zero
1693 */
1694 if (pdi->cur_depth[w])
1695 pdi->cur_depth[w]--;
4054070a 1696 account_requeue(t, pci, w);
3639a11e 1697 log_queue(pci, t, "R");
d0ca268b
JA
1698 break;
1699 case __BLK_TA_ISSUE:
b6076a9b 1700 account_issue(t, pci, w);
649c7b66
JA
1701 pdi->cur_depth[w]++;
1702 if (pdi->cur_depth[w] > pdi->max_depth[w])
1703 pdi->max_depth[w] = pdi->cur_depth[w];
f7bd1a9b 1704 log_issue(pdi, pci, t, "D");
d0ca268b
JA
1705 break;
1706 case __BLK_TA_COMPLETE:
65f2deb5
JA
1707 if (pdi->cur_depth[w])
1708 pdi->cur_depth[w]--;
e81829a5 1709 fixup_complete(pdi, t);
152f6476 1710 account_c(t, pci, w, t->bytes);
f7bd1a9b 1711 log_complete(pdi, pci, t, "C");
d0ca268b 1712 break;
88b1a526 1713 case __BLK_TA_PLUG:
3639a11e 1714 log_action(pci, t, "P");
88b1a526 1715 break;
3639a11e 1716 case __BLK_TA_UNPLUG_IO:
06639b27 1717 account_unplug(t, pci, 0);
3639a11e
JA
1718 log_unplug(pci, t, "U");
1719 break;
1720 case __BLK_TA_UNPLUG_TIMER:
06639b27 1721 account_unplug(t, pci, 1);
3639a11e 1722 log_unplug(pci, t, "UT");
88b1a526 1723 break;
93f1c611 1724 case __BLK_TA_SPLIT:
e81829a5 1725 log_track_split(pdi, t);
93f1c611
JA
1726 log_split(pci, t, "X");
1727 break;
1728 case __BLK_TA_BOUNCE:
1729 log_generic(pci, t, "B");
1730 break;
a8f30e64
JA
1731 case __BLK_TA_REMAP:
1732 log_generic(pci, t, "A");
1733 break;
c54b9dd9 1734 case __BLK_TA_DRV_DATA:
c701176c 1735 have_drv_data = 1;
c54b9dd9
SR
1736 /* dump to binary file only */
1737 break;
d0ca268b
JA
1738 default:
1739 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1740 break;
d0ca268b 1741 }
d0ca268b
JA
1742}
1743
ff3a732c
JA
1744static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1745 struct per_dev_info *pdi)
d0ca268b 1746{
234db09d 1747 if (text_output) {
7238673f 1748 if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE)
1a15f6a8
AB
1749 handle_notify(t);
1750 else if (t->action & BLK_TC_ACT(BLK_TC_PC))
c82a8c9d 1751 dump_trace_pc(t, pdi, pci);
234db09d
AB
1752 else
1753 dump_trace_fs(t, pdi, pci);
1754 }
87b72777 1755
20ed6177
JA
1756 if (!pdi->events)
1757 pdi->first_reported_time = t->time;
1758
e7c9f3ff 1759 pdi->events++;
a2594911 1760
19cfaf3f
AB
1761 if (bin_output_msgs ||
1762 !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
7238673f 1763 (t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
19cfaf3f 1764 output_binary(t, sizeof(*t) + t->pdu_len);
d0ca268b
JA
1765}
1766
4c523165
JA
1767/*
1768 * print in a proper way, not too small and not too big. if more than
1769 * 1,000,000K, turn into M and so on
1770 */
1771static char *size_cnv(char *dst, unsigned long long num, int in_kb)
1772{
da19e768 1773 char suff[] = { '\0', 'K', 'M', 'G', 'P' };
0dc3602c 1774 unsigned int i = 0;
4c523165
JA
1775
1776 if (in_kb)
1777 i++;
1778
0dc3602c 1779 while (num > 1000 * 1000ULL && (i < sizeof(suff) - 1)) {
4c523165
JA
1780 i++;
1781 num /= 1000;
1782 }
1783
1784 sprintf(dst, "%'8Lu%c", num, suff[i]);
1785 return dst;
1786}
1787
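/*
 * Print one summary block (queued, dispatched, completed, merged and
 * requeued totals plus unplug counts, and pc counters when present)
 * under the heading given in msg; pdi, when non-NULL, supplies the
 * read/write queue depth maxima.
 */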
1788static void dump_io_stats(struct per_dev_info *pdi, struct io_stats *ios,
1789 char *msg)
5c017e4b 1790{
4c523165
JA
1791 static char x[256], y[256];
1792
152f6476
JA
1793 fprintf(ofp, "%s\n", msg);
1794
cd0ae0f6
ID
1795 fprintf(ofp, " Reads Queued: %s, %siB\t",
1796 size_cnv(x, ios->qreads, 0),
1797 size_cnv(y, ios->qread_kb + (ios->qread_b>>10), 1));
1798 fprintf(ofp, " Writes Queued: %s, %siB\n",
1799 size_cnv(x, ios->qwrites, 0),
1800 size_cnv(y, ios->qwrite_kb + (ios->qwrite_b>>10), 1));
1801 fprintf(ofp, " Read Dispatches: %s, %siB\t",
1802 size_cnv(x, ios->ireads, 0),
1803 size_cnv(y, ios->iread_kb + (ios->iread_b>>10), 1));
1804 fprintf(ofp, " Write Dispatches: %s, %siB\n",
1805 size_cnv(x, ios->iwrites, 0),
1806 size_cnv(y, ios->iwrite_kb + (ios->iwrite_b>>10), 1));
4054070a
JA
1807 fprintf(ofp, " Reads Requeued: %s\t\t", size_cnv(x, ios->rrqueue, 0));
1808 fprintf(ofp, " Writes Requeued: %s\n", size_cnv(x, ios->wrqueue, 0));
cd0ae0f6
ID
1809 fprintf(ofp, " Reads Completed: %s, %siB\t",
1810 size_cnv(x, ios->creads, 0),
1811 size_cnv(y, ios->cread_kb + (ios->cread_b>>10), 1));
1812 fprintf(ofp, " Writes Completed: %s, %siB\n",
1813 size_cnv(x, ios->cwrites, 0),
1814 size_cnv(y, ios->cwrite_kb + (ios->cwrite_b>>10), 1));
1815 fprintf(ofp, " Read Merges: %s, %siB\t",
1816 size_cnv(x, ios->mreads, 0),
1817 size_cnv(y, ios->mread_kb + (ios->mread_b>>10), 1));
1818 fprintf(ofp, " Write Merges: %s, %siB\n",
1819 size_cnv(x, ios->mwrites, 0),
1820 size_cnv(y, ios->mwrite_kb + (ios->mwrite_b>>10), 1));
649c7b66
JA
1821 if (pdi) {
1822 fprintf(ofp, " Read depth: %'8u%8c\t", pdi->max_depth[0], ' ');
1823 fprintf(ofp, " Write depth: %'8u\n", pdi->max_depth[1]);
1824 }
801646d6
CS
1825 if (ios->qreads_pc || ios->qwrites_pc || ios->ireads_pc || ios->iwrites_pc ||
1826 ios->rrqueue_pc || ios->wrqueue_pc || ios->creads_pc || ios->cwrites_pc) {
cd0ae0f6
ID
1827 fprintf(ofp, " PC Reads Queued: %s, %siB\t",
1828 size_cnv(x, ios->qreads_pc, 0),
1829 size_cnv(y,
1830 ios->qread_kb_pc + (ios->qread_b_pc>>10), 1));
1831 fprintf(ofp, " PC Writes Queued: %s, %siB\n",
1832 size_cnv(x, ios->qwrites_pc, 0),
1833 size_cnv(y,
1834 ios->qwrite_kb_pc + (ios->qwrite_b_pc>>10), 1));
1835 fprintf(ofp, " PC Read Disp.: %s, %siB\t",
1836 size_cnv(x, ios->ireads_pc, 0),
1837 size_cnv(y,
1838 ios->iread_kb_pc + (ios->iread_b_pc>>10), 1));
1839 fprintf(ofp, " PC Write Disp.: %s, %siB\n",
1840 size_cnv(x, ios->iwrites_pc, 0),
1841 size_cnv(y,
1842 ios->iwrite_kb_pc + (ios->iwrite_b_pc>>10),
1843 1));
801646d6
CS
1844 fprintf(ofp, " PC Reads Req.: %s\t\t", size_cnv(x, ios->rrqueue_pc, 0));
1845 fprintf(ofp, " PC Writes Req.: %s\n", size_cnv(x, ios->wrqueue_pc, 0));
1846 fprintf(ofp, " PC Reads Compl.: %s\t\t", size_cnv(x, ios->creads_pc, 0));
d0576a3a 1847 fprintf(ofp, " PC Writes Compl.: %s\n", size_cnv(x, ios->cwrites_pc, 0));
801646d6 1848 }
06639b27
JA
1849 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1850 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1851}
1852
50adc0ba
JA
1853static void dump_wait_stats(struct per_process_info *ppi)
1854{
b9d40d6f
JA
1855 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1856 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1857 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1858 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1859 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1860 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1861
1862 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1863 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1864 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1865 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1866 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1867 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1868}
1869
886ecf0e
JA
1870static int ppi_name_compare(const void *p1, const void *p2)
1871{
1872 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1873 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1874 int res;
1875
2990e589 1876 res = strverscmp(ppi1->ppm->comm, ppi2->ppm->comm);
886ecf0e 1877 if (!res)
2990e589 1878 res = ppi1->ppm->pid > ppi2->ppm->pid;
886ecf0e
JA
1879
1880 return res;
1881}
1882
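/*
 * Comparator used when the per-program stats are sorted by something
 * other than name (-S): ranks processes by queued or completed KiB, or
 * by queued/completed I/O counts, depending on per_process_stats_event.
 */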
1883static int ppi_event_compare(const void *p1, const void *p2)
1884{
1885 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1886 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1887 struct io_stats *ios1 = &ppi1->io_stats;
1888 struct io_stats *ios2 = &ppi2->io_stats;
1889 unsigned long io1, io2;
1890 unsigned long long kb1,kb2;
1891 int sort_by_kb = 1;
1892
1893 io1 = io2 = 0;
1894 kb1 = kb2 = 0;
1895
1896 switch (per_process_stats_event) {
1897 case SORT_PROG_EVENT_QKB: /* KB: Queued read and write */
1898 kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10) +
1899 ios1->qread_kb + (ios1->qread_b>>10);
1900 kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10) +
1901 ios2->qread_kb + (ios2->qread_b>>10);
1902 break;
1903 case SORT_PROG_EVENT_RKB: /* KB: Queued Read */
1904 kb1 = ios1->qread_kb + (ios1->qread_b>>10);
1905 kb2 = ios2->qread_kb + (ios2->qread_b>>10);
1906 break;
1907 case SORT_PROG_EVENT_WKB: /* KB: Queued Write */
1908 kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10);
1909 kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10);
1910 break;
1911 case SORT_PROG_EVENT_CKB: /* KB: Complete */
1912 kb1 = ios1->cwrite_kb + (ios1->cwrite_b>>10) +
1913 ios1->cread_kb + (ios1->cread_b>>10);
1914 kb2 = ios2->cwrite_kb + (ios2->cwrite_b>>10) +
1915 ios2->cread_kb + (ios2->cread_b>>10);
1916 break;
1917 case SORT_PROG_EVENT_QIO: /* IO: Queued read and write */
1918 sort_by_kb = 0;
1919 io1 = ios1->qreads + ios1->qwrites;
1920 io2 = ios2->qreads + ios2->qwrites;
1921 break;
1922 case SORT_PROG_EVENT_RIO: /* IO: Queued Read */
1923 sort_by_kb = 0;
1924 io1 = ios1->qreads;
1925 io2 = ios2->qreads;
1926 break;
1927 case SORT_PROG_EVENT_WIO: /* IO: Queued Write */
1928 sort_by_kb = 0;
1929 io1 = ios1->qwrites;
1930 io2 = ios2->qwrites;
1931 break;
1932 case SORT_PROG_EVENT_CIO: /* IO: Complete */
1933 sort_by_kb = 0;
1934 io1 = ios1->creads + ios1->cwrites;
1935 io2 = ios2->creads + ios2->cwrites;
1936 break;
1937 }
1938
1939
1940 /* compare kb */
1941 if (sort_by_kb) {
1942 if (kb1 > kb2)
1943 return 1;
1944 else if (kb1 == kb2)
1945 return 0;
1946 return -1;
1947 }
1948
1949 /* compare io */
1950 if (io1 > io2)
1951 return 1;
1952 else if (io1 == io2)
1953 return 0;
1954 return -1;
1955}
1956
1957static int ppi_compare(const void *p1, const void *p2)
1958{
1959 if (per_process_stats_event == SORT_PROG_EVENT_N)
1960 return ppi_name_compare(p1, p2);
1961
1962 return ppi_event_compare(p1, p2);
1963}
1964
886ecf0e
JA
1965static void sort_process_list(void)
1966{
1967 struct per_process_info **ppis;
1968 struct per_process_info *ppi;
1969 int i = 0;
1970
1971 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1972
1973 ppi = ppi_list;
1974 while (ppi) {
06e6f286 1975 ppis[i++] = ppi;
886ecf0e
JA
1976 ppi = ppi->list_next;
1977 }
1978
a7263b8f 1979 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_compare);
886ecf0e
JA
1980
1981 i = ppi_list_entries - 1;
1982 ppi_list = NULL;
1983 while (i >= 0) {
1984 ppi = ppis[i];
1985
1986 ppi->list_next = ppi_list;
1987 ppi_list = ppi;
1988 i--;
1989 }
50c38702
JA
1990
1991 free(ppis);
886ecf0e
JA
1992}
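/*
 * A minimal, self-contained sketch (not blkparse code; the struct and names
 * below are hypothetical) of the pattern sort_process_list() uses above:
 * qsort() an array of pointers with an ascending comparator, then rebuild a
 * singly linked list by prepending from the tail of the array, which leaves
 * the list head at the element the comparator ranks smallest.
 */
#include <stdio.h>
#include <stdlib.h>

struct sk_item {
	unsigned long kb;
	struct sk_item *next;
};

static int sk_item_cmp(const void *a, const void *b)
{
	const struct sk_item *i1 = *(struct sk_item * const *) a;
	const struct sk_item *i2 = *(struct sk_item * const *) b;

	if (i1->kb > i2->kb)
		return 1;
	if (i1->kb < i2->kb)
		return -1;
	return 0;
}

int main(void)
{
	struct sk_item items[] = { { 30, NULL }, { 10, NULL }, { 20, NULL } };
	struct sk_item *ptrs[3], *head = NULL;
	int i;

	for (i = 0; i < 3; i++)
		ptrs[i] = &items[i];

	qsort(ptrs, 3, sizeof(ptrs[0]), sk_item_cmp);

	for (i = 2; i >= 0; i--) {	/* prepend from the tail, as above */
		ptrs[i]->next = head;
		head = ptrs[i];
	}

	for (; head; head = head->next)
		printf("%lu\n", head->kb);	/* prints 10, 20, 30 */

	return 0;
}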
1993
152f6476
JA
1994static void show_process_stats(void)
1995{
1996 struct per_process_info *ppi;
1997
886ecf0e
JA
1998 sort_process_list();
1999
152f6476
JA
2000 ppi = ppi_list;
2001 while (ppi) {
2990e589 2002 struct process_pid_map *ppm = ppi->ppm;
ce8b6b4f
JA
2003 char name[64];
2004
715d8021 2005 if (ppi->more_than_one)
2990e589 2006 sprintf(name, "%s (%u, ...)", ppm->comm, ppm->pid);
715d8021 2007 else
2990e589 2008 sprintf(name, "%s (%u)", ppm->comm, ppm->pid);
bf0720af 2009
649c7b66 2010 dump_io_stats(NULL, &ppi->io_stats, name);
50adc0ba 2011 dump_wait_stats(ppi);
152f6476
JA
2012 ppi = ppi->list_next;
2013 }
2014
2015 fprintf(ofp, "\n");
2016}
2017
e7c9f3ff 2018static void show_device_and_cpu_stats(void)
d0ca268b 2019{
e7c9f3ff
NS
2020 struct per_dev_info *pdi;
2021 struct per_cpu_info *pci;
2022 struct io_stats total, *ios;
20ed6177 2023 unsigned long long rrate, wrate, msec;
e7c9f3ff
NS
2024 int i, j, pci_events;
2025 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
2026 char name[32];
8a82e321 2027 double ratio;
e7c9f3ff
NS
2028
2029 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
2030
2031 memset(&total, 0, sizeof(total));
2032 pci_events = 0;
2033
2034 if (i > 0)
2035 fprintf(ofp, "\n");
2036
2037 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
2038 if (!pci->nelems)
2039 continue;
2040
2041 ios = &pci->io_stats;
2042 total.qreads += ios->qreads;
2043 total.qwrites += ios->qwrites;
2044 total.creads += ios->creads;
2045 total.cwrites += ios->cwrites;
2046 total.mreads += ios->mreads;
2047 total.mwrites += ios->mwrites;
2048 total.ireads += ios->ireads;
2049 total.iwrites += ios->iwrites;
4054070a
JA
2050 total.rrqueue += ios->rrqueue;
2051 total.wrqueue += ios->wrqueue;
e7c9f3ff
NS
2052 total.qread_kb += ios->qread_kb;
2053 total.qwrite_kb += ios->qwrite_kb;
2054 total.cread_kb += ios->cread_kb;
2055 total.cwrite_kb += ios->cwrite_kb;
2056 total.iread_kb += ios->iread_kb;
2057 total.iwrite_kb += ios->iwrite_kb;
fb2ec796
JA
2058 total.mread_kb += ios->mread_kb;
2059 total.mwrite_kb += ios->mwrite_kb;
cd0ae0f6
ID
2060 total.qread_b += ios->qread_b;
2061 total.qwrite_b += ios->qwrite_b;
2062 total.cread_b += ios->cread_b;
2063 total.cwrite_b += ios->cwrite_b;
2064 total.iread_b += ios->iread_b;
2065 total.iwrite_b += ios->iwrite_b;
2066 total.mread_b += ios->mread_b;
2067 total.mwrite_b += ios->mwrite_b;
801646d6
CS
2068
2069 total.qreads_pc += ios->qreads_pc;
2070 total.qwrites_pc += ios->qwrites_pc;
2071 total.creads_pc += ios->creads_pc;
2072 total.cwrites_pc += ios->cwrites_pc;
2073 total.ireads_pc += ios->ireads_pc;
2074 total.iwrites_pc += ios->iwrites_pc;
2075 total.rrqueue_pc += ios->rrqueue_pc;
2076 total.wrqueue_pc += ios->wrqueue_pc;
2077 total.qread_kb_pc += ios->qread_kb_pc;
2078 total.qwrite_kb_pc += ios->qwrite_kb_pc;
2079 total.iread_kb_pc += ios->iread_kb_pc;
2080 total.iwrite_kb_pc += ios->iwrite_kb_pc;
cd0ae0f6
ID
2081 total.qread_b_pc += ios->qread_b_pc;
2082 total.qwrite_b_pc += ios->qwrite_b_pc;
2083 total.iread_b_pc += ios->iread_b_pc;
2084 total.iwrite_b_pc += ios->iwrite_b_pc;
801646d6 2085
06639b27
JA
2086 total.timer_unplugs += ios->timer_unplugs;
2087 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
2088
2089 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
2090 j, get_dev_name(pdi, name, sizeof(name)));
649c7b66 2091 dump_io_stats(pdi, ios, line);
e7c9f3ff
NS
2092 pci_events++;
2093 }
5c017e4b 2094
e7c9f3ff
NS
2095 if (pci_events > 1) {
2096 fprintf(ofp, "\n");
2097 snprintf(line, sizeof(line) - 1, "Total (%s):",
2098 get_dev_name(pdi, name, sizeof(name)));
649c7b66 2099 dump_io_stats(NULL, &total, line);
e7c9f3ff 2100 }
d0ca268b 2101
20ed6177 2102 wrate = rrate = 0;
20ed6177
JA
2103 msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000;
2104 if (msec) {
cd0ae0f6
ID
2105 rrate = ((1000 * total.cread_kb) + total.cread_b) /
2106 msec;
2107 wrate = ((1000 * total.cwrite_kb) + total.cwrite_b) /
2108 msec;
20ed6177
JA
2109 }
2110
dce0f678
AB
2111 fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n",
2112 rrate, wrate);
2113 fprintf(ofp, "Events (%s): %'Lu entries\n",
2114 get_dev_name(pdi, line, sizeof(line)), pdi->events);
492da111
AB
2115
2116 collect_pdi_skips(pdi);
8a82e321
MZ
2117 if (!pdi->skips && !pdi->events)
2118 ratio = 0.0;
2119 else
2120 ratio = 100.0 * ((double)pdi->seq_skips /
2121 (double)(pdi->events + pdi->seq_skips));
492da111 2122 fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%)\n",
8a82e321 2123 pdi->skips, pdi->seq_skips, ratio);
e7c9f3ff 2124 }
d0ca268b
JA
2125}
2126
d025d6c6
HM
2127static void correct_abs_start_time(void)
2128{
2129 long delta = genesis_time - start_timestamp;
2130
2131 abs_start_time.tv_sec += SECONDS(delta);
2132 abs_start_time.tv_nsec += NANO_SECONDS(delta);
2133 if (abs_start_time.tv_nsec < 0) {
2134 abs_start_time.tv_nsec += 1000000000;
2135 abs_start_time.tv_sec -= 1;
2136 } else
2137 if (abs_start_time.tv_nsec >= 1000000000) {
2138 abs_start_time.tv_nsec -= 1000000000;
2139 abs_start_time.tv_sec += 1;
2140 }
2141}
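/*
 * A hedged, self-contained sketch (the helper name is hypothetical, not
 * blkparse code) of the adjustment correct_abs_start_time() performs above:
 * shift a timespec by a signed nanosecond delta and renormalize tv_nsec into
 * [0, 1000000000).
 */
#include <time.h>

static void sk_shift_timespec(struct timespec *ts, long long delta_ns)
{
	long long nsec = ts->tv_nsec + delta_ns % 1000000000LL;

	ts->tv_sec += delta_ns / 1000000000LL;
	if (nsec < 0) {
		nsec += 1000000000LL;
		ts->tv_sec -= 1;
	} else if (nsec >= 1000000000LL) {
		nsec -= 1000000000LL;
		ts->tv_sec += 1;
	}
	ts->tv_nsec = nsec;
}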
2142
4f0ae44f
JA
2143static void find_genesis(void)
2144{
2145 struct trace *t = trace_list;
2146
2147 genesis_time = -1ULL;
2148 while (t != NULL) {
2149 if (t->bit->time < genesis_time)
2150 genesis_time = t->bit->time;
2151
2152 t = t->next;
2153 }
7bd4fd0a
OK
2154
2155 /* The time stamp record will usually be the first
2156 * record in the trace, but not always.
2157 */
2158 if (start_timestamp
2159 && start_timestamp != genesis_time) {
d025d6c6 2160 correct_abs_start_time();
7bd4fd0a 2161 }
4f0ae44f
JA
2162}
2163
7f4d89e6 2164static inline int check_stopwatch(struct blk_io_trace *bit)
4f0ae44f 2165{
7f4d89e6
JA
2166 if (bit->time < stopwatch_end &&
2167 bit->time >= stopwatch_start)
4f0ae44f
JA
2168 return 0;
2169
2170 return 1;
2171}
2172
53c68c88
JA
2173/*
2174 * return youngest entry read
2175 */
2176static int sort_entries(unsigned long long *youngest)
4f0ae44f 2177{
210824c3
JA
2178 struct per_dev_info *pdi = NULL;
2179 struct per_cpu_info *pci = NULL;
4f0ae44f 2180 struct trace *t;
4f0ae44f
JA
2181
2182 if (!genesis_time)
2183 find_genesis();
2184
d6222db8 2185 *youngest = 0;
4f0ae44f
JA
2186 while ((t = trace_list) != NULL) {
2187 struct blk_io_trace *bit = t->bit;
2188
2189 trace_list = t->next;
2190
7f4d89e6 2191 bit->time -= genesis_time;
4f0ae44f 2192
d6222db8
JA
2193 if (bit->time < *youngest || !*youngest)
2194 *youngest = bit->time;
2195
210824c3
JA
2196 if (!pdi || pdi->dev != bit->device) {
2197 pdi = get_dev_info(bit->device);
2198 pci = NULL;
2199 }
2200
2201 if (!pci || pci->cpu != bit->cpu)
2202 pci = get_cpu_info(pdi, bit->cpu);
2203
2204 if (bit->sequence < pci->smallest_seq_read)
2205 pci->smallest_seq_read = bit->sequence;
774a1a10 2206
7f4d89e6 2207 if (check_stopwatch(bit)) {
4f0ae44f
JA
2208 bit_free(bit);
2209 t_free(t);
2210 continue;
2211 }
2212
2a1b3424 2213 if (trace_rb_insert_sort(t))
53c68c88 2214 return -1;
4f0ae44f
JA
2215 }
2216
53c68c88 2217 return 0;
4f0ae44f
JA
2218}
2219
824c2b39
JA
2220/*
2221 * to continue, we must have traces from all online cpus in the tree
2222 */
2223static int check_cpu_map(struct per_dev_info *pdi)
2224{
2225 unsigned long *cpu_map;
2226 struct rb_node *n;
2227 struct trace *__t;
2228 unsigned int i;
2229 int ret, cpu;
2230
2231 /*
2232 * create a map of the cpus we have traces for
2233 */
2234 cpu_map = malloc(pdi->cpu_map_max / sizeof(long));
cd992d08 2235 memset(cpu_map, 0, pdi->cpu_map_max / sizeof(long));
824c2b39
JA
2236 n = rb_first(&rb_sort_root);
2237 while (n) {
2238 __t = rb_entry(n, struct trace, rb_node);
2239 cpu = __t->bit->cpu;
2240
2241 cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
2242 n = rb_next(n);
2243 }
2244
2245 /*
b1c8e614
JA
2246 * we can't continue if pdi->cpu_map has entries set that we don't
2247 * have in the sort rbtree. the opposite is not a problem, though
824c2b39
JA
2248 */
2249 ret = 0;
2250 for (i = 0; i < pdi->cpu_map_max / CPUS_PER_LONG; i++) {
2251 if (pdi->cpu_map[i] & ~(cpu_map[i])) {
2252 ret = 1;
2253 break;
2254 }
2255 }
2256
2257 free(cpu_map);
2258 return ret;
2259}
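/*
 * Self-contained sketch (macro and function names are hypothetical, not
 * blkparse code) of the one-bit-per-CPU map that check_cpu_map() walks above:
 * each CPU sets one bit in an array of longs, and a missing CPU shows up as a
 * bit set in the reference map but clear in the observed map.
 */
#define SK_CPUS_PER_LONG	(8 * sizeof(unsigned long))
#define SK_CPU_IDX(cpu)		((cpu) / SK_CPUS_PER_LONG)
#define SK_CPU_BIT(cpu)		((cpu) % SK_CPUS_PER_LONG)

static void sk_cpu_set(unsigned long *map, int cpu)
{
	map[SK_CPU_IDX(cpu)] |= 1UL << SK_CPU_BIT(cpu);
}

/* returns 1 if 'want' has any CPU bit that 'have' is missing */
static int sk_cpus_missing(const unsigned long *want, const unsigned long *have,
			   int nlongs)
{
	int i;

	for (i = 0; i < nlongs; i++)
		if (want[i] & ~have[i])
			return 1;

	return 0;
}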
2260
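/*
 * check_sequence() below decides whether the trace at hand may be emitted yet:
 * it returns 0 when the per-cpu sequence is the expected one (or the gap can
 * be filled from already-sorted entries), non-zero when the caller should hold
 * off and wait for more input, and, when forced past a gap, records the
 * missing range via insert_skip() and lets the trace through.
 */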
a141a7cd 2261static int check_sequence(struct per_dev_info *pdi, struct trace *t, int force)
2a1b3424 2262{
1ca323a5 2263 struct blk_io_trace *bit = t->bit;
210824c3
JA
2264 unsigned long expected_sequence;
2265 struct per_cpu_info *pci;
1ca323a5 2266 struct trace *__t;
492da111 2267
210824c3
JA
2268 pci = get_cpu_info(pdi, bit->cpu);
2269 expected_sequence = pci->last_sequence + 1;
2270
774a1a10 2271 if (!expected_sequence) {
774a1a10
JA
2272 /*
2273 * 1 should be the first entry, just allow it
2274 */
2275 if (bit->sequence == 1)
2276 return 0;
210824c3 2277 if (bit->sequence == pci->smallest_seq_read)
79ee9704 2278 return 0;
774a1a10 2279
824c2b39 2280 return check_cpu_map(pdi);
774a1a10 2281 }
2a1b3424
JA
2282
2283 if (bit->sequence == expected_sequence)
2284 return 0;
2285
2a1b3424 2286 /*
1c7c54aa
JA
2287 * we may not have seen that sequence yet. if we are not doing
2288 * the final run, break and wait for more entries.
1c24add6 2289 */
210824c3
JA
2290 if (expected_sequence < pci->smallest_seq_read) {
2291 __t = trace_rb_find_last(pdi, pci, expected_sequence);
1ca323a5 2292 if (!__t)
1c7c54aa 2293 goto skip;
2a1b3424 2294
1ca323a5 2295 __put_trace_last(pdi, __t);
2a1b3424 2296 return 0;
a141a7cd
JA
2297 } else if (!force) {
2298 return 1;
0b07f23e 2299 } else {
1c7c54aa 2300skip:
66930177 2301 if (check_current_skips(pci, bit->sequence))
492da111
AB
2302 return 0;
2303
965eca2d 2304 if (expected_sequence < bit->sequence)
66930177 2305 insert_skip(pci, expected_sequence, bit->sequence - 1);
1c7c54aa
JA
2306 return 0;
2307 }
2a1b3424
JA
2308}
2309
a649216c 2310static void show_entries_rb(int force)
8fc0abbc 2311{
1f7afa72
JA
2312 struct per_dev_info *pdi = NULL;
2313 struct per_cpu_info *pci = NULL;
8fc0abbc 2314 struct blk_io_trace *bit;
3aabcd89 2315 struct rb_node *n;
8fc0abbc 2316 struct trace *t;
1f7afa72 2317
7d747d22 2318 while ((n = rb_first(&rb_sort_root)) != NULL) {
dd90748f 2319 if (is_done() && !force && !pipeline)
1f7afa72 2320 break;
8fc0abbc
JA
2321
2322 t = rb_entry(n, struct trace, rb_node);
2323 bit = t->bit;
2324
a43c1c17
JA
2325 if (read_sequence - t->read_sequence < 1 && !force)
2326 break;
2327
210824c3 2328 if (!pdi || pdi->dev != bit->device) {
287fa3d6 2329 pdi = get_dev_info(bit->device);
210824c3
JA
2330 pci = NULL;
2331 }
1f7afa72 2332
e7c9f3ff
NS
2333 if (!pdi) {
2334 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
2335 MAJOR(bit->device), MINOR(bit->device));
2336 break;
2337 }
1f7afa72 2338
7238673f 2339 if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) &&
9bf422b1 2340 check_sequence(pdi, t, force))
a141a7cd 2341 break;
cb2a1a62 2342
a141a7cd
JA
2343 if (!force && bit->time > last_allowed_time)
2344 break;
8fc0abbc 2345
4f0ae44f 2346 check_time(pdi, bit);
8fc0abbc 2347
4f0ae44f
JA
2348 if (!pci || pci->cpu != bit->cpu)
2349 pci = get_cpu_info(pdi, bit->cpu);
287fa3d6 2350
7238673f 2351 if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
9bf422b1 2352 pci->last_sequence = bit->sequence;
210824c3 2353
cbc927b6
JA
2354 pci->nelems++;
2355
66930177 2356 if (bit->action & (act_mask << BLK_TC_SHIFT))
98f8386b 2357 dump_trace(bit, pci, pdi);
87b72777 2358
2a1b3424 2359 put_trace(pdi, t);
cb2a1a62 2360 }
8fc0abbc
JA
2361}
2362
c0e0dbc2 2363static int read_data(int fd, void *buffer, int bytes, int block, int *fdblock)
1f79c4a0
JA
2364{
2365 int ret, bytes_left, fl;
2366 void *p;
2367
c0e0dbc2
JA
2368 if (block != *fdblock) {
2369 fl = fcntl(fd, F_GETFL);
1f79c4a0 2370
c0e0dbc2
JA
2371 if (!block) {
2372 *fdblock = 0;
2373 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
2374 } else {
2375 *fdblock = 1;
2376 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
2377 }
2378 }
1f79c4a0
JA
2379
2380 bytes_left = bytes;
2381 p = buffer;
2382 while (bytes_left > 0) {
2383 ret = read(fd, p, bytes_left);
2384 if (!ret)
2385 return 1;
2386 else if (ret < 0) {
db7e0552 2387 if (errno != EAGAIN) {
1f79c4a0 2388 perror("read");
db7e0552
JA
2389 return -1;
2390 }
a649216c 2391
5c0f40f7
JA
2392 /*
2393 * never do partial reads. we can return if we
2394 * didn't read anything and we should not block,
2395 * otherwise wait for data
2396 */
2397 if ((bytes_left == bytes) && !block)
2398 return 1;
2399
2400 usleep(10);
2401 continue;
1f79c4a0
JA
2402 } else {
2403 p += ret;
2404 bytes_left -= ret;
2405 }
2406 }
2407
2408 return 0;
2409}
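/*
 * A minimal sketch (POSIX calls only; helper names are hypothetical, not
 * blkparse code) of the two ideas in read_data() above: flip a descriptor
 * between blocking and non-blocking mode with F_GETFL/F_SETFL, and loop on
 * read() so the caller either gets the whole buffer, a clean "no data yet"
 * indication, or a hard error -- never a partial record.
 */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int sk_set_blocking(int fd, int blocking)
{
	int fl = fcntl(fd, F_GETFL);

	if (fl < 0)
		return -1;
	if (blocking)
		fl &= ~O_NONBLOCK;
	else
		fl |= O_NONBLOCK;

	return fcntl(fd, F_SETFL, fl);
}

/* 0 = full buffer read, 1 = EOF or nothing available yet, -1 = hard error */
static int sk_read_full(int fd, void *buf, size_t len)
{
	char *p = buf;
	size_t left = len;

	while (left) {
		ssize_t ret = read(fd, p, left);

		if (ret > 0) {
			p += ret;
			left -= ret;
		} else if (!ret) {
			return 1;		/* EOF */
		} else {
			if (errno != EAGAIN)
				return -1;	/* real error */
			if (left == len)
				return 1;	/* nothing read yet */
			usleep(10);		/* mid-record: wait for the rest */
		}
	}

	return 0;
}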
2410
017d1660
JA
2411static inline __u16 get_pdulen(struct blk_io_trace *bit)
2412{
2413 if (data_is_native)
2414 return bit->pdu_len;
2415
2416 return __bswap_16(bit->pdu_len);
2417}
2418
2419static inline __u32 get_magic(struct blk_io_trace *bit)
2420{
2421 if (data_is_native)
2422 return bit->magic;
2423
2424 return __bswap_32(bit->magic);
2425}
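/*
 * Sketch (glibc <byteswap.h>; the constant and names below are local
 * assumptions, with SK_TRACE_MAGIC presumed to match BLK_IO_TRACE_MAGIC) of
 * the endianness probe behind data_is_native and the get_*() accessors above:
 * the magic field is accepted either as stored or byte-swapped, and that
 * decision tells us whether every later field needs swapping.
 */
#include <byteswap.h>
#include <stdint.h>

#define SK_TRACE_MAGIC	0x65617400u	/* assumed BLK_IO_TRACE_MAGIC; low byte carries the version */

/* 1 = native byte order, 0 = foreign byte order, -1 = not trace data */
static int sk_magic_endianness(uint32_t magic)
{
	if ((magic & 0xffffff00) == SK_TRACE_MAGIC)
		return 1;
	if ((bswap_32(magic) & 0xffffff00) == SK_TRACE_MAGIC)
		return 0;

	return -1;
}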
2426
c0e0dbc2 2427static int read_events(int fd, int always_block, int *fdblock)
cb2a1a62 2428{
287fa3d6 2429 struct per_dev_info *pdi = NULL;
e820abd7 2430 unsigned int events = 0;
7d747d22
JA
2431
2432 while (!is_done() && events < rb_batch) {
2433 struct blk_io_trace *bit;
2434 struct trace *t;
db7e0552 2435 int pdu_len, should_block, ret;
7d747d22
JA
2436 __u32 magic;
2437
d36421e4 2438 bit = bit_alloc();
cb2a1a62 2439
c0e0dbc2
JA
2440 should_block = !events || always_block;
2441
db7e0552
JA
2442 ret = read_data(fd, bit, sizeof(*bit), should_block, fdblock);
2443 if (ret) {
eb9bd4e9 2444 bit_free(bit);
db7e0552
JA
2445 if (!events && ret < 0)
2446 events = ret;
cb2a1a62 2447 break;
eb9bd4e9 2448 }
cb2a1a62 2449
017d1660
JA
2450 /*
2451 * look at first trace to check whether we need to convert
2452 * data in the future
2453 */
9e4cd1b8 2454 if (data_is_native == -1 && check_data_endianness(bit->magic))
017d1660
JA
2455 break;
2456
2457 magic = get_magic(bit);
7d747d22
JA
2458 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2459 fprintf(stderr, "Bad magic %x\n", magic);
2460 break;
2461 }
2462
017d1660 2463 pdu_len = get_pdulen(bit);
7d747d22
JA
2464 if (pdu_len) {
2465 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
2466
c0e0dbc2 2467 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1, fdblock)) {
eb9bd4e9 2468 bit_free(ptr);
7d747d22 2469 break;
eb9bd4e9 2470 }
7d747d22
JA
2471
2472 bit = ptr;
2473 }
2474
d6222db8
JA
2475 trace_to_cpu(bit);
2476
2477 if (verify_trace(bit)) {
2478 bit_free(bit);
2479 continue;
2480 }
2481
bfc70ad5
JA
2482 /*
2483 * not a real trace, so grab and handle it here
2484 */
7238673f 2485 if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
7bd4fd0a 2486 handle_notify(bit);
a2594911 2487 output_binary(bit, sizeof(*bit) + bit->pdu_len);
bfc70ad5
JA
2488 continue;
2489 }
2490
d36421e4 2491 t = t_alloc();
cb2a1a62
JA
2492 memset(t, 0, sizeof(*t));
2493 t->bit = bit;
a43c1c17 2494 t->read_sequence = read_sequence;
cb2a1a62 2495
7d747d22
JA
2496 t->next = trace_list;
2497 trace_list = t;
1f7afa72 2498
f7bd1a9b 2499 if (!pdi || pdi->dev != bit->device)
287fa3d6
JA
2500 pdi = get_dev_info(bit->device);
2501
2502 if (bit->time > pdi->last_read_time)
2503 pdi->last_read_time = bit->time;
2504
7d747d22 2505 events++;
cb2a1a62
JA
2506 }
2507
7d747d22 2508 return events;
cb2a1a62
JA
2509}
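/*
 * Sketch (hypothetical record layout, not the blkparse structures) of the
 * fixed-header-then-payload pattern read_events() uses above: read a
 * fixed-size header, learn the trailing payload length from it, then grow the
 * buffer and read the rest so header and payload stay contiguous in memory.
 * A real reader would loop like read_data() above to tolerate short reads.
 */
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

struct sk_record {
	uint32_t magic;
	uint16_t payload_len;	/* payload_len bytes follow the header */
};

static struct sk_record *sk_read_record(int fd)
{
	struct sk_record *rec = malloc(sizeof(*rec));

	if (!rec)
		return NULL;
	if (read(fd, rec, sizeof(*rec)) != (ssize_t) sizeof(*rec))
		goto err;

	if (rec->payload_len) {
		struct sk_record *tmp = realloc(rec, sizeof(*rec) + rec->payload_len);

		if (!tmp)
			goto err;
		rec = tmp;
		if (read(fd, rec + 1, rec->payload_len) != rec->payload_len)
			goto err;
	}

	return rec;
err:
	free(rec);
	return NULL;
}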
2510
70317a16
AB
2511/*
2512 * Managing input streams
2513 */
2514
2515struct ms_stream {
2516 struct ms_stream *next;
2517 struct trace *first, *last;
73877e12 2518 struct per_dev_info *pdi;
70317a16
AB
2519 unsigned int cpu;
2520};
d0ca268b 2521
70317a16 2522#define MS_HASH(d, c) ((MAJOR(d) & 0xff) ^ (MINOR(d) & 0xff) ^ (cpu & 0xff))
73877e12 2523
70317a16
AB
2524struct ms_stream *ms_head;
2525struct ms_stream *ms_hash[256];
87b72777 2526
70317a16
AB
2527static void ms_sort(struct ms_stream *msp);
2528static int ms_prime(struct ms_stream *msp);
2529
2530static inline struct trace *ms_peek(struct ms_stream *msp)
2531{
2532 return (msp == NULL) ? NULL : msp->first;
2533}
d0ca268b 2534
70317a16
AB
2535static inline __u64 ms_peek_time(struct ms_stream *msp)
2536{
2537 return ms_peek(msp)->bit->time;
2538}
d1d7f15f 2539
70317a16
AB
2540static inline void ms_resort(struct ms_stream *msp)
2541{
2542 if (msp->next && ms_peek_time(msp) > ms_peek_time(msp->next)) {
2543 ms_head = msp->next;
2544 msp->next = NULL;
2545 ms_sort(msp);
2546 }
2547}
e7c9f3ff 2548
70317a16
AB
2549static inline void ms_deq(struct ms_stream *msp)
2550{
2551 msp->first = msp->first->next;
2552 if (!msp->first) {
2553 msp->last = NULL;
2554 if (!ms_prime(msp)) {
2555 ms_head = msp->next;
2556 msp->next = NULL;
2557 return;
d0ca268b 2558 }
d5396421
JA
2559 }
2560
70317a16
AB
2561 ms_resort(msp);
2562}
53c68c88 2563
70317a16
AB
2564static void ms_sort(struct ms_stream *msp)
2565{
2566 __u64 msp_t = ms_peek_time(msp);
2567 struct ms_stream *this_msp = ms_head;
d5396421 2568
70317a16
AB
2569 if (this_msp == NULL)
2570 ms_head = msp;
2571 else if (msp_t < ms_peek_time(this_msp)) {
2572 msp->next = this_msp;
2573 ms_head = msp;
2574 }
2575 else {
2576 while (this_msp->next && ms_peek_time(this_msp->next) < msp_t)
2577 this_msp = this_msp->next;
73877e12 2578
70317a16
AB
2579 msp->next = this_msp->next;
2580 this_msp->next = msp;
2581 }
2582}
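/*
 * Minimal sketch (hypothetical types, not blkparse code) of the ordered
 * insertion ms_sort() performs above: the stream list is kept sorted by the
 * timestamp of each stream's head trace, so ms_head is always the stream
 * holding the oldest unprocessed event.
 */
struct sk_stream {
	unsigned long long head_time;
	struct sk_stream *next;
};

static struct sk_stream *sk_insert_sorted(struct sk_stream *head,
					  struct sk_stream *msp)
{
	struct sk_stream *cur = head;

	if (!head || msp->head_time < head->head_time) {
		msp->next = head;
		return msp;
	}

	while (cur->next && cur->next->head_time < msp->head_time)
		cur = cur->next;

	msp->next = cur->next;
	cur->next = msp;

	return head;
}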
d5396421 2583
70317a16
AB
2584static int ms_prime(struct ms_stream *msp)
2585{
2586 __u32 magic;
2587 unsigned int i;
2588 struct trace *t;
2589 struct per_dev_info *pdi = msp->pdi;
2590 struct per_cpu_info *pci = get_cpu_info(pdi, msp->cpu);
2591 struct blk_io_trace *bit = NULL;
2592 int ret, pdu_len, ndone = 0;
d5396421 2593
70317a16
AB
2594 for (i = 0; !is_done() && pci->fd >= 0 && i < rb_batch; i++) {
2595 bit = bit_alloc();
2596 ret = read_data(pci->fd, bit, sizeof(*bit), 1, &pci->fdblock);
2597 if (ret)
2598 goto err;
51128a28 2599
70317a16
AB
2600 if (data_is_native == -1 && check_data_endianness(bit->magic))
2601 goto err;
210824c3 2602
70317a16
AB
2603 magic = get_magic(bit);
2604 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2605 fprintf(stderr, "Bad magic %x\n", magic);
2606 goto err;
d5396421 2607
70317a16 2608 }
d5396421 2609
70317a16
AB
2610 pdu_len = get_pdulen(bit);
2611 if (pdu_len) {
2612 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
2613 ret = read_data(pci->fd, ptr + sizeof(*bit), pdu_len,
2614 1, &pci->fdblock);
2615 if (ret) {
2616 free(ptr);
4eb899a6 2617 bit = NULL;
70317a16 2618 goto err;
7d747d22 2619 }
70317a16
AB
2620
2621 bit = ptr;
2ff323b0 2622 }
d5396421 2623
70317a16
AB
2624 trace_to_cpu(bit);
2625 if (verify_trace(bit))
2626 goto err;
53c68c88 2627
13d48592
TM
2628 if (bit->cpu != pci->cpu) {
2629 fprintf(stderr, "cpu %d trace info has error cpu %d\n",
2630 pci->cpu, bit->cpu);
2631 continue;
2632 }
2633
7238673f 2634 if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
7bd4fd0a 2635 handle_notify(bit);
70317a16
AB
2636 output_binary(bit, sizeof(*bit) + bit->pdu_len);
2637 bit_free(bit);
287fa3d6 2638
70317a16
AB
2639 i -= 1;
2640 continue;
2641 }
cb2a1a62 2642
70317a16
AB
2643 if (bit->time > pdi->last_read_time)
2644 pdi->last_read_time = bit->time;
d5396421 2645
70317a16
AB
2646 t = t_alloc();
2647 memset(t, 0, sizeof(*t));
2648 t->bit = bit;
2649
2650 if (msp->first == NULL)
2651 msp->first = msp->last = t;
2652 else {
2653 msp->last->next = t;
2654 msp->last = t;
2655 }
2656
2657 ndone++;
2658 }
2659
2660 return ndone;
2661
2662err:
2663 if (bit) bit_free(bit);
2664
2665 cpu_mark_offline(pdi, pci->cpu);
2666 close(pci->fd);
2667 pci->fd = -1;
2668
2669 return ndone;
2670}
2671
2672static struct ms_stream *ms_alloc(struct per_dev_info *pdi, int cpu)
2673{
2674 struct ms_stream *msp = malloc(sizeof(*msp));
2675
2676 msp->next = NULL;
2677 msp->first = msp->last = NULL;
2678 msp->pdi = pdi;
2679 msp->cpu = cpu;
2680
2681 if (ms_prime(msp))
2682 ms_sort(msp);
2683
2684 return msp;
2685}
2686
2687static int setup_file(struct per_dev_info *pdi, int cpu)
2688{
2689 int len = 0;
2690 struct stat st;
2691 char *p, *dname;
2692 struct per_cpu_info *pci = get_cpu_info(pdi, cpu);
2693
2694 pci->cpu = cpu;
2695 pci->fdblock = -1;
2696
2697 p = strdup(pdi->name);
2698 dname = dirname(p);
2699 if (strcmp(dname, ".")) {
2700 input_dir = dname;
2701 p = strdup(pdi->name);
2702 strcpy(pdi->name, basename(p));
2703 }
2704 free(p);
2705
2706 if (input_dir)
2707 len = sprintf(pci->fname, "%s/", input_dir);
2708
2709 snprintf(pci->fname + len, sizeof(pci->fname)-1-len,
2710 "%s.blktrace.%d", pdi->name, pci->cpu);
8afe3d7d 2711 if (stat(pci->fname, &st) < 0)
70317a16 2712 return 0;
8afe3d7d
AB
2713 if (!st.st_size)
2714 return 1;
70317a16
AB
2715
2716 pci->fd = open(pci->fname, O_RDONLY);
2717 if (pci->fd < 0) {
2718 perror(pci->fname);
2719 return 0;
2720 }
2721
2722 printf("Input file %s added\n", pci->fname);
2723 cpu_mark_online(pdi, pci->cpu);
2724
2725 pdi->nfiles++;
2726 ms_alloc(pdi, pci->cpu);
2727
2728 return 1;
2729}
2730
2731static int handle(struct ms_stream *msp)
2732{
2733 struct trace *t;
2734 struct per_dev_info *pdi;
2735 struct per_cpu_info *pci;
2736 struct blk_io_trace *bit;
2737
2738 t = ms_peek(msp);
70317a16
AB
2739
2740 bit = t->bit;
2741 pdi = msp->pdi;
2742 pci = get_cpu_info(pdi, msp->cpu);
2743 pci->nelems++;
8091de93 2744 bit->time -= genesis_time;
7072ee3f
LU
2745
2746 if (t->bit->time > stopwatch_end)
2747 return 0;
2748
8091de93 2749 pdi->last_reported_time = bit->time;
7072ee3f
LU
2750 if ((bit->action & (act_mask << BLK_TC_SHIFT))&&
2751 t->bit->time >= stopwatch_start)
70317a16
AB
2752 dump_trace(bit, pci, pdi);
2753
2754 ms_deq(msp);
2755
2756 if (text_output)
2757 trace_rb_insert_last(pdi, t);
2758 else {
2759 bit_free(t->bit);
2760 t_free(t);
2761 }
2762
2763 return 1;
2764}
2765
7d340756
MZ
2766/*
2767 * Check if we need to sanitize the name. We allow 'foo', or if foo.blktrace.X
2768 * is given, then strip back down to 'foo' to avoid missing files.
2769 */
2770static int name_fixup(char *name)
2771{
2772 char *b;
2773
2774 if (!name)
2775 return 1;
2776
2777 b = strstr(name, ".blktrace.");
2778 if (b)
2779 *b = '\0';
2780
2781 return 0;
2782}
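/*
 * Example: an argument of "sda.blktrace.3" is truncated to "sda" here, so
 * setup_file() can append ".blktrace.<cpu>" for every CPU without missing
 * files; a bare "sda" is left untouched.
 */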
2783
70317a16
AB
2784static int do_file(void)
2785{
7d340756 2786 int i, cpu, ret;
70317a16
AB
2787 struct per_dev_info *pdi;
2788
2789 /*
2790 * first prepare all files for reading
2791 */
2792 for (i = 0; i < ndevices; i++) {
2793 pdi = &devices[i];
7d340756
MZ
2794 ret = name_fixup(pdi->name);
2795 if (ret)
2796 return ret;
2797
70317a16
AB
2798 for (cpu = 0; setup_file(pdi, cpu); cpu++)
2799 ;
a2b1f355
ES
2800
2801 if (!cpu) {
2802 fprintf(stderr,"No input files found for %s\n",
2803 pdi->name);
2804 return 1;
2805 }
70317a16
AB
2806 }
2807
8091de93
AB
2808 /*
2809 * Get the initial time stamp
2810 */
2811 if (ms_head)
2812 genesis_time = ms_peek_time(ms_head);
2813
918e9797
HM
2814 /*
2815 * Correct abs_start_time if necessary
2816 */
2817 if (start_timestamp
2818 && start_timestamp != genesis_time) {
2819 correct_abs_start_time();
2820 }
2821
70317a16
AB
2822 /*
2823 * Keep processing traces while any are left
2824 */
2825 while (!is_done() && ms_head && handle(ms_head))
2826 ;
a649216c 2827
7d747d22 2828 return 0;
412819ce 2829}
d5396421 2830
67076cbc 2831static void do_pipe(int fd)
412819ce 2832{
53c68c88 2833 unsigned long long youngest;
67076cbc 2834 int events, fdblock;
d5396421 2835
be925321 2836 last_allowed_time = -1ULL;
c0e0dbc2 2837 fdblock = -1;
db7e0552 2838 while ((events = read_events(fd, 0, &fdblock)) > 0) {
4ab42801 2839 read_sequence++;
412819ce 2840
210824c3 2841#if 0
0b07f23e 2842 smallest_seq_read = -1U;
210824c3 2843#endif
0b07f23e 2844
53c68c88
JA
2845 if (sort_entries(&youngest))
2846 break;
2847
2848 if (youngest > stopwatch_end)
2ff323b0
JA
2849 break;
2850
763d936e 2851 show_entries_rb(0);
0b07f23e 2852 }
d5396421 2853
a649216c
JA
2854 if (rb_sort_entries)
2855 show_entries_rb(1);
67076cbc
JA
2856}
2857
2858static int do_fifo(void)
2859{
2860 int fd;
2861
2862 if (!strcmp(pipename, "-"))
2863 fd = dup(STDIN_FILENO);
2864 else
2865 fd = open(pipename, O_RDONLY);
2866
2867 if (fd == -1) {
2868 perror("dup stdin");
2869 return -1;
2870 }
a649216c 2871
67076cbc 2872 do_pipe(fd);
d5396421 2873 close(fd);
d5396421
JA
2874 return 0;
2875}
d0ca268b 2876
cbc927b6 2877static void show_stats(void)
412819ce 2878{
cbc927b6
JA
2879 if (!ofp)
2880 return;
2881 if (stats_printed)
2882 return;
2883
2884 stats_printed = 1;
2885
2886 if (per_process_stats)
2887 show_process_stats();
2888
2889 if (per_device_and_cpu_stats)
2890 show_device_and_cpu_stats();
2891
152f6476 2892 fflush(ofp);
412819ce
JA
2893}
2894
e820abd7 2895static void handle_sigint(__attribute__((__unused__)) int sig)
412819ce
JA
2896{
2897 done = 1;
412819ce
JA
2898}
2899
46e6968b
NS
2900/*
2901 * Extract start and end times from a string, allowing
2902 * us to specify a time interval of interest within a trace.
2903 * Format: "end" (start is zero) or "start:end", both in seconds.
2904 */
2905static int find_stopwatch_interval(char *string)
2906{
2907 double value;
2908 char *sp;
2909
2910 value = strtod(string, &sp);
2911 if (sp == string) {
2912 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
2913 return 1;
2914 }
2915 if (*sp == ':') {
2916 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
2917 string = sp + 1;
2918 value = strtod(string, &sp);
2919 if (sp == string || *sp != '\0') {
2920 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
2921 string);
2922 return 1;
2923 }
2924 } else if (*sp != '\0') {
2925 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
2926 return 1;
2927 }
1b928247
JA
2928 stopwatch_end = DOUBLE_TO_NANO_ULL(value);
2929 if (stopwatch_end <= stopwatch_start) {
2930 fprintf(stderr, "Invalid stopwatch interval: %Lu -> %Lu\n",
2931 stopwatch_start, stopwatch_end);
2932 return 1;
2933 }
2934
46e6968b
NS
2935 return 0;
2936}
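/*
 * Examples of the accepted forms (seconds, converted to nanoseconds):
 * "-w 10" keeps traces with 0 <= time < 10s, "-w 2.5:10" keeps traces with
 * 2.5s <= time < 10s. A zero-length or reversed interval is rejected by the
 * stopwatch_end <= stopwatch_start check above.
 */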
2937
67076cbc
JA
2938static int is_pipe(const char *str)
2939{
2940 struct stat st;
2941
2942 if (!strcmp(str, "-"))
2943 return 1;
2944 if (!stat(str, &st) && S_ISFIFO(st.st_mode))
2945 return 1;
2946
2947 return 0;
2948}
2949
a7263b8f
WZ
2950static int get_program_sort_event(const char *str)
2951{
2952 char evt = str[0];
2953
2954 switch (evt) {
2955 case 'N':
2956 per_process_stats_event = SORT_PROG_EVENT_N;
2957 break;
2958 case 'Q':
2959 per_process_stats_event = SORT_PROG_EVENT_QKB;
2960 break;
2961 case 'q':
2962 per_process_stats_event = SORT_PROG_EVENT_QIO;
2963 break;
2964 case 'R':
2965 per_process_stats_event = SORT_PROG_EVENT_RKB;
2966 break;
2967 case 'r':
2968 per_process_stats_event = SORT_PROG_EVENT_RIO;
2969 break;
2970 case 'W':
2971 per_process_stats_event = SORT_PROG_EVENT_WKB;
2972 break;
2973 case 'w':
2974 per_process_stats_event = SORT_PROG_EVENT_WIO;
2975 break;
2976 case 'C':
2977 per_process_stats_event = SORT_PROG_EVENT_CKB;
2978 break;
2979 case 'c':
2980 per_process_stats_event = SORT_PROG_EVENT_CIO;
2981 break;
2982 default:
2983 return 1;
2984 }
2985
2986 return 0;
2987}
2988
2989#define S_OPTS "a:A:b:D:d:f:F:hi:o:OqsS:tw:vVM"
234db09d
AB
2990static char usage_str[] = "\n\n" \
2991 "-i <file> | --input=<file>\n" \
2992 "[ -a <action field> | --act-mask=<action field> ]\n" \
2993 "[ -A <action mask> | --set-mask=<action mask> ]\n" \
2994 "[ -b <traces> | --batch=<traces> ]\n" \
2995 "[ -d <file> | --dump-binary=<file> ]\n" \
2996 "[ -D <dir> | --input-directory=<dir> ]\n" \
2997 "[ -f <format> | --format=<format> ]\n" \
2998 "[ -F <spec> | --format-spec=<spec> ]\n" \
2999 "[ -h | --hash-by-name ]\n" \
3000 "[ -o <file> | --output=<file> ]\n" \
3001 "[ -O | --no-text-output ]\n" \
3002 "[ -q | --quiet ]\n" \
3003 "[ -s | --per-program-stats ]\n" \
a7263b8f 3004 "[ -S <event> | --sort-program-stats=<event> ]\n" \
234db09d
AB
3005 "[ -t | --track-ios ]\n" \
3006 "[ -w <time> | --stopwatch=<time> ]\n" \
19cfaf3f 3007 "[ -M | --no-msgs\n" \
234db09d
AB
3008 "[ -v | --verbose ]\n" \
3009 "[ -V | --version ]\n\n" \
541c9bf6
ES
3010 "\t-a Only trace specified actions. See documentation\n" \
3011 "\t-A Give trace mask as a single value. See documentation\n" \
234db09d
AB
3012 "\t-b stdin read batching\n" \
3013 "\t-d Output file. If specified, binary data is written to file\n" \
d1d7f15f 3014 "\t-D Directory to prepend to input file names\n" \
234db09d
AB
3015 "\t-f Output format. Customize the output format. The format field\n" \
3016 "\t identifies can be found in the documentation\n" \
3017 "\t-F Format specification. Can be found in the documentation\n" \
3018 "\t-h Hash processes by name, not pid\n" \
3019 "\t-i Input file containing trace data, or '-' for stdin\n" \
52724a0e 3020 "\t-o Output file. If not given, output is stdout\n" \
234db09d
AB
3021 "\t-O Do NOT output text data\n" \
3022 "\t-q Quiet. Don't display any stats at the end of the trace\n" \
52724a0e 3023 "\t-s Show per-program io statistics\n" \
a7263b8f
WZ
3024 "\t-S Show per-program io statistics sorted by N/Q/q/R/r/W/w/C/c\n" \
3025 "\t N:Name, Q/q:Queued(read & write), R/r:Queued Read, W/w:Queued Write, C/c:Complete.\n" \
3026 "\t Sort programs by how much data(KB): Q,R,W,C.\n" \
3027 "\t Sort programs by how many IO operations: q,r,w,c.\n" \
3028 "\t if -S was used, the -s parameter will be ignored.\n" \
52724a0e
JA
3029 "\t-t Track individual ios. Will tell you the time a request took\n" \
3030 "\t to get queued, to get dispatched, and to get completed\n" \
52724a0e
JA
3031 "\t-w Only parse data between the given time interval in seconds.\n" \
3032 "\t If 'start' isn't given, blkparse defaults the start time to 0\n" \
19cfaf3f 3033 "\t-M Do not output messages to binary file\n" \
57ea8602
JA
3034 "\t-v More verbose for marginal errors\n" \
3035 "\t-V Print program version info\n\n";
52724a0e 3036
1f79c4a0
JA
3037static void usage(char *prog)
3038{
bc14c53f 3039 fprintf(stderr, "Usage: %s %s", prog, usage_str);
1f79c4a0
JA
3040}
3041
d5396421
JA
3042int main(int argc, char *argv[])
3043{
98f8386b 3044 int i, c, ret, mode;
98f8386b 3045 int act_mask_tmp = 0;
234db09d 3046 char *ofp_buffer = NULL;
346d8a74 3047 char *bin_ofp_buffer = NULL;
d5396421
JA
3048
3049 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
3050 switch (c) {
98f8386b
AB
3051 case 'a':
3052 i = find_mask_map(optarg);
3053 if (i < 0) {
3054 fprintf(stderr,"Invalid action mask %s\n",
3055 optarg);
3056 return 1;
3057 }
3058 act_mask_tmp |= i;
3059 break;
3060
3061 case 'A':
3062 if ((sscanf(optarg, "%x", &i) != 1) ||
3063 !valid_act_opt(i)) {
3064 fprintf(stderr,
3065 "Invalid set action mask %s/0x%x\n",
3066 optarg, i);
3067 return 1;
3068 }
3069 act_mask_tmp = i;
3070 break;
d5396421 3071 case 'i':
67076cbc 3072 if (is_pipe(optarg) && !pipeline) {
e7c9f3ff 3073 pipeline = 1;
67076cbc
JA
3074 pipename = strdup(optarg);
3075 } else if (resize_devices(optarg) != 0)
e7c9f3ff 3076 return 1;
d5396421 3077 break;
d1d7f15f
JA
3078 case 'D':
3079 input_dir = optarg;
3080 break;
d5396421 3081 case 'o':
66efebf8 3082 output_name = optarg;
d5396421 3083 break;
234db09d
AB
3084 case 'O':
3085 text_output = 0;
3086 break;
79f19470
JA
3087 case 'b':
3088 rb_batch = atoi(optarg);
3089 if (rb_batch <= 0)
3090 rb_batch = RB_BATCH_DEFAULT;
3091 break;
152f6476
JA
3092 case 's':
3093 per_process_stats = 1;
3094 break;
a7263b8f
WZ
3095 case 'S':
3096 per_process_stats = 1;
3097 if (get_program_sort_event(optarg))
3098 return 1;
3099 break;
7997c5b0
JA
3100 case 't':
3101 track_ios = 1;
3102 break;
1e1c60f1
NS
3103 case 'q':
3104 per_device_and_cpu_stats = 0;
3105 break;
46e6968b
NS
3106 case 'w':
3107 if (find_stopwatch_interval(optarg) != 0)
3108 return 1;
3109 break;
ab197ca7
AB
3110 case 'f':
3111 set_all_format_specs(optarg);
3112 break;
3113 case 'F':
3114 if (add_format_spec(optarg) != 0)
3115 return 1;
3116 break;
d915dee6 3117 case 'h':
715d8021 3118 ppi_hash_by_pid = 0;
bf0720af 3119 break;
52724a0e 3120 case 'v':
57ea8602
JA
3121 verbose++;
3122 break;
3123 case 'V':
52724a0e
JA
3124 printf("%s version %s\n", argv[0], blkparse_version);
3125 return 0;
a2594911
AB
3126 case 'd':
3127 dump_binary = optarg;
3128 break;
19cfaf3f
AB
3129 case 'M':
3130 bin_output_msgs = 0;
3131 break;
d5396421 3132 default:
1f79c4a0 3133 usage(argv[0]);
d5396421
JA
3134 return 1;
3135 }
d0ca268b
JA
3136 }
3137
e7c9f3ff 3138 while (optind < argc) {
67076cbc 3139 if (is_pipe(argv[optind]) && !pipeline) {
e7c9f3ff 3140 pipeline = 1;
00cd3044 3141 pipename = strdup(argv[optind]);
67076cbc 3142 } else if (resize_devices(argv[optind]) != 0)
e7c9f3ff
NS
3143 return 1;
3144 optind++;
3145 }
3146
3147 if (!pipeline && !ndevices) {
1f79c4a0 3148 usage(argv[0]);
d5396421
JA
3149 return 1;
3150 }
3151
98f8386b
AB
3152 if (act_mask_tmp != 0)
3153 act_mask = act_mask_tmp;
3154
7997c5b0 3155 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
412819ce
JA
3156
3157 signal(SIGINT, handle_sigint);
3158 signal(SIGHUP, handle_sigint);
3159 signal(SIGTERM, handle_sigint);
d5396421 3160
d69db225
JA
3161 setlocale(LC_NUMERIC, "en_US");
3162
234db09d
AB
3163 if (text_output) {
3164 if (!output_name) {
3165 ofp = fdopen(STDOUT_FILENO, "w");
3166 mode = _IOLBF;
3167 } else {
c3ce73f5 3168 char ofname[PATH_MAX];
152f6476 3169
234db09d
AB
3170 snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
3171 ofp = fopen(ofname, "w");
3172 mode = _IOFBF;
3173 }
152f6476 3174
234db09d
AB
3175 if (!ofp) {
3176 perror("fopen");
3177 return 1;
3178 }
152f6476 3179
234db09d
AB
3180 ofp_buffer = malloc(4096);
3181 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
3182 perror("setvbuf");
3183 return 1;
3184 }
152f6476
JA
3185 }
3186
a2594911 3187 if (dump_binary) {
cf659442
JA
3188 if (!strcmp(dump_binary, "-"))
3189 dump_fp = stdout;
3190 else {
3191 dump_fp = fopen(dump_binary, "w");
3192 if (!dump_fp) {
3193 perror(dump_binary);
3194 dump_binary = NULL;
3195 return 1;
3196 }
a2594911 3197 }
346d8a74
AB
3198 bin_ofp_buffer = malloc(128 * 1024);
3199 if (setvbuf(dump_fp, bin_ofp_buffer, _IOFBF, 128 * 1024)) {
3200 perror("setvbuf binary");
3201 return 1;
3202 }
a2594911
AB
3203 }
3204
e7c9f3ff 3205 if (pipeline)
67076cbc 3206 ret = do_fifo();
d5396421
JA
3207 else
3208 ret = do_file();
3209
fb863d7c
MZ
3210 if (!ret)
3211 show_stats();
3212
c701176c
MP
3213 if (have_drv_data && !dump_binary)
3214 printf("\ndiscarded traces containing low-level device driver "
3215 "specific data (only available in binary output)\n");
3216
8091de93
AB
3217 if (ofp_buffer) {
3218 fflush(ofp);
234db09d 3219 free(ofp_buffer);
8091de93
AB
3220 }
3221 if (bin_ofp_buffer) {
3222 fflush(dump_fp);
346d8a74 3223 free(bin_ofp_buffer);
8091de93 3224 }
d5396421 3225 return ret;
d0ca268b 3226}