/*
 * block queue tracing parse application
 *
 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <getopt.h>
#include <errno.h>
#include <signal.h>
#include <locale.h>
#include <libgen.h>
#include <time.h>

#include "blktrace.h"
#include "rbtree.h"
#include "jhash.h"

static char blkparse_version[] = "1.3.0";

struct skip_info {
	unsigned long start, end;
	struct skip_info *prev, *next;
};

struct per_dev_info {
	dev_t dev;
	char *name;

	int backwards;
	unsigned long long events;
	unsigned long long first_reported_time;
	unsigned long long last_reported_time;
	unsigned long long last_read_time;
	struct io_stats io_stats;
	unsigned long skips;
	unsigned long long seq_skips;
	unsigned int max_depth[2];
	unsigned int cur_depth[2];

	struct rb_root rb_track;

	int nfiles;
	int ncpus;

	unsigned long *cpu_map;
	unsigned int cpu_map_max;

	struct per_cpu_info *cpus;
};

/*
 * some duplicated effort here, we can unify this hash and the ppi hash later
 */
struct process_pid_map {
	pid_t pid;
	char comm[16];
	struct process_pid_map *hash_next, *list_next;
};

#define PPM_HASH_SHIFT	(8)
#define PPM_HASH_SIZE	(1 << PPM_HASH_SHIFT)
#define PPM_HASH_MASK	(PPM_HASH_SIZE - 1)
static struct process_pid_map *ppm_hash_table[PPM_HASH_SIZE];

struct per_process_info {
	struct process_pid_map *ppm;
	struct io_stats io_stats;
	struct per_process_info *hash_next, *list_next;
	int more_than_one;

	/*
	 * individual io stats
	 */
	unsigned long long longest_allocation_wait[2];
	unsigned long long longest_dispatch_wait[2];
	unsigned long long longest_completion_wait[2];
};

#define PPI_HASH_SHIFT	(8)
#define PPI_HASH_SIZE	(1 << PPI_HASH_SHIFT)
#define PPI_HASH_MASK	(PPI_HASH_SIZE - 1)

enum {
	SORT_PROG_EVENT_N,	/* Program Name */
	SORT_PROG_EVENT_QKB,	/* KB: Queued read and write */
	SORT_PROG_EVENT_RKB,	/* KB: Queued Read */
	SORT_PROG_EVENT_WKB,	/* KB: Queued Write */
	SORT_PROG_EVENT_CKB,	/* KB: Complete */
	SORT_PROG_EVENT_QIO,	/* IO: Queued read and write */
	SORT_PROG_EVENT_RIO,	/* IO: Queued Read */
	SORT_PROG_EVENT_WIO,	/* IO: Queued Write */
	SORT_PROG_EVENT_CIO,	/* IO: Complete */
};

static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE];
static struct per_process_info *ppi_list;
static int ppi_list_entries;

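/*
 * Two lookup structures are kept here: ppm_hash_table above maps a pid to
 * its process name and is filled from BLK_TN_PROCESS notify events, while
 * ppi_hash_table accumulates per-process io_stats and is hashed either by
 * pid or, when the hash-by-name option is given, by the 16-byte comm
 * string (see ppi_hash() below).
 */
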
static struct option l_opts[] = {
	{
		.name = "act-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'a'
	},
	{
		.name = "set-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'A'
	},
	{
		.name = "batch",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'b'
	},
	{
		.name = "input-directory",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'D'
	},
	{
		.name = "dump-binary",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'd'
	},
	{
		.name = "format",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'f'
	},
	{
		.name = "format-spec",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'F'
	},
	{
		.name = "hash-by-name",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'h'
	},
	{
		.name = "input",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'i'
	},
	{
		.name = "no-msgs",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'M'
	},
	{
		.name = "output",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'o'
	},
	{
		.name = "no-text-output",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'O'
	},
	{
		.name = "quiet",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'q'
	},
	{
		.name = "per-program-stats",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 's'
	},
	{
		.name = "sort-program-stats",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'S'
	},
	{
		.name = "track-ios",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 't'
	},
	{
		.name = "stopwatch",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'w'
	},
	{
		.name = "verbose",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'v'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'V'
	},
	{
		.name = NULL,
	}
};
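
/*
 * For example, an invocation like "blkparse -i sda -o sda.parsed -d sda.bin"
 * resolves through this table to -i (input), -o (output) and -d
 * (dump-binary), and "blkparse -h -s -i sda" would hash per-process stats
 * by name and print them.
 */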

/*
 * for sorting the displayed output
 */
struct trace {
	struct blk_io_trace *bit;
	struct rb_node rb_node;
	struct trace *next;
	unsigned long read_sequence;
};

static struct rb_root rb_sort_root;
static unsigned long rb_sort_entries;

static struct trace *trace_list;

/*
 * allocation cache
 */
static struct blk_io_trace *bit_alloc_list;
static struct trace *t_alloc_list;

/*
 * for tracking individual ios
 */
struct io_track_req {
	struct process_pid_map *ppm;
	unsigned long long allocation_time;
	unsigned long long queue_time;
	unsigned long long dispatch_time;
	unsigned long long completion_time;
};

struct io_track {
	struct rb_node rb_node;
	struct io_track_req *req;
	struct io_track *next;
	__u64 sector;
};

static int ndevices;
static struct per_dev_info *devices;
static char *get_dev_name(struct per_dev_info *, char *, int);
static int trace_rb_insert_last(struct per_dev_info *, struct trace *);

FILE *ofp = NULL;
static char *output_name;
static char *input_dir;

static unsigned long long genesis_time;
static unsigned long long last_allowed_time;
static unsigned long long stopwatch_start;	/* start from zero by default */
static unsigned long long stopwatch_end = -1ULL;	/* "infinity" */
static unsigned long read_sequence;

static int per_process_stats;
static int per_process_stats_event = SORT_PROG_EVENT_N;
static int per_device_and_cpu_stats = 1;
static int track_ios;
static int ppi_hash_by_pid = 1;
static int verbose;
static unsigned int act_mask = -1U;
static int stats_printed;
static int bin_output_msgs = 1;
int data_is_native = -1;

static FILE *dump_fp;
static char *dump_binary;

static unsigned int t_alloc_cache;
static unsigned int bit_alloc_cache;

#define RB_BATCH_DEFAULT	(512)
static unsigned int rb_batch = RB_BATCH_DEFAULT;

static int pipeline;
static char *pipename;

static int text_output = 1;

#define is_done()	(*(volatile int *)(&done))
static volatile int done;

struct timespec	abs_start_time;
static unsigned long long start_timestamp;

static int have_drv_data = 0;

#define JHASH_RANDOM	(0x3af5f2ee)

#define CPUS_PER_LONG	(8 * sizeof(unsigned long))
#define CPU_IDX(cpu)	((cpu) / CPUS_PER_LONG)
#define CPU_BIT(cpu)	((cpu) & (CPUS_PER_LONG - 1))

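/*
 * Worked example (assuming 64-bit unsigned long, so CPUS_PER_LONG == 64):
 * cpu 70 lives in cpu_map word CPU_IDX(70) == 1, at bit CPU_BIT(70) == 6.
 */
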
static void io_warn_unless(struct blk_io_trace *t, int condition,
			   const char *fmt, ...)
{
	va_list ap;

	if (condition)
		return;
	va_start(ap, fmt);
	printf("(%d,%d) request %llu + %u: ",
	       MAJOR(t->device), MINOR(t->device),
	       t->sector, t->bytes);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

a2594911
AB
350static void output_binary(void *buf, int len)
351{
352 if (dump_binary) {
346d8a74
AB
353 size_t n = fwrite(buf, len, 1, dump_fp);
354 if (n != 1) {
a2594911 355 perror(dump_binary);
346d8a74 356 fclose(dump_fp);
a2594911
AB
357 dump_binary = NULL;
358 }
359 }
360}
361
210824c3
JA
362static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
363{
364 struct per_cpu_info *cpus = pdi->cpus;
365 int ncpus = pdi->ncpus;
366 int new_count = cpu + 1;
367 int new_space, size;
368 char *new_start;
369
370 size = new_count * sizeof(struct per_cpu_info);
371 cpus = realloc(cpus, size);
372 if (!cpus) {
373 char name[20];
374 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
375 get_dev_name(pdi, name, sizeof(name)), size);
376 exit(1);
377 }
378
379 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
380 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
381 memset(new_start, 0, new_space);
382
383 pdi->ncpus = new_count;
384 pdi->cpus = cpus;
385
386 for (new_count = 0; new_count < pdi->ncpus; new_count++) {
387 struct per_cpu_info *pci = &pdi->cpus[new_count];
388
389 if (!pci->fd) {
390 pci->fd = -1;
391 memset(&pci->rb_last, 0, sizeof(pci->rb_last));
392 pci->rb_last_entries = 0;
393 pci->last_sequence = -1;
394 }
395 }
396}
397
398static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
399{
400 struct per_cpu_info *pci;
401
402 if (cpu >= pdi->ncpus)
403 resize_cpu_info(pdi, cpu);
404
405 pci = &pdi->cpus[cpu];
406 pci->cpu = cpu;
407 return pci;
408}
409
410
411static int resize_devices(char *name)
412{
413 int size = (ndevices + 1) * sizeof(struct per_dev_info);
414
415 devices = realloc(devices, size);
416 if (!devices) {
417 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
418 return 1;
419 }
420 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
421 devices[ndevices].name = name;
422 ndevices++;
423 return 0;
424}
425
426static struct per_dev_info *get_dev_info(dev_t dev)
427{
428 struct per_dev_info *pdi;
429 int i;
430
431 for (i = 0; i < ndevices; i++) {
432 if (!devices[i].dev)
433 devices[i].dev = dev;
434 if (devices[i].dev == dev)
435 return &devices[i];
436 }
437
438 if (resize_devices(NULL))
439 return NULL;
440
441 pdi = &devices[ndevices - 1];
442 pdi->dev = dev;
443 pdi->first_reported_time = 0;
444 pdi->last_read_time = 0;
210824c3
JA
445
446 return pdi;
447}
448
66930177 449static void insert_skip(struct per_cpu_info *pci, unsigned long start,
492da111
AB
450 unsigned long end)
451{
452 struct skip_info *sip;
453
66930177 454 for (sip = pci->skips_tail; sip != NULL; sip = sip->prev) {
492da111
AB
455 if (end == (sip->start - 1)) {
456 sip->start = start;
457 return;
458 } else if (start == (sip->end + 1)) {
459 sip->end = end;
460 return;
461 }
462 }
463
464 sip = malloc(sizeof(struct skip_info));
465 sip->start = start;
466 sip->end = end;
467 sip->prev = sip->next = NULL;
66930177
JA
468 if (pci->skips_tail == NULL)
469 pci->skips_head = pci->skips_tail = sip;
492da111 470 else {
66930177
JA
471 sip->prev = pci->skips_tail;
472 pci->skips_tail->next = sip;
473 pci->skips_tail = sip;
492da111
AB
474 }
475}
476
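/*
 * Example: if sequences 5-9 are already recorded as skipped and 10-12 go
 * missing next, the start == (sip->end + 1) case above simply extends the
 * existing range to 5-12 instead of allocating a new skip_info.
 */
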
66930177 477static void remove_sip(struct per_cpu_info *pci, struct skip_info *sip)
492da111
AB
478{
479 if (sip->prev == NULL) {
480 if (sip->next == NULL)
66930177 481 pci->skips_head = pci->skips_tail = NULL;
492da111 482 else {
66930177 483 pci->skips_head = sip->next;
492da111
AB
484 sip->next->prev = NULL;
485 }
486 } else if (sip->next == NULL) {
66930177 487 pci->skips_tail = sip->prev;
492da111
AB
488 sip->prev->next = NULL;
489 } else {
490 sip->prev->next = sip->next;
491 sip->next->prev = sip->prev;
492 }
493
494 sip->prev = sip->next = NULL;
495 free(sip);
496}
497
498#define IN_SKIP(sip,seq) (((sip)->start <= (seq)) && ((seq) <= sip->end))
66930177 499static int check_current_skips(struct per_cpu_info *pci, unsigned long seq)
492da111
AB
500{
501 struct skip_info *sip;
502
66930177
JA
503 for (sip = pci->skips_tail; sip != NULL; sip = sip->prev) {
504 if (IN_SKIP(sip, seq)) {
492da111
AB
505 if (sip->start == seq) {
506 if (sip->end == seq)
66930177 507 remove_sip(pci, sip);
492da111
AB
508 else
509 sip->start += 1;
510 } else if (sip->end == seq)
511 sip->end -= 1;
512 else {
513 sip->end = seq - 1;
66930177 514 insert_skip(pci, seq + 1, sip->end);
492da111
AB
515 }
516 return 1;
517 }
518 }
66930177 519
492da111
AB
520 return 0;
521}
522
523static void collect_pdi_skips(struct per_dev_info *pdi)
524{
525 struct skip_info *sip;
66930177 526 int cpu;
492da111
AB
527
528 pdi->skips = 0;
529 pdi->seq_skips = 0;
66930177
JA
530
531 for (cpu = 0; cpu < pdi->ncpus; cpu++) {
532 struct per_cpu_info *pci = &pdi->cpus[cpu];
533
534 for (sip = pci->skips_head; sip != NULL; sip = sip->next) {
535 pdi->skips++;
536 pdi->seq_skips += (sip->end - sip->start + 1);
537 if (verbose)
538 fprintf(stderr,"(%d,%d): skipping %lu -> %lu\n",
539 MAJOR(pdi->dev), MINOR(pdi->dev),
540 sip->start, sip->end);
541 }
492da111
AB
542 }
543}
544
824c2b39
JA
545static void cpu_mark_online(struct per_dev_info *pdi, unsigned int cpu)
546{
547 if (cpu >= pdi->cpu_map_max || !pdi->cpu_map) {
548 int new_max = (cpu + CPUS_PER_LONG) & ~(CPUS_PER_LONG - 1);
549 unsigned long *map = malloc(new_max / sizeof(long));
550
551 memset(map, 0, new_max / sizeof(long));
552
553 if (pdi->cpu_map) {
554 memcpy(map, pdi->cpu_map, pdi->cpu_map_max / sizeof(long));
555 free(pdi->cpu_map);
556 }
557
558 pdi->cpu_map = map;
559 pdi->cpu_map_max = new_max;
560 }
561
562 pdi->cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
563}
564
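/*
 * Worked example (assuming 64-bit unsigned long): marking cpu 70 online
 * rounds new_max up to 128, so the malloc()/memset() above cover
 * 128 / sizeof(long) == 16 bytes, i.e. two words or 128 cpu bits, and
 * bit CPU_BIT(70) == 6 of word CPU_IDX(70) == 1 gets set.
 */
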
565static inline void cpu_mark_offline(struct per_dev_info *pdi, int cpu)
566{
567 pdi->cpu_map[CPU_IDX(cpu)] &= ~(1UL << CPU_BIT(cpu));
568}
569
570static inline int cpu_is_online(struct per_dev_info *pdi, int cpu)
571{
572 return (pdi->cpu_map[CPU_IDX(cpu)] & (1UL << CPU_BIT(cpu))) != 0;
573}
574
bfc70ad5
JA
575static inline int ppm_hash_pid(pid_t pid)
576{
577 return jhash_1word(pid, JHASH_RANDOM) & PPM_HASH_MASK;
578}
579
580static struct process_pid_map *find_ppm(pid_t pid)
581{
582 const int hash_idx = ppm_hash_pid(pid);
583 struct process_pid_map *ppm;
584
585 ppm = ppm_hash_table[hash_idx];
586 while (ppm) {
587 if (ppm->pid == pid)
588 return ppm;
589
590 ppm = ppm->hash_next;
591 }
592
593 return NULL;
594}
595
ebe2d1aa 596static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name)
bfc70ad5
JA
597{
598 const int hash_idx = ppm_hash_pid(pid);
599 struct process_pid_map *ppm;
600
601 ppm = find_ppm(pid);
248eac8f
JA
602 if (!ppm) {
603 ppm = malloc(sizeof(*ppm));
604 memset(ppm, 0, sizeof(*ppm));
605 ppm->pid = pid;
d324757e
ES
606 memset(ppm->comm, 0, sizeof(ppm->comm));
607 strncpy(ppm->comm, name, sizeof(ppm->comm));
608 ppm->comm[sizeof(ppm->comm) - 1] = '\0';
248eac8f
JA
609 ppm->hash_next = ppm_hash_table[hash_idx];
610 ppm_hash_table[hash_idx] = ppm;
bfc70ad5 611 }
ebe2d1aa
JA
612
613 return ppm;
bfc70ad5
JA
614}
615
7bd4fd0a
OK
616static void handle_notify(struct blk_io_trace *bit)
617{
618 void *payload = (caddr_t) bit + sizeof(*bit);
619 __u32 two32[2];
620
7238673f 621 switch (bit->action & ~__BLK_TN_CGROUP) {
7bd4fd0a
OK
622 case BLK_TN_PROCESS:
623 add_ppm_hash(bit->pid, payload);
624 break;
625
626 case BLK_TN_TIMESTAMP:
627 if (bit->pdu_len != sizeof(two32))
628 return;
629 memcpy(two32, payload, sizeof(two32));
630 if (!data_is_native) {
631 two32[0] = be32_to_cpu(two32[0]);
632 two32[1] = be32_to_cpu(two32[1]);
633 }
634 start_timestamp = bit->time;
635 abs_start_time.tv_sec = two32[0];
636 abs_start_time.tv_nsec = two32[1];
637 if (abs_start_time.tv_nsec < 0) {
638 abs_start_time.tv_sec--;
639 abs_start_time.tv_nsec += 1000000000;
640 }
641
642 break;
643
1a15f6a8
AB
644 case BLK_TN_MESSAGE:
645 if (bit->pdu_len > 0) {
646 char msg[bit->pdu_len+1];
7238673f
JK
647 int len = bit->pdu_len;
648 char cgidstr[24];
1a15f6a8 649
7238673f
JK
650 cgidstr[0] = 0;
651 if (bit->action & __BLK_TN_CGROUP) {
652 struct blk_io_cgroup_payload *cgid = payload;
653
654 sprintf(cgidstr, "%x,%x ", cgid->ino,
655 cgid->gen);
656 payload += sizeof(struct blk_io_cgroup_payload);
657 len -= sizeof(struct blk_io_cgroup_payload);
658 }
659 memcpy(msg, (char *)payload, len);
660 msg[len] = '\0';
1a15f6a8
AB
661
662 fprintf(ofp,
7238673f 663 "%3d,%-3d %2d %8s %5d.%09lu %5u %s%2s %3s %s\n",
1a15f6a8 664 MAJOR(bit->device), MINOR(bit->device),
7238673f
JK
665 bit->cpu, "0", (int)SECONDS(bit->time),
666 (unsigned long)NANO_SECONDS(bit->time),
a021a33b 667 bit->pid, cgidstr, "m", "N", msg);
1a15f6a8
AB
668 }
669 break;
670
7bd4fd0a
OK
671 default:
672 /* Ignore unknown notify events */
673 ;
674 }
675}
676
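/*
 * The BLK_TN_TIMESTAMP case above pairs a trace-relative time
 * (start_timestamp) with a wall-clock time (abs_start_time); later
 * formatting code can use that pair to turn trace times into absolute
 * timestamps when such output is requested.
 */
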
bfc70ad5
JA
677char *find_process_name(pid_t pid)
678{
679 struct process_pid_map *ppm = find_ppm(pid);
680
681 if (ppm)
682 return ppm->comm;
683
684 return NULL;
685}
686
9e4cd1b8 687static inline int ppi_hash_pid(pid_t pid)
bf0720af
JA
688{
689 return jhash_1word(pid, JHASH_RANDOM) & PPI_HASH_MASK;
690}
691
692static inline int ppi_hash_name(const char *name)
152f6476 693{
bf0720af
JA
694 return jhash(name, 16, JHASH_RANDOM) & PPI_HASH_MASK;
695}
696
697static inline int ppi_hash(struct per_process_info *ppi)
698{
2990e589
JA
699 struct process_pid_map *ppm = ppi->ppm;
700
bf0720af 701 if (ppi_hash_by_pid)
2990e589 702 return ppi_hash_pid(ppm->pid);
bf0720af 703
2990e589 704 return ppi_hash_name(ppm->comm);
152f6476
JA
705}
706
bfc70ad5 707static inline void add_ppi_to_hash(struct per_process_info *ppi)
152f6476 708{
bf0720af 709 const int hash_idx = ppi_hash(ppi);
152f6476 710
bf0720af
JA
711 ppi->hash_next = ppi_hash_table[hash_idx];
712 ppi_hash_table[hash_idx] = ppi;
152f6476
JA
713}
714
bfc70ad5 715static inline void add_ppi_to_list(struct per_process_info *ppi)
152f6476
JA
716{
717 ppi->list_next = ppi_list;
718 ppi_list = ppi;
886ecf0e 719 ppi_list_entries++;
152f6476
JA
720}
721
bfc70ad5 722static struct per_process_info *find_ppi_by_name(char *name)
bf0720af
JA
723{
724 const int hash_idx = ppi_hash_name(name);
725 struct per_process_info *ppi;
726
727 ppi = ppi_hash_table[hash_idx];
728 while (ppi) {
2990e589
JA
729 struct process_pid_map *ppm = ppi->ppm;
730
731 if (!strcmp(ppm->comm, name))
bf0720af
JA
732 return ppi;
733
734 ppi = ppi->hash_next;
735 }
736
737 return NULL;
738}
739
9e4cd1b8 740static struct per_process_info *find_ppi_by_pid(pid_t pid)
152f6476 741{
bf0720af 742 const int hash_idx = ppi_hash_pid(pid);
152f6476
JA
743 struct per_process_info *ppi;
744
bf0720af 745 ppi = ppi_hash_table[hash_idx];
152f6476 746 while (ppi) {
2990e589
JA
747 struct process_pid_map *ppm = ppi->ppm;
748
749 if (ppm->pid == pid)
152f6476
JA
750 return ppi;
751
752 ppi = ppi->hash_next;
753 }
754
755 return NULL;
756}
757
9e4cd1b8 758static struct per_process_info *find_ppi(pid_t pid)
bf0720af 759{
715d8021 760 struct per_process_info *ppi;
bfc70ad5 761 char *name;
715d8021 762
bf0720af 763 if (ppi_hash_by_pid)
bfc70ad5
JA
764 return find_ppi_by_pid(pid);
765
766 name = find_process_name(pid);
767 if (!name)
768 return NULL;
bf0720af 769
bfc70ad5 770 ppi = find_ppi_by_name(name);
2990e589 771 if (ppi && ppi->ppm->pid != pid)
715d8021
JA
772 ppi->more_than_one = 1;
773
774 return ppi;
bf0720af
JA
775}
776
/*
 * struct trace and blktrace allocation cache, we do potentially
 * millions of mallocs for these structures while only using at most
 * a few thousand at a time
 */
782static inline void t_free(struct trace *t)
783{
784 if (t_alloc_cache < 1024) {
785 t->next = t_alloc_list;
786 t_alloc_list = t;
787 t_alloc_cache++;
788 } else
789 free(t);
790}
791
792static inline struct trace *t_alloc(void)
793{
794 struct trace *t = t_alloc_list;
795
796 if (t) {
797 t_alloc_list = t->next;
798 t_alloc_cache--;
799 return t;
800 }
801
802 return malloc(sizeof(*t));
803}
804
805static inline void bit_free(struct blk_io_trace *bit)
806{
807 if (bit_alloc_cache < 1024 && !bit->pdu_len) {
808 /*
809 * abuse a 64-bit field for a next pointer for the free item
810 */
811 bit->time = (__u64) (unsigned long) bit_alloc_list;
812 bit_alloc_list = (struct blk_io_trace *) bit;
813 bit_alloc_cache++;
814 } else
815 free(bit);
816}
817
818static inline struct blk_io_trace *bit_alloc(void)
819{
820 struct blk_io_trace *bit = bit_alloc_list;
821
822 if (bit) {
823 bit_alloc_list = (struct blk_io_trace *) (unsigned long) \
824 bit->time;
825 bit_alloc_cache--;
826 return bit;
827 }
828
829 return malloc(sizeof(*bit));
830}
831
832static inline void __put_trace_last(struct per_dev_info *pdi, struct trace *t)
833{
834 struct per_cpu_info *pci = get_cpu_info(pdi, t->bit->cpu);
835
836 rb_erase(&t->rb_node, &pci->rb_last);
837 pci->rb_last_entries--;
838
839 bit_free(t->bit);
840 t_free(t);
841}
842
843static void put_trace(struct per_dev_info *pdi, struct trace *t)
844{
845 rb_erase(&t->rb_node, &rb_sort_root);
846 rb_sort_entries--;
847
848 trace_rb_insert_last(pdi, t);
849}
850
89482da6 851static inline int trace_rb_insert(struct trace *t, struct rb_root *root)
7997c5b0 852{
2a1b3424 853 struct rb_node **p = &root->rb_node;
7997c5b0
JA
854 struct rb_node *parent = NULL;
855 struct trace *__t;
856
857 while (*p) {
858 parent = *p;
2a1b3424 859
7997c5b0
JA
860 __t = rb_entry(parent, struct trace, rb_node);
861
89482da6
JA
862 if (t->bit->time < __t->bit->time)
863 p = &(*p)->rb_left;
864 else if (t->bit->time > __t->bit->time)
865 p = &(*p)->rb_right;
866 else if (t->bit->device < __t->bit->device)
e7c9f3ff
NS
867 p = &(*p)->rb_left;
868 else if (t->bit->device > __t->bit->device)
869 p = &(*p)->rb_right;
dcf0f7ed
JA
870 else if (t->bit->sequence < __t->bit->sequence)
871 p = &(*p)->rb_left;
0b07f23e 872 else /* >= sequence */
dcf0f7ed 873 p = &(*p)->rb_right;
7997c5b0
JA
874 }
875
876 rb_link_node(&t->rb_node, parent, p);
2a1b3424 877 rb_insert_color(&t->rb_node, root);
7997c5b0
JA
878 return 0;
879}
880
2a1b3424 881static inline int trace_rb_insert_sort(struct trace *t)
e3556946 882{
89482da6 883 if (!trace_rb_insert(t, &rb_sort_root)) {
2a1b3424
JA
884 rb_sort_entries++;
885 return 0;
886 }
887
888 return 1;
889}
890
210824c3 891static int trace_rb_insert_last(struct per_dev_info *pdi, struct trace *t)
2a1b3424 892{
210824c3
JA
893 struct per_cpu_info *pci = get_cpu_info(pdi, t->bit->cpu);
894
895 if (trace_rb_insert(t, &pci->rb_last))
896 return 1;
897
898 pci->rb_last_entries++;
899
900 if (pci->rb_last_entries > rb_batch * pdi->nfiles) {
901 struct rb_node *n = rb_first(&pci->rb_last);
902
903 t = rb_entry(n, struct trace, rb_node);
904 __put_trace_last(pdi, t);
2a1b3424
JA
905 }
906
210824c3 907 return 0;
2a1b3424
JA
908}
909
910static struct trace *trace_rb_find(dev_t device, unsigned long sequence,
911 struct rb_root *root, int order)
912{
913 struct rb_node *n = root->rb_node;
914 struct rb_node *prev = NULL;
e3556946
JA
915 struct trace *__t;
916
2a1b3424
JA
917 while (n) {
918 __t = rb_entry(n, struct trace, rb_node);
919 prev = n;
e3556946 920
0583b6a2 921 if (device < __t->bit->device)
2a1b3424 922 n = n->rb_left;
0583b6a2 923 else if (device > __t->bit->device)
2a1b3424 924 n = n->rb_right;
0583b6a2 925 else if (sequence < __t->bit->sequence)
2a1b3424 926 n = n->rb_left;
e3556946 927 else if (sequence > __t->bit->sequence)
2a1b3424 928 n = n->rb_right;
e3556946
JA
929 else
930 return __t;
931 }
932
2a1b3424
JA
933 /*
934 * hack - the list may not be sequence ordered because some
935 * events don't have sequence and time matched. so we end up
936 * being a little off in the rb lookup here, because we don't
937 * know the time we are looking for. compensate by browsing
938 * a little ahead from the last entry to find the match
939 */
940 if (order && prev) {
941 int max = 5;
942
943 while (((n = rb_next(prev)) != NULL) && max--) {
944 __t = rb_entry(n, struct trace, rb_node);
492da111 945
2a1b3424
JA
946 if (__t->bit->device == device &&
947 __t->bit->sequence == sequence)
948 return __t;
949
950 prev = n;
951 }
952 }
492da111 953
e3556946
JA
954 return NULL;
955}
956
2a1b3424 957static inline struct trace *trace_rb_find_last(struct per_dev_info *pdi,
210824c3 958 struct per_cpu_info *pci,
2a1b3424
JA
959 unsigned long seq)
960{
210824c3 961 return trace_rb_find(pdi->dev, seq, &pci->rb_last, 0);
2a1b3424
JA
962}
963
f7bd1a9b 964static inline int track_rb_insert(struct per_dev_info *pdi,struct io_track *iot)
7997c5b0 965{
f7bd1a9b 966 struct rb_node **p = &pdi->rb_track.rb_node;
7997c5b0
JA
967 struct rb_node *parent = NULL;
968 struct io_track *__iot;
969
970 while (*p) {
971 parent = *p;
7997c5b0
JA
972 __iot = rb_entry(parent, struct io_track, rb_node);
973
f7bd1a9b 974 if (iot->sector < __iot->sector)
7997c5b0
JA
975 p = &(*p)->rb_left;
976 else if (iot->sector > __iot->sector)
977 p = &(*p)->rb_right;
978 else {
e7c9f3ff 979 fprintf(stderr,
ab197ca7
AB
980 "sector alias (%Lu) on device %d,%d!\n",
981 (unsigned long long) iot->sector,
f7bd1a9b 982 MAJOR(pdi->dev), MINOR(pdi->dev));
7997c5b0
JA
983 return 1;
984 }
985 }
986
987 rb_link_node(&iot->rb_node, parent, p);
f7bd1a9b 988 rb_insert_color(&iot->rb_node, &pdi->rb_track);
7997c5b0
JA
989 return 0;
990}
991
f7bd1a9b 992static struct io_track *__find_track(struct per_dev_info *pdi, __u64 sector)
7997c5b0 993{
f7bd1a9b 994 struct rb_node *n = pdi->rb_track.rb_node;
7997c5b0
JA
995 struct io_track *__iot;
996
2a1b3424
JA
997 while (n) {
998 __iot = rb_entry(n, struct io_track, rb_node);
7997c5b0 999
f7bd1a9b 1000 if (sector < __iot->sector)
2a1b3424 1001 n = n->rb_left;
7997c5b0 1002 else if (sector > __iot->sector)
2a1b3424 1003 n = n->rb_right;
7997c5b0
JA
1004 else
1005 return __iot;
1006 }
1007
1008 return NULL;
1009}
1010
e81829a5
AG
1011static inline struct io_track *first_iot(struct io_track_req *req)
1012{
1013 return (struct io_track *)(req + 1);
1014}
1015
9e4cd1b8 1016static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid,
bfc70ad5 1017 __u64 sector)
7997c5b0 1018{
916b5501 1019 struct io_track *iot;
7997c5b0 1020
f7bd1a9b 1021 iot = __find_track(pdi, sector);
7997c5b0 1022 if (!iot) {
e81829a5
AG
1023 struct io_track_req *req;
1024
1025 req = malloc(sizeof(*req) + sizeof(*iot));
1026 req->ppm = find_ppm(pid);
1027 if (!req->ppm)
1028 req->ppm = add_ppm_hash(pid, "unknown");
1029 req->allocation_time = -1ULL;
1030 req->queue_time = -1ULL;
1031 req->dispatch_time = -1ULL;
1032 req->completion_time = -1ULL;
1033 iot = first_iot(req);
1034 iot->req = req;
1035 iot->next = NULL;
7997c5b0 1036 iot->sector = sector;
f7bd1a9b 1037 track_rb_insert(pdi, iot);
7997c5b0
JA
1038 }
1039
1040 return iot;
1041}
1042
f7bd1a9b
JA
1043static void log_track_frontmerge(struct per_dev_info *pdi,
1044 struct blk_io_trace *t)
2e3e8ded
JA
1045{
1046 struct io_track *iot;
1047
1048 if (!track_ios)
1049 return;
2e3e8ded 1050
ae957cbc 1051 iot = __find_track(pdi, t->sector + t_sec(t));
cb2a1a62 1052 if (!iot) {
57ea8602
JA
1053 if (verbose)
1054 fprintf(stderr, "merge not found for (%d,%d): %llu\n",
3c667f3c 1055 MAJOR(t->device), MINOR(t->device),
57ea8602 1056 (unsigned long long) t->sector + t_sec(t));
cb2a1a62 1057 return;
2e3e8ded 1058 }
cb2a1a62 1059
f7bd1a9b 1060 rb_erase(&iot->rb_node, &pdi->rb_track);
ae957cbc 1061 iot->sector -= t_sec(t);
f7bd1a9b 1062 track_rb_insert(pdi, iot);
2e3e8ded
JA
1063}
1064
f7bd1a9b 1065static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t)
2e3e8ded
JA
1066{
1067 struct io_track *iot;
e81829a5 1068 struct io_track_req *req;
2e3e8ded
JA
1069
1070 if (!track_ios)
1071 return;
1072
bfc70ad5 1073 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1074 req = iot->req;
1075 io_warn_unless(t, req->allocation_time == -1ULL,
a61c0677 1076 "confused about %s time", "allocation");
e81829a5 1077 req->allocation_time = t->time;
95c15013
JA
1078}
1079
753f9091
JA
1080/*
1081 * for md/dm setups, the interesting cycle is Q -> C. So track queueing
1082 * time here, as dispatch time
1083 */
1084static void log_track_queue(struct per_dev_info *pdi, struct blk_io_trace *t)
1085{
1086 struct io_track *iot;
e81829a5 1087 struct io_track_req *req;
753f9091
JA
1088
1089 if (!track_ios)
1090 return;
753f9091 1091
bfc70ad5 1092 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1093 req = iot->req;
1094 io_warn_unless(t, req->dispatch_time == -1ULL,
a61c0677 1095 "confused about %s time", "dispatch");
e81829a5
AG
1096 req->dispatch_time = t->time;
1097}
1098
1099static void log_track_split(struct per_dev_info *pdi, struct blk_io_trace *t)
1100{
1101 struct io_track *iot, *split;
1102
1103 /*
1104 * With a split request, the completion event will refer to the last
1105 * part of the original request, but other events might refer to other
1106 * parts.
1107 */
1108 iot = find_track(pdi, t->pid, t->sector);
1109 split = malloc(sizeof(*iot));
1110 split->req = iot->req;
1111 split->next = iot->next;
1112 iot->next = split;
1113 split->sector = iot->sector + t_sec(t);
1114 track_rb_insert(pdi, split);
753f9091
JA
1115}
1116
95c15013 1117/*
b6076a9b 1118 * return time between rq allocation and insertion
95c15013 1119 */
f7bd1a9b
JA
1120static unsigned long long log_track_insert(struct per_dev_info *pdi,
1121 struct blk_io_trace *t)
95c15013 1122{
50adc0ba 1123 unsigned long long elapsed;
95c15013 1124 struct io_track *iot;
e81829a5 1125 struct io_track_req *req;
95c15013
JA
1126
1127 if (!track_ios)
1128 return -1;
1129
bfc70ad5 1130 iot = find_track(pdi, t->pid, t->sector);
e81829a5
AG
1131 req = iot->req;
1132 io_warn_unless(t, req->queue_time == -1ULL,
a61c0677 1133 "confused about %s time", "queue");
e81829a5 1134 req->queue_time = t->time;
acd70d21 1135
e81829a5 1136 if (req->allocation_time == -1ULL)
acd70d21
JA
1137 return -1;
1138
e81829a5 1139 elapsed = req->queue_time - req->allocation_time;
50adc0ba
JA
1140
1141 if (per_process_stats) {
e81829a5 1142 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1143 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1144
b9d40d6f
JA
1145 if (ppi && elapsed > ppi->longest_allocation_wait[w])
1146 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
1147 }
1148
1149 return elapsed;
2e3e8ded
JA
1150}
1151
1152/*
1153 * return time between queue and issue
1154 */
f7bd1a9b
JA
1155static unsigned long long log_track_issue(struct per_dev_info *pdi,
1156 struct blk_io_trace *t)
2e3e8ded 1157{
a61c0677 1158 unsigned long long elapsed = -1ULL;
2e3e8ded 1159 struct io_track *iot;
e81829a5 1160 struct io_track_req *req;
2e3e8ded
JA
1161
1162 if (!track_ios)
1163 return -1;
1164 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
1165 return -1;
1166
f7bd1a9b 1167 iot = __find_track(pdi, t->sector);
cb2a1a62 1168 if (!iot) {
57ea8602
JA
1169 if (verbose)
1170 fprintf(stderr, "issue not found for (%d,%d): %llu\n",
3c667f3c 1171 MAJOR(t->device), MINOR(t->device),
57ea8602 1172 (unsigned long long) t->sector);
2e3e8ded 1173 return -1;
cb2a1a62 1174 }
2e3e8ded 1175
e81829a5
AG
1176 req = iot->req;
1177 io_warn_unless(t, req->dispatch_time == -1ULL,
a61c0677 1178 "confused about %s time", "dispatch");
e81829a5
AG
1179 req->dispatch_time = t->time;
1180 if (req->queue_time != -1ULL)
1181 elapsed = req->dispatch_time - req->queue_time;
50adc0ba 1182
a61c0677 1183 if (elapsed != -1ULL && per_process_stats) {
e81829a5 1184 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1185 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1186
b9d40d6f
JA
1187 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
1188 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
1189 }
1190
1191 return elapsed;
2e3e8ded
JA
1192}
1193
e81829a5
AG
1194static void fixup_complete(struct per_dev_info *pdi, struct blk_io_trace *t)
1195{
1196 struct io_track *iot;
1197 __u64 start_sector;
1198
1199 iot = __find_track(pdi, t->sector);
1200 if (!iot)
1201 return;
1202
1203 /*
1204 * When a split io completes, the sector and length of the completion
1205 * refer to the last part of the original request. Fix the sector and
1206 * length of the complete event to match the original request.
1207 */
1208 start_sector = first_iot(iot->req)->sector;
1209 t->bytes += (t->sector - start_sector) << 9;
1210 t->sector = start_sector;
1211}
1212
2e3e8ded
JA
1213/*
1214 * return time between dispatch and complete
1215 */
f7bd1a9b
JA
1216static unsigned long long log_track_complete(struct per_dev_info *pdi,
1217 struct blk_io_trace *t)
2e3e8ded 1218{
a61c0677 1219 unsigned long long elapsed = -1ULL;
e81829a5
AG
1220 struct io_track *iot, *next;
1221 struct io_track_req *req;
2e3e8ded
JA
1222
1223 if (!track_ios)
1224 return -1;
2e3e8ded 1225
f7bd1a9b 1226 iot = __find_track(pdi, t->sector);
cb2a1a62 1227 if (!iot) {
57ea8602
JA
1228 if (verbose)
1229 fprintf(stderr,"complete not found for (%d,%d): %llu\n",
3c667f3c 1230 MAJOR(t->device), MINOR(t->device),
57ea8602 1231 (unsigned long long) t->sector);
2e3e8ded 1232 return -1;
cb2a1a62 1233 }
2e3e8ded 1234
e81829a5
AG
1235 req = iot->req;
1236 io_warn_unless(t, req->completion_time == -1ULL,
a61c0677 1237 "confused about %s time", "completion");
e81829a5
AG
1238 req->completion_time = t->time;
1239 if (req->dispatch_time != -1ULL)
1240 elapsed = req->completion_time - req->dispatch_time;
2e3e8ded 1241
a61c0677 1242 if (elapsed != -1ULL && per_process_stats) {
e81829a5 1243 struct per_process_info *ppi = find_ppi(req->ppm->pid);
b9d40d6f 1244 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 1245
b9d40d6f
JA
1246 if (ppi && elapsed > ppi->longest_completion_wait[w])
1247 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
1248 }
1249
2e3e8ded
JA
1250 /*
1251 * kill the trace, we don't need it after completion
1252 */
e81829a5
AG
1253 for (iot = first_iot(req); iot; iot = next) {
1254 next = iot->next;
1255 rb_erase(&iot->rb_node, &pdi->rb_track);
1256 if (iot != first_iot(req))
1257 free(iot);
1258 }
1259 free(req);
2e3e8ded
JA
1260
1261 return elapsed;
1262}
1263
1264
9e4cd1b8 1265static struct io_stats *find_process_io_stats(pid_t pid)
152f6476 1266{
bfc70ad5 1267 struct per_process_info *ppi = find_ppi(pid);
152f6476
JA
1268
1269 if (!ppi) {
1270 ppi = malloc(sizeof(*ppi));
1271 memset(ppi, 0, sizeof(*ppi));
2990e589 1272 ppi->ppm = find_ppm(pid);
ebe2d1aa
JA
1273 if (!ppi->ppm)
1274 ppi->ppm = add_ppm_hash(pid, "unknown");
bfc70ad5
JA
1275 add_ppi_to_hash(ppi);
1276 add_ppi_to_list(ppi);
152f6476
JA
1277 }
1278
1279 return &ppi->io_stats;
1280}
1281
e7c9f3ff
NS
1282static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
1283{
1284 if (pdi->name)
1285 snprintf(buffer, size, "%s", pdi->name);
1286 else
f7bd1a9b 1287 snprintf(buffer, size, "%d,%d",MAJOR(pdi->dev),MINOR(pdi->dev));
e7c9f3ff
NS
1288 return buffer;
1289}
1290
e7c9f3ff 1291static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
1292{
1293 unsigned long long this = bit->time;
e7c9f3ff 1294 unsigned long long last = pdi->last_reported_time;
cfab07eb 1295
e7c9f3ff
NS
1296 pdi->backwards = (this < last) ? 'B' : ' ';
1297 pdi->last_reported_time = this;
cfab07eb
AB
1298}
1299
fb2ec796
JA
1300static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
1301 int rw)
d0ca268b 1302{
fb2ec796 1303 if (rw) {
152f6476 1304 ios->mwrites++;
fb2ec796 1305 ios->mwrite_kb += t_kb(t);
cd0ae0f6 1306 ios->mwrite_b += t_b(t);
fb2ec796 1307 } else {
152f6476 1308 ios->mreads++;
fb2ec796 1309 ios->mread_kb += t_kb(t);
cd0ae0f6 1310 ios->mread_b += t_b(t);
fb2ec796 1311 }
152f6476
JA
1312}
1313
1314static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
1315 int rw)
1316{
fb2ec796 1317 __account_m(&pci->io_stats, t, rw);
152f6476
JA
1318
1319 if (per_process_stats) {
bfc70ad5 1320 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476 1321
fb2ec796 1322 __account_m(ios, t, rw);
d0ca268b
JA
1323 }
1324}
1325
static inline void __account_pc_queue(struct io_stats *ios,
				      struct blk_io_trace *t, int rw)
{
	if (rw) {
		ios->qwrites_pc++;
		ios->qwrite_kb_pc += t_kb(t);
		ios->qwrite_b_pc += t_b(t);
	} else {
		ios->qreads_pc++;
		ios->qread_kb_pc += t_kb(t);
		ios->qread_b_pc += t_b(t);
	}
}
1339
1340static inline void account_pc_queue(struct blk_io_trace *t,
1341 struct per_cpu_info *pci, int rw)
1342{
1343 __account_pc_queue(&pci->io_stats, t, rw);
1344
1345 if (per_process_stats) {
1346 struct io_stats *ios = find_process_io_stats(t->pid);
1347
1348 __account_pc_queue(ios, t, rw);
1349 }
1350}
1351
1352static inline void __account_pc_issue(struct io_stats *ios, int rw,
1353 unsigned int bytes)
1354{
1355 if (rw) {
1356 ios->iwrites_pc++;
1357 ios->iwrite_kb_pc += bytes >> 10;
cd0ae0f6 1358 ios->iwrite_b_pc += bytes & 1023;
801646d6
CS
1359 } else {
1360 ios->ireads_pc++;
1361 ios->iread_kb_pc += bytes >> 10;
cd0ae0f6 1362 ios->iread_b_pc += bytes & 1023;
801646d6
CS
1363 }
1364}
1365
1366static inline void account_pc_issue(struct blk_io_trace *t,
1367 struct per_cpu_info *pci, int rw)
1368{
1369 __account_pc_issue(&pci->io_stats, rw, t->bytes);
1370
1371 if (per_process_stats) {
1372 struct io_stats *ios = find_process_io_stats(t->pid);
1373
1374 __account_pc_issue(ios, rw, t->bytes);
1375 }
1376}
1377
1378static inline void __account_pc_requeue(struct io_stats *ios,
1379 struct blk_io_trace *t, int rw)
1380{
1381 if (rw) {
1382 ios->wrqueue_pc++;
1383 ios->iwrite_kb_pc -= t_kb(t);
cd0ae0f6 1384 ios->iwrite_b_pc -= t_b(t);
801646d6
CS
1385 } else {
1386 ios->rrqueue_pc++;
1387 ios->iread_kb_pc -= t_kb(t);
cd0ae0f6 1388 ios->iread_b_pc -= t_b(t);
801646d6
CS
1389 }
1390}
1391
1392static inline void account_pc_requeue(struct blk_io_trace *t,
1393 struct per_cpu_info *pci, int rw)
1394{
1395 __account_pc_requeue(&pci->io_stats, t, rw);
1396
1397 if (per_process_stats) {
1398 struct io_stats *ios = find_process_io_stats(t->pid);
1399
1400 __account_pc_requeue(ios, t, rw);
1401 }
1402}
1403
1404static inline void __account_pc_c(struct io_stats *ios, int rw)
1405{
1406 if (rw)
1407 ios->cwrites_pc++;
1408 else
1409 ios->creads_pc++;
1410}
1411
1412static inline void account_pc_c(struct blk_io_trace *t,
1413 struct per_cpu_info *pci, int rw)
1414{
1415 __account_pc_c(&pci->io_stats, rw);
1416
1417 if (per_process_stats) {
1418 struct io_stats *ios = find_process_io_stats(t->pid);
1419
1420 __account_pc_c(ios, rw);
1421 }
1422}
1423
b6076a9b
JA
1424static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
1425 int rw)
d0ca268b
JA
1426{
1427 if (rw) {
152f6476 1428 ios->qwrites++;
ae957cbc 1429 ios->qwrite_kb += t_kb(t);
cd0ae0f6 1430 ios->qwrite_b += t_b(t);
d0ca268b 1431 } else {
152f6476 1432 ios->qreads++;
ae957cbc 1433 ios->qread_kb += t_kb(t);
cd0ae0f6 1434 ios->qread_b += t_b(t);
152f6476
JA
1435 }
1436}
1437
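/*
 * t_kb() and t_b() split the byte count of the trace into whole KiB and
 * the remaining bytes, mirroring the explicit "bytes >> 10" / "bytes & 1023"
 * arithmetic in the issue/complete accounting below. For example, a
 * 4608-byte queue event would add 4 to qread_kb/qwrite_kb and 512 to
 * qread_b/qwrite_b.
 */
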
b6076a9b
JA
1438static inline void account_queue(struct blk_io_trace *t,
1439 struct per_cpu_info *pci, int rw)
152f6476 1440{
b6076a9b 1441 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
1442
1443 if (per_process_stats) {
bfc70ad5 1444 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476 1445
b6076a9b 1446 __account_queue(ios, t, rw);
d0ca268b
JA
1447 }
1448}
1449
e21dc4dd 1450static inline void __account_c(struct io_stats *ios, int rw, int bytes)
d0ca268b
JA
1451{
1452 if (rw) {
152f6476
JA
1453 ios->cwrites++;
1454 ios->cwrite_kb += bytes >> 10;
cd0ae0f6 1455 ios->cwrite_b += bytes & 1023;
d0ca268b 1456 } else {
152f6476
JA
1457 ios->creads++;
1458 ios->cread_kb += bytes >> 10;
cd0ae0f6 1459 ios->cread_b += bytes & 1023;
152f6476
JA
1460 }
1461}
1462
1463static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
1464 int rw, int bytes)
1465{
1466 __account_c(&pci->io_stats, rw, bytes);
1467
1468 if (per_process_stats) {
bfc70ad5 1469 struct io_stats *ios = find_process_io_stats(t->pid);
152f6476
JA
1470
1471 __account_c(ios, rw, bytes);
d0ca268b
JA
1472 }
1473}
1474
b6076a9b
JA
1475static inline void __account_issue(struct io_stats *ios, int rw,
1476 unsigned int bytes)
afd2d7ad 1477{
1478 if (rw) {
152f6476
JA
1479 ios->iwrites++;
1480 ios->iwrite_kb += bytes >> 10;
cd0ae0f6 1481 ios->iwrite_b += bytes & 1023;
afd2d7ad 1482 } else {
152f6476
JA
1483 ios->ireads++;
1484 ios->iread_kb += bytes >> 10;
cd0ae0f6 1485 ios->iread_b += bytes & 1023;
afd2d7ad 1486 }
1487}
1488
b6076a9b
JA
1489static inline void account_issue(struct blk_io_trace *t,
1490 struct per_cpu_info *pci, int rw)
d0ca268b 1491{
b6076a9b 1492 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
1493
1494 if (per_process_stats) {
bfc70ad5 1495 struct io_stats *ios = find_process_io_stats(t->pid);
d5396421 1496
b6076a9b 1497 __account_issue(ios, rw, t->bytes);
152f6476
JA
1498 }
1499}
1500
06639b27
JA
1501static inline void __account_unplug(struct io_stats *ios, int timer)
1502{
1503 if (timer)
1504 ios->timer_unplugs++;
1505 else
1506 ios->io_unplugs++;
1507}
1508
1509static inline void account_unplug(struct blk_io_trace *t,
1510 struct per_cpu_info *pci, int timer)
1511{
1512 __account_unplug(&pci->io_stats, timer);
1513
1514 if (per_process_stats) {
bfc70ad5 1515 struct io_stats *ios = find_process_io_stats(t->pid);
06639b27
JA
1516
1517 __account_unplug(ios, timer);
1518 }
1519}
1520
4054070a
JA
1521static inline void __account_requeue(struct io_stats *ios,
1522 struct blk_io_trace *t, int rw)
1523{
1524 if (rw) {
1525 ios->wrqueue++;
1526 ios->iwrite_kb -= t_kb(t);
cd0ae0f6 1527 ios->iwrite_b -= t_b(t);
4054070a
JA
1528 } else {
1529 ios->rrqueue++;
1530 ios->iread_kb -= t_kb(t);
cd0ae0f6 1531 ios->iread_b -= t_b(t);
4054070a
JA
1532 }
1533}
1534
1535static inline void account_requeue(struct blk_io_trace *t,
1536 struct per_cpu_info *pci, int rw)
1537{
1538 __account_requeue(&pci->io_stats, t, rw);
1539
1540 if (per_process_stats) {
bfc70ad5 1541 struct io_stats *ios = find_process_io_stats(t->pid);
4054070a
JA
1542
1543 __account_requeue(ios, t, rw);
1544 }
1545}
1546
f7bd1a9b
JA
1547static void log_complete(struct per_dev_info *pdi, struct per_cpu_info *pci,
1548 struct blk_io_trace *t, char *act)
ab197ca7 1549{
f7bd1a9b 1550 process_fmt(act, pci, t, log_track_complete(pdi, t), 0, NULL);
ab197ca7
AB
1551}
1552
f7bd1a9b
JA
1553static void log_insert(struct per_dev_info *pdi, struct per_cpu_info *pci,
1554 struct blk_io_trace *t, char *act)
b6076a9b 1555{
f7bd1a9b 1556 process_fmt(act, pci, t, log_track_insert(pdi, t), 0, NULL);
b6076a9b
JA
1557}
1558
ab197ca7
AB
1559static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1560 char *act)
1561{
b6076a9b 1562 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 1563}
2e3e8ded 1564
f7bd1a9b
JA
1565static void log_issue(struct per_dev_info *pdi, struct per_cpu_info *pci,
1566 struct blk_io_trace *t, char *act)
ab197ca7 1567{
f7bd1a9b 1568 process_fmt(act, pci, t, log_track_issue(pdi, t), 0, NULL);
d0ca268b
JA
1569}
1570
f7bd1a9b
JA
1571static void log_merge(struct per_dev_info *pdi, struct per_cpu_info *pci,
1572 struct blk_io_trace *t, char *act)
d0ca268b 1573{
a01516de 1574 if (act[0] == 'F')
f7bd1a9b 1575 log_track_frontmerge(pdi, t);
2e3e8ded 1576
ab197ca7 1577 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1578}
1579
dfe34da1 1580static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1581 char *act)
dfe34da1 1582{
ab197ca7 1583 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1584}
1585
d5396421 1586static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1587 char *act)
d0ca268b 1588{
ab197ca7 1589 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1590}
1591
ab197ca7 1592static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1593 char *act)
67e14fdc 1594{
ab197ca7 1595 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1596}
1597
93f1c611
JA
1598static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
1599 char *act)
1600{
1601 process_fmt(act, pci, t, -1ULL, 0, NULL);
1602}
1603
ab197ca7 1604static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1605{
ab197ca7 1606 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1607
ab197ca7 1608 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1609}
1610
c82a8c9d
CS
1611static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
1612 struct per_cpu_info *pci)
d0ca268b 1613{
c82a8c9d 1614 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
7238673f 1615 int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
56f2af81
JA
1616
1617 switch (act) {
d0ca268b 1618 case __BLK_TA_QUEUE:
3639a11e 1619 log_generic(pci, t, "Q");
801646d6 1620 account_pc_queue(t, pci, w);
d0ca268b
JA
1621 break;
1622 case __BLK_TA_GETRQ:
3639a11e 1623 log_generic(pci, t, "G");
d0ca268b
JA
1624 break;
1625 case __BLK_TA_SLEEPRQ:
3639a11e 1626 log_generic(pci, t, "S");
d0ca268b
JA
1627 break;
1628 case __BLK_TA_REQUEUE:
c82a8c9d
CS
1629 /*
1630 * can happen if we miss traces, don't let it go
1631 * below zero
1632 */
1633 if (pdi->cur_depth[w])
1634 pdi->cur_depth[w]--;
801646d6 1635 account_pc_requeue(t, pci, w);
3639a11e 1636 log_generic(pci, t, "R");
d0ca268b
JA
1637 break;
1638 case __BLK_TA_ISSUE:
801646d6 1639 account_pc_issue(t, pci, w);
c82a8c9d
CS
1640 pdi->cur_depth[w]++;
1641 if (pdi->cur_depth[w] > pdi->max_depth[w])
1642 pdi->max_depth[w] = pdi->cur_depth[w];
ab197ca7 1643 log_pc(pci, t, "D");
d0ca268b
JA
1644 break;
1645 case __BLK_TA_COMPLETE:
c82a8c9d
CS
1646 if (pdi->cur_depth[w])
1647 pdi->cur_depth[w]--;
3639a11e 1648 log_pc(pci, t, "C");
801646d6 1649 account_pc_c(t, pci, w);
d0ca268b 1650 break;
56f2af81
JA
1651 case __BLK_TA_INSERT:
1652 log_pc(pci, t, "I");
1653 break;
d0ca268b 1654 default:
56f2af81 1655 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 1656 break;
d0ca268b 1657 }
d0ca268b
JA
1658}
1659
f7bd1a9b
JA
1660static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
1661 struct per_cpu_info *pci)
d0ca268b 1662{
649c7b66 1663 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
7238673f 1664 int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
d0ca268b 1665
7997c5b0 1666 switch (act) {
d0ca268b 1667 case __BLK_TA_QUEUE:
753f9091 1668 log_track_queue(pdi, t);
b6076a9b 1669 account_queue(t, pci, w);
3639a11e 1670 log_queue(pci, t, "Q");
d0ca268b 1671 break;
b6076a9b 1672 case __BLK_TA_INSERT:
f7bd1a9b 1673 log_insert(pdi, pci, t, "I");
b6076a9b 1674 break;
d0ca268b 1675 case __BLK_TA_BACKMERGE:
152f6476 1676 account_m(t, pci, w);
f7bd1a9b 1677 log_merge(pdi, pci, t, "M");
d0ca268b
JA
1678 break;
1679 case __BLK_TA_FRONTMERGE:
152f6476 1680 account_m(t, pci, w);
f7bd1a9b 1681 log_merge(pdi, pci, t, "F");
d0ca268b
JA
1682 break;
1683 case __BLK_TA_GETRQ:
f7bd1a9b 1684 log_track_getrq(pdi, t);
3639a11e 1685 log_generic(pci, t, "G");
d0ca268b
JA
1686 break;
1687 case __BLK_TA_SLEEPRQ:
3639a11e 1688 log_generic(pci, t, "S");
d0ca268b
JA
1689 break;
1690 case __BLK_TA_REQUEUE:
65f2deb5
JA
1691 /*
1692 * can happen if we miss traces, don't let it go
1693 * below zero
1694 */
1695 if (pdi->cur_depth[w])
1696 pdi->cur_depth[w]--;
4054070a 1697 account_requeue(t, pci, w);
3639a11e 1698 log_queue(pci, t, "R");
d0ca268b
JA
1699 break;
1700 case __BLK_TA_ISSUE:
b6076a9b 1701 account_issue(t, pci, w);
649c7b66
JA
1702 pdi->cur_depth[w]++;
1703 if (pdi->cur_depth[w] > pdi->max_depth[w])
1704 pdi->max_depth[w] = pdi->cur_depth[w];
f7bd1a9b 1705 log_issue(pdi, pci, t, "D");
d0ca268b
JA
1706 break;
1707 case __BLK_TA_COMPLETE:
65f2deb5
JA
1708 if (pdi->cur_depth[w])
1709 pdi->cur_depth[w]--;
e81829a5 1710 fixup_complete(pdi, t);
152f6476 1711 account_c(t, pci, w, t->bytes);
f7bd1a9b 1712 log_complete(pdi, pci, t, "C");
d0ca268b 1713 break;
88b1a526 1714 case __BLK_TA_PLUG:
3639a11e 1715 log_action(pci, t, "P");
88b1a526 1716 break;
3639a11e 1717 case __BLK_TA_UNPLUG_IO:
06639b27 1718 account_unplug(t, pci, 0);
3639a11e
JA
1719 log_unplug(pci, t, "U");
1720 break;
1721 case __BLK_TA_UNPLUG_TIMER:
06639b27 1722 account_unplug(t, pci, 1);
3639a11e 1723 log_unplug(pci, t, "UT");
88b1a526 1724 break;
93f1c611 1725 case __BLK_TA_SPLIT:
e81829a5 1726 log_track_split(pdi, t);
93f1c611
JA
1727 log_split(pci, t, "X");
1728 break;
1729 case __BLK_TA_BOUNCE:
1730 log_generic(pci, t, "B");
1731 break;
a8f30e64
JA
1732 case __BLK_TA_REMAP:
1733 log_generic(pci, t, "A");
1734 break;
c54b9dd9 1735 case __BLK_TA_DRV_DATA:
c701176c 1736 have_drv_data = 1;
c54b9dd9
SR
1737 /* dump to binary file only */
1738 break;
d0ca268b
JA
1739 default:
1740 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1741 break;
d0ca268b 1742 }
d0ca268b
JA
1743}
1744
ff3a732c
JA
1745static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1746 struct per_dev_info *pdi)
d0ca268b 1747{
234db09d 1748 if (text_output) {
7238673f 1749 if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE)
1a15f6a8
AB
1750 handle_notify(t);
1751 else if (t->action & BLK_TC_ACT(BLK_TC_PC))
c82a8c9d 1752 dump_trace_pc(t, pdi, pci);
234db09d
AB
1753 else
1754 dump_trace_fs(t, pdi, pci);
1755 }
87b72777 1756
20ed6177
JA
1757 if (!pdi->events)
1758 pdi->first_reported_time = t->time;
1759
e7c9f3ff 1760 pdi->events++;
a2594911 1761
19cfaf3f
AB
1762 if (bin_output_msgs ||
1763 !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
7238673f 1764 (t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
19cfaf3f 1765 output_binary(t, sizeof(*t) + t->pdu_len);
d0ca268b
JA
1766}
1767
/*
 * print in a proper way, not too small and not too big. if more than
 * 1,000,000K, turn into M and so on
 */
static char *size_cnv(char *dst, unsigned long long num, int in_kb)
{
	char suff[] = { '\0', 'K', 'M', 'G', 'P' };
	unsigned int i = 0;

	if (in_kb)
		i++;

	while (num > 1000 * 1000ULL && (i < sizeof(suff) - 1)) {
		i++;
		num /= 1000;
	}

	sprintf(dst, "%'8Lu%c", num, suff[i]);
	return dst;
}

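/*
 * Example (assuming a locale with thousands grouping): size_cnv(buf,
 * 1234567, 1) starts at 'K', scales once to 'M' and formats roughly
 * "   1,234M"; size_cnv(buf, 42, 0) is just "      42" with no suffix.
 */
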
649c7b66
JA
1789static void dump_io_stats(struct per_dev_info *pdi, struct io_stats *ios,
1790 char *msg)
5c017e4b 1791{
4c523165
JA
1792 static char x[256], y[256];
1793
152f6476
JA
1794 fprintf(ofp, "%s\n", msg);
1795
cd0ae0f6
ID
1796 fprintf(ofp, " Reads Queued: %s, %siB\t",
1797 size_cnv(x, ios->qreads, 0),
1798 size_cnv(y, ios->qread_kb + (ios->qread_b>>10), 1));
1799 fprintf(ofp, " Writes Queued: %s, %siB\n",
1800 size_cnv(x, ios->qwrites, 0),
1801 size_cnv(y, ios->qwrite_kb + (ios->qwrite_b>>10), 1));
1802 fprintf(ofp, " Read Dispatches: %s, %siB\t",
1803 size_cnv(x, ios->ireads, 0),
1804 size_cnv(y, ios->iread_kb + (ios->iread_b>>10), 1));
1805 fprintf(ofp, " Write Dispatches: %s, %siB\n",
1806 size_cnv(x, ios->iwrites, 0),
1807 size_cnv(y, ios->iwrite_kb + (ios->iwrite_b>>10), 1));
4054070a
JA
1808 fprintf(ofp, " Reads Requeued: %s\t\t", size_cnv(x, ios->rrqueue, 0));
1809 fprintf(ofp, " Writes Requeued: %s\n", size_cnv(x, ios->wrqueue, 0));
cd0ae0f6
ID
1810 fprintf(ofp, " Reads Completed: %s, %siB\t",
1811 size_cnv(x, ios->creads, 0),
1812 size_cnv(y, ios->cread_kb + (ios->cread_b>>10), 1));
1813 fprintf(ofp, " Writes Completed: %s, %siB\n",
1814 size_cnv(x, ios->cwrites, 0),
1815 size_cnv(y, ios->cwrite_kb + (ios->cwrite_b>>10), 1));
1816 fprintf(ofp, " Read Merges: %s, %siB\t",
1817 size_cnv(x, ios->mreads, 0),
1818 size_cnv(y, ios->mread_kb + (ios->mread_b>>10), 1));
1819 fprintf(ofp, " Write Merges: %s, %siB\n",
1820 size_cnv(x, ios->mwrites, 0),
1821 size_cnv(y, ios->mwrite_kb + (ios->mwrite_b>>10), 1));
649c7b66
JA
1822 if (pdi) {
1823 fprintf(ofp, " Read depth: %'8u%8c\t", pdi->max_depth[0], ' ');
1824 fprintf(ofp, " Write depth: %'8u\n", pdi->max_depth[1]);
1825 }
801646d6
CS
1826 if (ios->qreads_pc || ios->qwrites_pc || ios->ireads_pc || ios->iwrites_pc ||
1827 ios->rrqueue_pc || ios->wrqueue_pc || ios->creads_pc || ios->cwrites_pc) {
cd0ae0f6
ID
1828 fprintf(ofp, " PC Reads Queued: %s, %siB\t",
1829 size_cnv(x, ios->qreads_pc, 0),
1830 size_cnv(y,
1831 ios->qread_kb_pc + (ios->qread_b_pc>>10), 1));
1832 fprintf(ofp, " PC Writes Queued: %s, %siB\n",
1833 size_cnv(x, ios->qwrites_pc, 0),
1834 size_cnv(y,
1835 ios->qwrite_kb_pc + (ios->qwrite_b_pc>>10), 1));
1836 fprintf(ofp, " PC Read Disp.: %s, %siB\t",
1837 size_cnv(x, ios->ireads_pc, 0),
1838 size_cnv(y,
1839 ios->iread_kb_pc + (ios->iread_b_pc>>10), 1));
1840 fprintf(ofp, " PC Write Disp.: %s, %siB\n",
1841 size_cnv(x, ios->iwrites_pc, 0),
1842 size_cnv(y,
1843 ios->iwrite_kb_pc + (ios->iwrite_b_pc>>10),
1844 1));
801646d6
CS
1845 fprintf(ofp, " PC Reads Req.: %s\t\t", size_cnv(x, ios->rrqueue_pc, 0));
1846 fprintf(ofp, " PC Writes Req.: %s\n", size_cnv(x, ios->wrqueue_pc, 0));
1847 fprintf(ofp, " PC Reads Compl.: %s\t\t", size_cnv(x, ios->creads_pc, 0));
d0576a3a 1848 fprintf(ofp, " PC Writes Compl.: %s\n", size_cnv(x, ios->cwrites_pc, 0));
801646d6 1849 }
06639b27
JA
1850 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1851 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1852}
1853
50adc0ba
JA
1854static void dump_wait_stats(struct per_process_info *ppi)
1855{
b9d40d6f
JA
1856 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1857 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1858 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1859 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1860 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1861 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1862
1863 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1864 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1865 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1866 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1867 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1868 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1869}
1870
886ecf0e
JA
1871static int ppi_name_compare(const void *p1, const void *p2)
1872{
1873 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1874 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1875 int res;
1876
2990e589 1877 res = strverscmp(ppi1->ppm->comm, ppi2->ppm->comm);
886ecf0e 1878 if (!res)
2990e589 1879 res = ppi1->ppm->pid > ppi2->ppm->pid;
886ecf0e
JA
1880
1881 return res;
1882}
1883
a7263b8f
WZ
1884static int ppi_event_compare(const void *p1, const void *p2)
1885{
1886 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1887 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1888 struct io_stats *ios1 = &ppi1->io_stats;
1889 struct io_stats *ios2 = &ppi2->io_stats;
1890 unsigned long io1, io2;
1891 unsigned long long kb1,kb2;
1892 int sort_by_kb = 1;
1893
1894 io1 = io2 = 0;
1895 kb1 = kb2 = 0;
1896
1897 switch (per_process_stats_event) {
1898 case SORT_PROG_EVENT_QKB: /* KB: Queued read and write */
1899 kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10) +
1900 ios1->qread_kb + (ios1->qread_b>>10);
1901 kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10) +
1902 ios2->qread_kb + (ios2->qread_b>>10);
1903 break;
1904 case SORT_PROG_EVENT_RKB: /* KB: Queued Read */
1905 kb1 = ios1->qread_kb + (ios1->qread_b>>10);
1906 kb2 = ios2->qread_kb + (ios2->qread_b>>10);
1907 break;
1908 case SORT_PROG_EVENT_WKB: /* KB: Queued Write */
1909 kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10);
1910 kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10);
1911 break;
1912 case SORT_PROG_EVENT_CKB: /* KB: Complete */
1913 kb1 = ios1->cwrite_kb + (ios1->cwrite_b>>10) +
1914 ios1->cread_kb + (ios1->cread_b>>10);
1915 kb2 = ios2->cwrite_kb + (ios2->cwrite_b>>10) +
1916 ios2->cread_kb + (ios2->cread_b>>10);
1917 break;
1918 case SORT_PROG_EVENT_QIO: /* IO: Queued read and write */
1919 sort_by_kb = 0;
1920 io1 = ios1->qreads + ios1->qwrites;
1921 io2 = ios2->qreads + ios2->qwrites;
1922 break;
1923 case SORT_PROG_EVENT_RIO: /* IO: Queued Read */
1924 sort_by_kb = 0;
1925 io1 = ios1->qreads;
1926 io2 = ios2->qreads;
1927 break;
1928 case SORT_PROG_EVENT_WIO: /* IO: Queued Write */
1929 sort_by_kb = 0;
1930 io1 = ios1->qwrites;
1931 io2 = ios2->qwrites;
1932 break;
1933 case SORT_PROG_EVENT_CIO: /* IO: Complete */
1934 sort_by_kb = 0;
1935 io1 = ios1->creads + ios1->cwrites;
1936 io2 = ios2->creads + ios2->cwrites;
1937 break;
1938 }
1939
1940
1941 /* compare kb */
1942 if (sort_by_kb) {
1943 if (kb1 > kb2)
1944 return 1;
1945 else if (kb1 == kb2)
1946 return 0;
1947 return -1;
1948 }
1949
1950 /* compare io */
1951 if (io1 > io2)
1952 return 1;
1953 else if (io1 == io2)
1954 return 0;
1955 return -1;
1956}
1957
1958static int ppi_compare(const void *p1, const void *p2)
1959{
1960 if (per_process_stats_event == SORT_PROG_EVENT_N)
1961 return ppi_name_compare(p1, p2);
1962
1963 return ppi_event_compare(p1, p2);
1964}
1965
886ecf0e
JA
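/*
 * qsort() leaves ppis[] in ascending comparator order; rebuilding the
 * singly linked list from the back of the array preserves that order,
 * so show_process_stats() walks ppi_list from the smallest (or
 * alphabetically first) entry to the largest.
 */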
1966static void sort_process_list(void)
1967{
1968 struct per_process_info **ppis;
1969 struct per_process_info *ppi;
1970 int i = 0;
1971
1972 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1973
1974 ppi = ppi_list;
1975 while (ppi) {
06e6f286 1976 ppis[i++] = ppi;
886ecf0e
JA
1977 ppi = ppi->list_next;
1978 }
1979
a7263b8f 1980 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_compare);
886ecf0e
JA
1981
1982 i = ppi_list_entries - 1;
1983 ppi_list = NULL;
1984 while (i >= 0) {
1985 ppi = ppis[i];
1986
1987 ppi->list_next = ppi_list;
1988 ppi_list = ppi;
1989 i--;
1990 }
50c38702
JA
1991
1992 free(ppis);
886ecf0e
JA
1993}
1994
152f6476
JA
1995static void show_process_stats(void)
1996{
1997 struct per_process_info *ppi;
1998
886ecf0e
JA
1999 sort_process_list();
2000
152f6476
JA
2001 ppi = ppi_list;
2002 while (ppi) {
2990e589 2003 struct process_pid_map *ppm = ppi->ppm;
ce8b6b4f
JA
2004 char name[64];
2005
715d8021 2006 if (ppi->more_than_one)
2990e589 2007 sprintf(name, "%s (%u, ...)", ppm->comm, ppm->pid);
715d8021 2008 else
2990e589 2009 sprintf(name, "%s (%u)", ppm->comm, ppm->pid);
bf0720af 2010
649c7b66 2011 dump_io_stats(NULL, &ppi->io_stats, name);
50adc0ba 2012 dump_wait_stats(ppi);
152f6476
JA
2013 ppi = ppi->list_next;
2014 }
2015
2016 fprintf(ofp, "\n");
2017}
2018
e7c9f3ff 2019static void show_device_and_cpu_stats(void)
d0ca268b 2020{
e7c9f3ff
NS
2021 struct per_dev_info *pdi;
2022 struct per_cpu_info *pci;
2023 struct io_stats total, *ios;
20ed6177 2024 unsigned long long rrate, wrate, msec;
e7c9f3ff
NS
2025 int i, j, pci_events;
2026 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
2027 char name[32];
8a82e321 2028 double ratio;
e7c9f3ff
NS
2029
2030 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
2031
2032 memset(&total, 0, sizeof(total));
2033 pci_events = 0;
2034
2035 if (i > 0)
2036 fprintf(ofp, "\n");
2037
2038 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
2039 if (!pci->nelems)
2040 continue;
2041
2042 ios = &pci->io_stats;
2043 total.qreads += ios->qreads;
2044 total.qwrites += ios->qwrites;
2045 total.creads += ios->creads;
2046 total.cwrites += ios->cwrites;
2047 total.mreads += ios->mreads;
2048 total.mwrites += ios->mwrites;
2049 total.ireads += ios->ireads;
2050 total.iwrites += ios->iwrites;
4054070a
JA
2051 total.rrqueue += ios->rrqueue;
2052 total.wrqueue += ios->wrqueue;
e7c9f3ff
NS
2053 total.qread_kb += ios->qread_kb;
2054 total.qwrite_kb += ios->qwrite_kb;
2055 total.cread_kb += ios->cread_kb;
2056 total.cwrite_kb += ios->cwrite_kb;
2057 total.iread_kb += ios->iread_kb;
2058 total.iwrite_kb += ios->iwrite_kb;
fb2ec796
JA
2059 total.mread_kb += ios->mread_kb;
2060 total.mwrite_kb += ios->mwrite_kb;
cd0ae0f6
ID
2061 total.qread_b += ios->qread_b;
2062 total.qwrite_b += ios->qwrite_b;
2063 total.cread_b += ios->cread_b;
2064 total.cwrite_b += ios->cwrite_b;
2065 total.iread_b += ios->iread_b;
2066 total.iwrite_b += ios->iwrite_b;
2067 total.mread_b += ios->mread_b;
2068 total.mwrite_b += ios->mwrite_b;
801646d6
CS
2069
2070 total.qreads_pc += ios->qreads_pc;
2071 total.qwrites_pc += ios->qwrites_pc;
2072 total.creads_pc += ios->creads_pc;
2073 total.cwrites_pc += ios->cwrites_pc;
2074 total.ireads_pc += ios->ireads_pc;
2075 total.iwrites_pc += ios->iwrites_pc;
2076 total.rrqueue_pc += ios->rrqueue_pc;
2077 total.wrqueue_pc += ios->wrqueue_pc;
2078 total.qread_kb_pc += ios->qread_kb_pc;
2079 total.qwrite_kb_pc += ios->qwrite_kb_pc;
2080 total.iread_kb_pc += ios->iread_kb_pc;
2081 total.iwrite_kb_pc += ios->iwrite_kb_pc;
cd0ae0f6
ID
2082 total.qread_b_pc += ios->qread_b_pc;
2083 total.qwrite_b_pc += ios->qwrite_b_pc;
2084 total.iread_b_pc += ios->iread_b_pc;
2085 total.iwrite_b_pc += ios->iwrite_b_pc;
801646d6 2086
06639b27
JA
2087 total.timer_unplugs += ios->timer_unplugs;
2088 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
2089
2090 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
2091 j, get_dev_name(pdi, name, sizeof(name)));
649c7b66 2092 dump_io_stats(pdi, ios, line);
e7c9f3ff
NS
2093 pci_events++;
2094 }
5c017e4b 2095
e7c9f3ff
NS
2096 if (pci_events > 1) {
2097 fprintf(ofp, "\n");
2098 snprintf(line, sizeof(line) - 1, "Total (%s):",
2099 get_dev_name(pdi, name, sizeof(name)));
649c7b66 2100 dump_io_stats(NULL, &total, line);
e7c9f3ff 2101 }
d0ca268b 2102
20ed6177 2103 wrate = rrate = 0;
20ed6177
JA
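/*
 * msec is the reported interval in milliseconds; (1000 * KB) / msec
 * gives KiB/s, and the leftover byte counters are folded in as an
 * approximation of the same unit.
 */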
2104 msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000;
2105 if (msec) {
cd0ae0f6
ID
2106 rrate = ((1000 * total.cread_kb) + total.cread_b) /
2107 msec;
2108 wrate = ((1000 * total.cwrite_kb) + total.cwrite_b) /
2109 msec;
20ed6177
JA
2110 }
2111
dce0f678
AB
2112 fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n",
2113 rrate, wrate);
2114 fprintf(ofp, "Events (%s): %'Lu entries\n",
2115 get_dev_name(pdi, line, sizeof(line)), pdi->events);
492da111
AB
2116
2117 collect_pdi_skips(pdi);
8a82e321
MZ
2118 if (!pdi->skips && !pdi->events)
2119 ratio = 0.0;
2120 else
2121 ratio = 100.0 * ((double)pdi->seq_skips /
2122 (double)(pdi->events + pdi->seq_skips));
492da111 2123 fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%)\n",
8a82e321 2124 pdi->skips, pdi->seq_skips, ratio);
e7c9f3ff 2125 }
d0ca268b
JA
2126}
2127
d025d6c6
HM
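/*
 * abs_start_time was captured when the timestamp notify record was
 * written; shift it by the (signed) distance to genesis_time so it
 * reflects the wall-clock time of the first trace.
 */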
2128static void correct_abs_start_time(void)
2129{
2130 long delta = genesis_time - start_timestamp;
2131
2132 abs_start_time.tv_sec += SECONDS(delta);
2133 abs_start_time.tv_nsec += NANO_SECONDS(delta);
2134 if (abs_start_time.tv_nsec < 0) {
2135 abs_start_time.tv_nsec += 1000000000;
2136 abs_start_time.tv_sec -= 1;
2137 } else
2138 if (abs_start_time.tv_nsec >= 1000000000) {
2139 abs_start_time.tv_nsec -= 1000000000;
2140 abs_start_time.tv_sec += 1;
2141 }
2142}
2143
4f0ae44f
JA
2144static void find_genesis(void)
2145{
2146 struct trace *t = trace_list;
2147
2148 genesis_time = -1ULL;
2149 while (t != NULL) {
2150 if (t->bit->time < genesis_time)
2151 genesis_time = t->bit->time;
2152
2153 t = t->next;
2154 }
7bd4fd0a
OK
2155
2156 /* The time stamp record will usually be the first
2157 * record in the trace, but not always.
2158 */
2159 if (start_timestamp
2160 && start_timestamp != genesis_time) {
d025d6c6 2161 correct_abs_start_time();
7bd4fd0a 2162 }
4f0ae44f
JA
2163}
2164
7f4d89e6 2165static inline int check_stopwatch(struct blk_io_trace *bit)
4f0ae44f 2166{
7f4d89e6
JA
2167 if (bit->time < stopwatch_end &&
2168 bit->time >= stopwatch_start)
4f0ae44f
JA
2169 return 0;
2170
2171 return 1;
2172}
2173
53c68c88
JA
2174/*
2175 * return youngest entry read
2176 */
2177static int sort_entries(unsigned long long *youngest)
4f0ae44f 2178{
210824c3
JA
2179 struct per_dev_info *pdi = NULL;
2180 struct per_cpu_info *pci = NULL;
4f0ae44f 2181 struct trace *t;
4f0ae44f
JA
2182
2183 if (!genesis_time)
2184 find_genesis();
2185
d6222db8 2186 *youngest = 0;
4f0ae44f
JA
2187 while ((t = trace_list) != NULL) {
2188 struct blk_io_trace *bit = t->bit;
2189
2190 trace_list = t->next;
2191
7f4d89e6 2192 bit->time -= genesis_time;
4f0ae44f 2193
d6222db8
JA
2194 if (bit->time < *youngest || !*youngest)
2195 *youngest = bit->time;
2196
210824c3
JA
2197 if (!pdi || pdi->dev != bit->device) {
2198 pdi = get_dev_info(bit->device);
2199 pci = NULL;
2200 }
2201
2202 if (!pci || pci->cpu != bit->cpu)
2203 pci = get_cpu_info(pdi, bit->cpu);
2204
2205 if (bit->sequence < pci->smallest_seq_read)
2206 pci->smallest_seq_read = bit->sequence;
774a1a10 2207
7f4d89e6 2208 if (check_stopwatch(bit)) {
4f0ae44f
JA
2209 bit_free(bit);
2210 t_free(t);
2211 continue;
2212 }
2213
2a1b3424 2214 if (trace_rb_insert_sort(t))
53c68c88 2215 return -1;
4f0ae44f
JA
2216 }
2217
53c68c88 2218 return 0;
4f0ae44f
JA
2219}
2220
824c2b39
JA
2221/*
2222 * to continue, we must have traces from all online cpus in the tree
2223 */
2224static int check_cpu_map(struct per_dev_info *pdi)
2225{
2226 unsigned long *cpu_map;
2227 struct rb_node *n;
2228 struct trace *__t;
2229 unsigned int i;
2230 int ret, cpu;
2231
ac416ab6
JM
2232 /* Pipe input doesn't do CPU online tracking. */
2233 if (!pdi->cpu_map_max)
2234 return 0;
2235
824c2b39
JA
2236 /*
2237 * create a map of the cpus we have traces for
2238 */
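/*
 * One bit per possible CPU: cpu_map_max is expected to be a multiple of
 * CPUS_PER_LONG, so cpu_map_max / sizeof(long) bytes is always enough,
 * and calloc() leaves the whole map zeroed.
 */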
7f5d2c51 2239 cpu_map = calloc(1, pdi->cpu_map_max / sizeof(long));
824c2b39
JA
2240 n = rb_first(&rb_sort_root);
2241 while (n) {
2242 __t = rb_entry(n, struct trace, rb_node);
2243 cpu = __t->bit->cpu;
2244
2245 cpu_map[CPU_IDX(cpu)] |= (1UL << CPU_BIT(cpu));
2246 n = rb_next(n);
2247 }
2248
2249 /*
b1c8e614
JA
2250 * we can't continue if pdi->cpu_map has entries set that we don't
2251 * have in the sort rbtree. the opposite is not a problem, though
824c2b39
JA
2252 */
2253 ret = 0;
2254 for (i = 0; i < pdi->cpu_map_max / CPUS_PER_LONG; i++) {
2255 if (pdi->cpu_map[i] & ~(cpu_map[i])) {
2256 ret = 1;
2257 break;
2258 }
2259 }
2260
2261 free(cpu_map);
2262 return ret;
2263}
2264
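/*
 * Decide whether the trace at the head of the sort tree may be emitted.
 * Returns 0 when its per-CPU sequence number is acceptable (recording a
 * skip on the final, forced pass), and non-zero when we should wait for
 * earlier sequence numbers to arrive.
 */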
a141a7cd 2265static int check_sequence(struct per_dev_info *pdi, struct trace *t, int force)
2a1b3424 2266{
1ca323a5 2267 struct blk_io_trace *bit = t->bit;
210824c3
JA
2268 unsigned long expected_sequence;
2269 struct per_cpu_info *pci;
1ca323a5 2270 struct trace *__t;
492da111 2271
210824c3
JA
2272 pci = get_cpu_info(pdi, bit->cpu);
2273 expected_sequence = pci->last_sequence + 1;
2274
774a1a10 2275 if (!expected_sequence) {
774a1a10
JA
2276 /*
2277 * 1 should be the first entry, just allow it
2278 */
2279 if (bit->sequence == 1)
2280 return 0;
210824c3 2281 if (bit->sequence == pci->smallest_seq_read)
79ee9704 2282 return 0;
774a1a10 2283
824c2b39 2284 return check_cpu_map(pdi);
774a1a10 2285 }
2a1b3424
JA
2286
2287 if (bit->sequence == expected_sequence)
2288 return 0;
2289
2a1b3424 2290 /*
1c7c54aa
JA
2291 * we may not have seen that sequence yet. if we are not doing
2292 * the final run, break and wait for more entries.
1c24add6 2293 */
210824c3
JA
2294 if (expected_sequence < pci->smallest_seq_read) {
2295 __t = trace_rb_find_last(pdi, pci, expected_sequence);
1ca323a5 2296 if (!__t)
1c7c54aa 2297 goto skip;
2a1b3424 2298
1ca323a5 2299 __put_trace_last(pdi, __t);
2a1b3424 2300 return 0;
a141a7cd
JA
2301 } else if (!force) {
2302 return 1;
0b07f23e 2303 } else {
1c7c54aa 2304skip:
66930177 2305 if (check_current_skips(pci, bit->sequence))
492da111
AB
2306 return 0;
2307
965eca2d 2308 if (expected_sequence < bit->sequence)
66930177 2309 insert_skip(pci, expected_sequence, bit->sequence - 1);
1c7c54aa
JA
2310 return 0;
2311 }
2a1b3424
JA
2312}
2313
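/*
 * Walk the time-sorted rbtree and emit traces. Unless 'force' is set,
 * stop at entries that were read too recently or lie beyond
 * last_allowed_time, since later reads could still supply older events.
 */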
a649216c 2314static void show_entries_rb(int force)
8fc0abbc 2315{
1f7afa72
JA
2316 struct per_dev_info *pdi = NULL;
2317 struct per_cpu_info *pci = NULL;
8fc0abbc 2318 struct blk_io_trace *bit;
3aabcd89 2319 struct rb_node *n;
8fc0abbc 2320 struct trace *t;
1f7afa72 2321
7d747d22 2322 while ((n = rb_first(&rb_sort_root)) != NULL) {
dd90748f 2323 if (is_done() && !force && !pipeline)
1f7afa72 2324 break;
8fc0abbc
JA
2325
2326 t = rb_entry(n, struct trace, rb_node);
2327 bit = t->bit;
2328
a43c1c17
JA
2329 if (read_sequence - t->read_sequence < 1 && !force)
2330 break;
2331
210824c3 2332 if (!pdi || pdi->dev != bit->device) {
287fa3d6 2333 pdi = get_dev_info(bit->device);
210824c3
JA
2334 pci = NULL;
2335 }
1f7afa72 2336
e7c9f3ff
NS
2337 if (!pdi) {
2338 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
2339 MAJOR(bit->device), MINOR(bit->device));
2340 break;
2341 }
1f7afa72 2342
7238673f 2343 if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) &&
9bf422b1 2344 check_sequence(pdi, t, force))
a141a7cd 2345 break;
cb2a1a62 2346
a141a7cd
JA
2347 if (!force && bit->time > last_allowed_time)
2348 break;
8fc0abbc 2349
4f0ae44f 2350 check_time(pdi, bit);
8fc0abbc 2351
4f0ae44f
JA
2352 if (!pci || pci->cpu != bit->cpu)
2353 pci = get_cpu_info(pdi, bit->cpu);
287fa3d6 2354
7238673f 2355 if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
9bf422b1 2356 pci->last_sequence = bit->sequence;
210824c3 2357
cbc927b6
JA
2358 pci->nelems++;
2359
66930177 2360 if (bit->action & (act_mask << BLK_TC_SHIFT))
98f8386b 2361 dump_trace(bit, pci, pdi);
87b72777 2362
2a1b3424 2363 put_trace(pdi, t);
cb2a1a62 2364 }
8fc0abbc
JA
2365}
2366
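/*
 * Read exactly 'bytes' bytes into 'buffer'. The fd is switched between
 * blocking and non-blocking mode only when 'block' differs from the
 * cached *fdblock state; partial reads are never returned to the caller.
 * Returns 0 on success, 1 on EOF or no data available, -1 on error.
 */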
c0e0dbc2 2367static int read_data(int fd, void *buffer, int bytes, int block, int *fdblock)
1f79c4a0
JA
2368{
2369 int ret, bytes_left, fl;
2370 void *p;
2371
c0e0dbc2
JA
2372 if (block != *fdblock) {
2373 fl = fcntl(fd, F_GETFL);
1f79c4a0 2374
c0e0dbc2
JA
2375 if (!block) {
2376 *fdblock = 0;
2377 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
2378 } else {
2379 *fdblock = 1;
2380 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
2381 }
2382 }
1f79c4a0
JA
2383
2384 bytes_left = bytes;
2385 p = buffer;
2386 while (bytes_left > 0) {
2387 ret = read(fd, p, bytes_left);
2388 if (!ret)
2389 return 1;
2390 else if (ret < 0) {
db7e0552 2391 if (errno != EAGAIN) {
1f79c4a0 2392 perror("read");
db7e0552
JA
2393 return -1;
2394 }
a649216c 2395
5c0f40f7
JA
2396 /*
2397 * never do partial reads. we can return if we
2398 * didn't read anything and we should not block,
2399 * otherwise wait for data
2400 */
2401 if ((bytes_left == bytes) && !block)
2402 return 1;
2403
2404 usleep(10);
2405 continue;
1f79c4a0
JA
2406 } else {
2407 p += ret;
2408 bytes_left -= ret;
2409 }
2410 }
2411
2412 return 0;
2413}
2414
017d1660
JA
2415static inline __u16 get_pdulen(struct blk_io_trace *bit)
2416{
2417 if (data_is_native)
2418 return bit->pdu_len;
2419
2420 return __bswap_16(bit->pdu_len);
2421}
2422
2423static inline __u32 get_magic(struct blk_io_trace *bit)
2424{
2425 if (data_is_native)
2426 return bit->magic;
2427
2428 return __bswap_32(bit->magic);
2429}
2430
c0e0dbc2 2431static int read_events(int fd, int always_block, int *fdblock)
cb2a1a62 2432{
287fa3d6 2433 struct per_dev_info *pdi = NULL;
e820abd7 2434 unsigned int events = 0;
7d747d22
JA
2435
2436 while (!is_done() && events < rb_batch) {
2437 struct blk_io_trace *bit;
2438 struct trace *t;
db7e0552 2439 int pdu_len, should_block, ret;
7d747d22
JA
2440 __u32 magic;
2441
d36421e4 2442 bit = bit_alloc();
cb2a1a62 2443
c0e0dbc2
JA
2444 should_block = !events || always_block;
2445
db7e0552
JA
2446 ret = read_data(fd, bit, sizeof(*bit), should_block, fdblock);
2447 if (ret) {
eb9bd4e9 2448 bit_free(bit);
db7e0552
JA
2449 if (!events && ret < 0)
2450 events = ret;
cb2a1a62 2451 break;
eb9bd4e9 2452 }
cb2a1a62 2453
017d1660
JA
2454 /*
2455 * look at first trace to check whether we need to convert
2456 * data in the future
2457 */
9e4cd1b8 2458 if (data_is_native == -1 && check_data_endianness(bit->magic))
017d1660
JA
2459 break;
2460
2461 magic = get_magic(bit);
7d747d22
JA
2462 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2463 fprintf(stderr, "Bad magic %x\n", magic);
2464 break;
2465 }
2466
017d1660 2467 pdu_len = get_pdulen(bit);
7d747d22
JA
2468 if (pdu_len) {
2469 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
2470
c0e0dbc2 2471 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1, fdblock)) {
eb9bd4e9 2472 bit_free(ptr);
7d747d22 2473 break;
eb9bd4e9 2474 }
7d747d22
JA
2475
2476 bit = ptr;
2477 }
2478
d6222db8
JA
2479 trace_to_cpu(bit);
2480
2481 if (verify_trace(bit)) {
2482 bit_free(bit);
2483 continue;
2484 }
2485
bfc70ad5
JA
2486 /*
2487 * not a real trace, so grab and handle it here
2488 */
7238673f 2489 if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
7bd4fd0a 2490 handle_notify(bit);
a2594911 2491 output_binary(bit, sizeof(*bit) + bit->pdu_len);
bfc70ad5
JA
2492 continue;
2493 }
2494
d36421e4 2495 t = t_alloc();
cb2a1a62
JA
2496 memset(t, 0, sizeof(*t));
2497 t->bit = bit;
a43c1c17 2498 t->read_sequence = read_sequence;
cb2a1a62 2499
7d747d22
JA
2500 t->next = trace_list;
2501 trace_list = t;
1f7afa72 2502
f7bd1a9b 2503 if (!pdi || pdi->dev != bit->device)
287fa3d6
JA
2504 pdi = get_dev_info(bit->device);
2505
2506 if (bit->time > pdi->last_read_time)
2507 pdi->last_read_time = bit->time;
2508
7d747d22 2509 events++;
cb2a1a62
JA
2510 }
2511
7d747d22 2512 return events;
cb2a1a62
JA
2513}
2514
70317a16
AB
2515/*
2516 * Managing input streams
2517 */
2518
2519struct ms_stream {
2520 struct ms_stream *next;
2521 struct trace *first, *last;
73877e12 2522 struct per_dev_info *pdi;
70317a16
AB
2523 unsigned int cpu;
2524};
d0ca268b 2525
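/*
 * Hash bucket index for ms_hash[], mixing device major/minor and CPU.
 * Note the macro body references the caller's 'cpu' variable rather
 * than its 'c' parameter, so 'cpu' must be in scope at the call site.
 */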
70317a16 2526#define MS_HASH(d, c) ((MAJOR(d) & 0xff) ^ (MINOR(d) & 0xff) ^ (cpu & 0xff))
73877e12 2527
70317a16
AB
2528struct ms_stream *ms_head;
2529struct ms_stream *ms_hash[256];
87b72777 2530
70317a16
AB
2531static void ms_sort(struct ms_stream *msp);
2532static int ms_prime(struct ms_stream *msp);
2533
2534static inline struct trace *ms_peek(struct ms_stream *msp)
2535{
2536 return (msp == NULL) ? NULL : msp->first;
2537}
d0ca268b 2538
70317a16
AB
2539static inline __u64 ms_peek_time(struct ms_stream *msp)
2540{
2541 return ms_peek(msp)->bit->time;
2542}
d1d7f15f 2543
70317a16
AB
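/*
 * ms_head is a list of per-(device, CPU) streams kept sorted by the
 * timestamp of each stream's oldest pending trace, so the globally
 * oldest event is always found at ms_head. ms_resort pushes the head
 * stream back down the list after its oldest trace has changed.
 */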
2544static inline void ms_resort(struct ms_stream *msp)
2545{
2546 if (msp->next && ms_peek_time(msp) > ms_peek_time(msp->next)) {
2547 ms_head = msp->next;
2548 msp->next = NULL;
2549 ms_sort(msp);
2550 }
2551}
e7c9f3ff 2552
70317a16
AB
2553static inline void ms_deq(struct ms_stream *msp)
2554{
2555 msp->first = msp->first->next;
2556 if (!msp->first) {
2557 msp->last = NULL;
2558 if (!ms_prime(msp)) {
2559 ms_head = msp->next;
2560 msp->next = NULL;
2561 return;
d0ca268b 2562 }
d5396421
JA
2563 }
2564
70317a16
AB
2565 ms_resort(msp);
2566}
53c68c88 2567
70317a16
AB
2568static void ms_sort(struct ms_stream *msp)
2569{
2570 __u64 msp_t = ms_peek_time(msp);
2571 struct ms_stream *this_msp = ms_head;
d5396421 2572
70317a16
AB
2573 if (this_msp == NULL)
2574 ms_head = msp;
2575 else if (msp_t < ms_peek_time(this_msp)) {
2576 msp->next = this_msp;
2577 ms_head = msp;
2578 }
2579 else {
2580 while (this_msp->next && ms_peek_time(this_msp->next) < msp_t)
2581 this_msp = this_msp->next;
73877e12 2582
70317a16
AB
2583 msp->next = this_msp->next;
2584 this_msp->next = msp;
2585 }
2586}
d5396421 2587
70317a16
AB
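/*
 * Refill a stream with up to rb_batch traces from its per-CPU file,
 * byte-swapping and verifying each one. Notify records are handled
 * immediately and do not count against the batch; on read or verify
 * errors the CPU is marked offline and its file closed.
 */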
2588static int ms_prime(struct ms_stream *msp)
2589{
2590 __u32 magic;
2591 unsigned int i;
2592 struct trace *t;
2593 struct per_dev_info *pdi = msp->pdi;
2594 struct per_cpu_info *pci = get_cpu_info(pdi, msp->cpu);
2595 struct blk_io_trace *bit = NULL;
2596 int ret, pdu_len, ndone = 0;
d5396421 2597
70317a16
AB
2598 for (i = 0; !is_done() && pci->fd >= 0 && i < rb_batch; i++) {
2599 bit = bit_alloc();
2600 ret = read_data(pci->fd, bit, sizeof(*bit), 1, &pci->fdblock);
2601 if (ret)
2602 goto err;
51128a28 2603
70317a16
AB
2604 if (data_is_native == -1 && check_data_endianness(bit->magic))
2605 goto err;
210824c3 2606
70317a16
AB
2607 magic = get_magic(bit);
2608 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
2609 fprintf(stderr, "Bad magic %x\n", magic);
2610 goto err;
d5396421 2611
70317a16 2612 }
d5396421 2613
70317a16
AB
2614 pdu_len = get_pdulen(bit);
2615 if (pdu_len) {
2616 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
2617 ret = read_data(pci->fd, ptr + sizeof(*bit), pdu_len,
2618 1, &pci->fdblock);
2619 if (ret) {
2620 free(ptr);
4eb899a6 2621 bit = NULL;
70317a16 2622 goto err;
7d747d22 2623 }
70317a16
AB
2624
2625 bit = ptr;
2ff323b0 2626 }
d5396421 2627
70317a16
AB
2628 trace_to_cpu(bit);
2629 if (verify_trace(bit))
2630 goto err;
53c68c88 2631
13d48592
TM
2632 if (bit->cpu != pci->cpu) {
2633 fprintf(stderr, "trace for cpu %d contains an event for cpu %d, skipping\n",
2634 pci->cpu, bit->cpu);
2635 continue;
2636 }
2637
7238673f 2638 if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
7bd4fd0a 2639 handle_notify(bit);
70317a16
AB
2640 output_binary(bit, sizeof(*bit) + bit->pdu_len);
2641 bit_free(bit);
287fa3d6 2642
70317a16
AB
2643 i -= 1;
2644 continue;
2645 }
cb2a1a62 2646
70317a16
AB
2647 if (bit->time > pdi->last_read_time)
2648 pdi->last_read_time = bit->time;
d5396421 2649
70317a16
AB
2650 t = t_alloc();
2651 memset(t, 0, sizeof(*t));
2652 t->bit = bit;
2653
2654 if (msp->first == NULL)
2655 msp->first = msp->last = t;
2656 else {
2657 msp->last->next = t;
2658 msp->last = t;
2659 }
2660
2661 ndone++;
2662 }
2663
2664 return ndone;
2665
2666err:
2667 if (bit) bit_free(bit);
2668
2669 cpu_mark_offline(pdi, pci->cpu);
2670 close(pci->fd);
2671 pci->fd = -1;
2672
2673 return ndone;
2674}
2675
2676static struct ms_stream *ms_alloc(struct per_dev_info *pdi, int cpu)
2677{
2678 struct ms_stream *msp = malloc(sizeof(*msp));
2679
2680 msp->next = NULL;
2681 msp->first = msp->last = NULL;
2682 msp->pdi = pdi;
2683 msp->cpu = cpu;
2684
2685 if (ms_prime(msp))
2686 ms_sort(msp);
2687
2688 return msp;
2689}
2690
2691static int setup_file(struct per_dev_info *pdi, int cpu)
2692{
2693 int len = 0;
2694 struct stat st;
2695 char *p, *dname;
2696 struct per_cpu_info *pci = get_cpu_info(pdi, cpu);
2697
2698 pci->cpu = cpu;
2699 pci->fdblock = -1;
2700
2701 p = strdup(pdi->name);
2702 dname = dirname(p);
2703 if (strcmp(dname, ".")) {
2704 input_dir = dname;
2705 p = strdup(pdi->name);
2706 strcpy(pdi->name, basename(p));
2707 }
2708 free(p);
2709
2710 if (input_dir)
2711 len = sprintf(pci->fname, "%s/", input_dir);
2712
2713 snprintf(pci->fname + len, sizeof(pci->fname)-1-len,
2714 "%s.blktrace.%d", pdi->name, pci->cpu);
8afe3d7d 2715 if (stat(pci->fname, &st) < 0)
70317a16 2716 return 0;
8afe3d7d
AB
2717 if (!st.st_size)
2718 return 1;
70317a16
AB
2719
2720 pci->fd = open(pci->fname, O_RDONLY);
2721 if (pci->fd < 0) {
2722 perror(pci->fname);
2723 return 0;
2724 }
2725
2726 printf("Input file %s added\n", pci->fname);
2727 cpu_mark_online(pdi, pci->cpu);
2728
2729 pdi->nfiles++;
2730 ms_alloc(pdi, pci->cpu);
2731
2732 return 1;
2733}
2734
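/*
 * Consume the oldest pending trace from the given stream: dump it if it
 * passes the action mask and stopwatch window, advance the stream, and
 * keep the trace around only when text output needs it later. Returns 0
 * once the stopwatch end is reached, stopping the main loop.
 */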
2735static int handle(struct ms_stream *msp)
2736{
2737 struct trace *t;
2738 struct per_dev_info *pdi;
2739 struct per_cpu_info *pci;
2740 struct blk_io_trace *bit;
2741
2742 t = ms_peek(msp);
70317a16
AB
2743
2744 bit = t->bit;
2745 pdi = msp->pdi;
2746 pci = get_cpu_info(pdi, msp->cpu);
2747 pci->nelems++;
8091de93 2748 bit->time -= genesis_time;
7072ee3f
LU
2749
2750 if (t->bit->time > stopwatch_end)
2751 return 0;
2752
8091de93 2753 pdi->last_reported_time = bit->time;
7072ee3f
LU
2754 if ((bit->action & (act_mask << BLK_TC_SHIFT)) &&
2755 t->bit->time >= stopwatch_start)
70317a16
AB
2756 dump_trace(bit, pci, pdi);
2757
2758 ms_deq(msp);
2759
2760 if (text_output)
2761 trace_rb_insert_last(pdi, t);
2762 else {
2763 bit_free(t->bit);
2764 t_free(t);
2765 }
2766
2767 return 1;
2768}
2769
7d340756
MZ
2770/*
2771 * Check if we need to sanitize the name. We allow 'foo', or if foo.blktrace.X
2772 * is given, then strip back down to 'foo' to avoid missing files.
2773 */
2774static int name_fixup(char *name)
2775{
2776 char *b;
2777
2778 if (!name)
2779 return 1;
2780
2781 b = strstr(name, ".blktrace.");
2782 if (b)
2783 *b = '\0';
2784
2785 return 0;
2786}
2787
70317a16
AB
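/*
 * File mode: open every per-CPU trace file for every device, establish
 * genesis_time from the oldest stream, then merge the streams in time
 * order until they are exhausted or a signal is received.
 */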
2788static int do_file(void)
2789{
7d340756 2790 int i, cpu, ret;
70317a16
AB
2791 struct per_dev_info *pdi;
2792
2793 /*
2794 * first prepare all files for reading
2795 */
2796 for (i = 0; i < ndevices; i++) {
2797 pdi = &devices[i];
7d340756
MZ
2798 ret = name_fixup(pdi->name);
2799 if (ret)
2800 return ret;
2801
70317a16
AB
2802 for (cpu = 0; setup_file(pdi, cpu); cpu++)
2803 ;
a2b1f355
ES
2804
2805 if (!cpu) {
2806 fprintf(stderr,"No input files found for %s\n",
2807 pdi->name);
2808 return 1;
2809 }
70317a16
AB
2810 }
2811
8091de93
AB
2812 /*
2813 * Get the initial time stamp
2814 */
2815 if (ms_head)
2816 genesis_time = ms_peek_time(ms_head);
2817
918e9797
HM
2818 /*
2819 * Correct abs_start_time if necessary
2820 */
2821 if (start_timestamp
2822 && start_timestamp != genesis_time) {
2823 correct_abs_start_time();
2824 }
2825
70317a16
AB
2826 /*
2827 * Keep processing traces while any are left
2828 */
2829 while (!is_done() && ms_head && handle(ms_head))
2830 ;
a649216c 2831
7d747d22 2832 return 0;
412819ce 2833}
d5396421 2834
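/*
 * Pipe mode: read batches of events from the fd, merge them into the
 * time-sorted rbtree and emit everything that can no longer be
 * reordered; a final forced pass flushes whatever is left.
 */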
67076cbc 2835static void do_pipe(int fd)
412819ce 2836{
53c68c88 2837 unsigned long long youngest;
67076cbc 2838 int events, fdblock;
d5396421 2839
be925321 2840 last_allowed_time = -1ULL;
c0e0dbc2 2841 fdblock = -1;
db7e0552 2842 while ((events = read_events(fd, 0, &fdblock)) > 0) {
4ab42801 2843 read_sequence++;
412819ce 2844
210824c3 2845#if 0
0b07f23e 2846 smallest_seq_read = -1U;
210824c3 2847#endif
0b07f23e 2848
53c68c88
JA
2849 if (sort_entries(&youngest))
2850 break;
2851
2852 if (youngest > stopwatch_end)
2ff323b0
JA
2853 break;
2854
763d936e 2855 show_entries_rb(0);
0b07f23e 2856 }
d5396421 2857
a649216c
JA
2858 if (rb_sort_entries)
2859 show_entries_rb(1);
67076cbc
JA
2860}
2861
2862static int do_fifo(void)
2863{
2864 int fd;
2865
2866 if (!strcmp(pipename, "-"))
2867 fd = dup(STDIN_FILENO);
2868 else
2869 fd = open(pipename, O_RDONLY);
2870
2871 if (fd == -1) {
2872 perror("dup stdin");
2873 return -1;
2874 }
a649216c 2875
67076cbc 2876 do_pipe(fd);
d5396421 2877 close(fd);
d5396421
JA
2878 return 0;
2879}
d0ca268b 2880
cbc927b6 2881static void show_stats(void)
412819ce 2882{
cbc927b6
JA
2883 if (!ofp)
2884 return;
2885 if (stats_printed)
2886 return;
2887
2888 stats_printed = 1;
2889
2890 if (per_process_stats)
2891 show_process_stats();
2892
2893 if (per_device_and_cpu_stats)
2894 show_device_and_cpu_stats();
dc24c67d 2895 fprintf(ofp, "Trace started at %s\n", ctime(&abs_start_time.tv_sec));
cbc927b6 2896
152f6476 2897 fflush(ofp);
412819ce
JA
2898}
2899
e820abd7 2900static void handle_sigint(__attribute__((__unused__)) int sig)
412819ce
JA
2901{
2902 done = 1;
412819ce
JA
2903}
2904
46e6968b
NS
2905/*
2906 * Extract start and duration times from a string, allowing
2907 * us to specify a time interval of interest within a trace.
2908 * Format: "duration" (start is zero) or "start:duration".
2909 */
2910static int find_stopwatch_interval(char *string)
2911{
2912 double value;
2913 char *sp;
2914
2915 value = strtod(string, &sp);
2916 if (sp == string) {
2917 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
2918 return 1;
2919 }
2920 if (*sp == ':') {
2921 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
2922 string = sp + 1;
2923 value = strtod(string, &sp);
2924 if (sp == string || *sp != '\0') {
2925 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
2926 string);
2927 return 1;
2928 }
2929 } else if (*sp != '\0') {
2930 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
2931 return 1;
2932 }
1b928247
JA
2933 stopwatch_end = DOUBLE_TO_NANO_ULL(value);
2934 if (stopwatch_end <= stopwatch_start) {
2935 fprintf(stderr, "Invalid stopwatch interval: %Lu -> %Lu\n",
2936 stopwatch_start, stopwatch_end);
2937 return 1;
2938 }
2939
46e6968b
NS
2940 return 0;
2941}
2942
67076cbc
JA
2943static int is_pipe(const char *str)
2944{
2945 struct stat st;
2946
2947 if (!strcmp(str, "-"))
2948 return 1;
2949 if (!stat(str, &st) && S_ISFIFO(st.st_mode))
2950 return 1;
2951
2952 return 0;
2953}
2954
a7263b8f
WZ
2955static int get_program_sort_event(const char *str)
2956{
2957 char evt = str[0];
2958
2959 switch (evt) {
2960 case 'N':
2961 per_process_stats_event = SORT_PROG_EVENT_N;
2962 break;
2963 case 'Q':
2964 per_process_stats_event = SORT_PROG_EVENT_QKB;
2965 break;
2966 case 'q':
2967 per_process_stats_event = SORT_PROG_EVENT_QIO;
2968 break;
2969 case 'R':
2970 per_process_stats_event = SORT_PROG_EVENT_RKB;
2971 break;
2972 case 'r':
2973 per_process_stats_event = SORT_PROG_EVENT_RIO;
2974 break;
2975 case 'W':
2976 per_process_stats_event = SORT_PROG_EVENT_WKB;
2977 break;
2978 case 'w':
2979 per_process_stats_event = SORT_PROG_EVENT_WIO;
2980 break;
2981 case 'C':
2982 per_process_stats_event = SORT_PROG_EVENT_CKB;
2983 break;
2984 case 'c':
2985 per_process_stats_event = SORT_PROG_EVENT_CIO;
2986 break;
2987 default:
2988 return 1;
2989 }
2990
2991 return 0;
2992}
2993
2994#define S_OPTS "a:A:b:D:d:f:F:hi:o:OqsS:tw:vVM"
234db09d
AB
2995static char usage_str[] = "\n\n" \
2996 "-i <file> | --input=<file>\n" \
2997 "[ -a <action field> | --act-mask=<action field> ]\n" \
2998 "[ -A <action mask> | --set-mask=<action mask> ]\n" \
2999 "[ -b <traces> | --batch=<traces> ]\n" \
3000 "[ -d <file> | --dump-binary=<file> ]\n" \
3001 "[ -D <dir> | --input-directory=<dir> ]\n" \
3002 "[ -f <format> | --format=<format> ]\n" \
3003 "[ -F <spec> | --format-spec=<spec> ]\n" \
3004 "[ -h | --hash-by-name ]\n" \
3005 "[ -o <file> | --output=<file> ]\n" \
3006 "[ -O | --no-text-output ]\n" \
3007 "[ -q | --quiet ]\n" \
3008 "[ -s | --per-program-stats ]\n" \
a7263b8f 3009 "[ -S <event> | --sort-program-stats=<event> ]\n" \
234db09d
AB
3010 "[ -t | --track-ios ]\n" \
3011 "[ -w <time> | --stopwatch=<time> ]\n" \
19cfaf3f 3012 "[ -M | --no-msgs ]\n" \
234db09d
AB
3013 "[ -v | --verbose ]\n" \
3014 "[ -V | --version ]\n\n" \
541c9bf6
ES
3015 "\t-a Only trace specified actions. See documentation\n" \
3016 "\t-A Give trace mask as a single value. See documentation\n" \
234db09d
AB
3017 "\t-b stdin read batching\n" \
3018 "\t-d Output file. If specified, binary data is written to file\n" \
d1d7f15f 3019 "\t-D Directory to prepend to input file names\n" \
234db09d
AB
3020 "\t-f Output format. Customize the output format. The format field\n" \
3021 "\t identifies can be found in the documentation\n" \
3022 "\t-F Format specification. Can be found in the documentation\n" \
3023 "\t-h Hash processes by name, not pid\n" \
3024 "\t-i Input file containing trace data, or '-' for stdin\n" \
52724a0e 3025 "\t-o Output file. If not given, output is stdout\n" \
234db09d
AB
3026 "\t-O Do NOT output text data\n" \
3027 "\t-q Quiet. Don't display any stats at the end of the trace\n" \
52724a0e 3028 "\t-s Show per-program io statistics\n" \
a7263b8f
WZ
3029 "\t-S Show per-program io statistics sorted by N/Q/q/R/r/W/w/C/c\n" \
3030 "\t N:Name, Q/q:Queued(read & write), R/r:Queued Read, W/w:Queued Write, C/c:Complete.\n" \
3031 "\t Sort programs by how much data(KB): Q,R,W,C.\n" \
3032 "\t Sort programs by how many IO operations: q,r,w,c.\n" \
3033 "\t if -S was used, the -s parameter will be ignored.\n" \
52724a0e
JA
3034 "\t-t Track individual ios. Will tell you the time a request took\n" \
3035 "\t to get queued, to get dispatched, and to get completed\n" \
52724a0e
JA
3036 "\t-w Only parse data between the given time interval in seconds.\n" \
3037 "\t If 'start' isn't given, blkparse defaults the start time to 0\n" \
19cfaf3f 3038 "\t-M Do not output messages to binary file\n" \
57ea8602
JA
3039 "\t-v More verbose for marginal errors\n" \
3040 "\t-V Print program version info\n\n";
52724a0e 3041
1f79c4a0
JA
3042static void usage(char *prog)
3043{
bc14c53f 3044 fprintf(stderr, "Usage: %s %s", prog, usage_str);
1f79c4a0
JA
3045}
3046
d5396421
JA
3047int main(int argc, char *argv[])
3048{
98f8386b 3049 int i, c, ret, mode;
98f8386b 3050 int act_mask_tmp = 0;
234db09d 3051 char *ofp_buffer = NULL;
346d8a74 3052 char *bin_ofp_buffer = NULL;
d5396421
JA
3053
3054 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
3055 switch (c) {
98f8386b
AB
3056 case 'a':
3057 i = find_mask_map(optarg);
3058 if (i < 0) {
3059 fprintf(stderr,"Invalid action mask %s\n",
3060 optarg);
3061 return 1;
3062 }
3063 act_mask_tmp |= i;
3064 break;
3065
3066 case 'A':
3067 if ((sscanf(optarg, "%x", &i) != 1) ||
3068 !valid_act_opt(i)) {
3069 fprintf(stderr,
3070 "Invalid set action mask %s/0x%x\n",
3071 optarg, i);
3072 return 1;
3073 }
3074 act_mask_tmp = i;
3075 break;
d5396421 3076 case 'i':
67076cbc 3077 if (is_pipe(optarg) && !pipeline) {
e7c9f3ff 3078 pipeline = 1;
67076cbc
JA
3079 pipename = strdup(optarg);
3080 } else if (resize_devices(optarg) != 0)
e7c9f3ff 3081 return 1;
d5396421 3082 break;
d1d7f15f
JA
3083 case 'D':
3084 input_dir = optarg;
3085 break;
d5396421 3086 case 'o':
66efebf8 3087 output_name = optarg;
d5396421 3088 break;
234db09d
AB
3089 case 'O':
3090 text_output = 0;
3091 break;
79f19470
JA
3092 case 'b':
3093 rb_batch = atoi(optarg);
3094 if (rb_batch <= 0)
3095 rb_batch = RB_BATCH_DEFAULT;
3096 break;
152f6476
JA
3097 case 's':
3098 per_process_stats = 1;
3099 break;
a7263b8f
WZ
3100 case 'S':
3101 per_process_stats = 1;
3102 if (get_program_sort_event(optarg))
3103 return 1;
3104 break;
7997c5b0
JA
3105 case 't':
3106 track_ios = 1;
3107 break;
1e1c60f1
NS
3108 case 'q':
3109 per_device_and_cpu_stats = 0;
3110 break;
46e6968b
NS
3111 case 'w':
3112 if (find_stopwatch_interval(optarg) != 0)
3113 return 1;
3114 break;
ab197ca7
AB
3115 case 'f':
3116 set_all_format_specs(optarg);
3117 break;
3118 case 'F':
3119 if (add_format_spec(optarg) != 0)
3120 return 1;
3121 break;
d915dee6 3122 case 'h':
715d8021 3123 ppi_hash_by_pid = 0;
bf0720af 3124 break;
52724a0e 3125 case 'v':
57ea8602
JA
3126 verbose++;
3127 break;
3128 case 'V':
52724a0e
JA
3129 printf("%s version %s\n", argv[0], blkparse_version);
3130 return 0;
a2594911
AB
3131 case 'd':
3132 dump_binary = optarg;
3133 break;
19cfaf3f
AB
3134 case 'M':
3135 bin_output_msgs = 0;
3136 break;
d5396421 3137 default:
1f79c4a0 3138 usage(argv[0]);
d5396421
JA
3139 return 1;
3140 }
d0ca268b
JA
3141 }
3142
e7c9f3ff 3143 while (optind < argc) {
67076cbc 3144 if (is_pipe(argv[optind]) && !pipeline) {
e7c9f3ff 3145 pipeline = 1;
00cd3044 3146 pipename = strdup(argv[optind]);
67076cbc 3147 } else if (resize_devices(argv[optind]) != 0)
e7c9f3ff
NS
3148 return 1;
3149 optind++;
3150 }
3151
3152 if (!pipeline && !ndevices) {
1f79c4a0 3153 usage(argv[0]);
d5396421
JA
3154 return 1;
3155 }
3156
98f8386b
AB
3157 if (act_mask_tmp != 0)
3158 act_mask = act_mask_tmp;
3159
7997c5b0 3160 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
412819ce
JA
3161
3162 signal(SIGINT, handle_sigint);
3163 signal(SIGHUP, handle_sigint);
3164 signal(SIGTERM, handle_sigint);
d5396421 3165
d69db225
JA
3166 setlocale(LC_NUMERIC, "en_US");
3167
234db09d
AB
3168 if (text_output) {
3169 if (!output_name) {
3170 ofp = fdopen(STDOUT_FILENO, "w");
3171 mode = _IOLBF;
3172 } else {
c3ce73f5 3173 char ofname[PATH_MAX];
152f6476 3174
234db09d
AB
3175 snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
3176 ofp = fopen(ofname, "w");
3177 mode = _IOFBF;
3178 }
152f6476 3179
234db09d
AB
3180 if (!ofp) {
3181 perror("fopen");
3182 return 1;
3183 }
152f6476 3184
234db09d
AB
3185 ofp_buffer = malloc(4096);
3186 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
3187 perror("setvbuf");
3188 return 1;
3189 }
152f6476
JA
3190 }
3191
a2594911 3192 if (dump_binary) {
cf659442
JA
3193 if (!strcmp(dump_binary, "-"))
3194 dump_fp = stdout;
3195 else {
3196 dump_fp = fopen(dump_binary, "w");
3197 if (!dump_fp) {
3198 perror(dump_binary);
3199 dump_binary = NULL;
3200 return 1;
3201 }
a2594911 3202 }
346d8a74
AB
3203 bin_ofp_buffer = malloc(128 * 1024);
3204 if (setvbuf(dump_fp, bin_ofp_buffer, _IOFBF, 128 * 1024)) {
3205 perror("setvbuf binary");
3206 return 1;
3207 }
a2594911
AB
3208 }
3209
e7c9f3ff 3210 if (pipeline)
67076cbc 3211 ret = do_fifo();
d5396421
JA
3212 else
3213 ret = do_file();
3214
fb863d7c
MZ
3215 if (!ret)
3216 show_stats();
3217
c701176c
MP
3218 if (have_drv_data && !dump_binary)
3219 printf("\ndiscarded traces containing low-level device driver "
3220 "specific data (only available in binary output)\n");
3221
8091de93
AB
3222 if (ofp_buffer) {
3223 fflush(ofp);
234db09d 3224 free(ofp_buffer);
8091de93
AB
3225 }
3226 if (bin_ofp_buffer) {
3227 fflush(dump_fp);
346d8a74 3228 free(bin_ofp_buffer);
8091de93 3229 }
d5396421 3230 return ret;
d0ca268b 3231}