[PATCH] blkparse: more PC fixes
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
ab197ca7 33#include <ctype.h>
d0ca268b 34
8fc0abbc
JA
35#include "blktrace.h"
36#include "rbtree.h"
d0ca268b 37
2e3e8ded
JA
38#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
39#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
46e6968b 40#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
cfab07eb 41
e7c9f3ff
NS
42#define MINORBITS 20
43#define MINORMASK ((1U << MINORBITS) - 1)
44#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
45#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
46
47#define min(a, b) ((a) < (b) ? (a) : (b))
d5396421 48
ab197ca7
AB
49#define HEADER "%D %2c %8s %5T.%9t %5p %2a %3d "
50
152f6476
JA
51struct io_stats {
52 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53 unsigned long ireads, iwrites;
54 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55 unsigned long long iread_kb, iwrite_kb;
06639b27 56 unsigned long io_unplugs, timer_unplugs;
152f6476
JA
57};
58
d5396421 59struct per_cpu_info {
d0ca268b
JA
60 int cpu;
61 int nelems;
d0ca268b
JA
62
63 int fd;
87b72777 64 char fname[128];
d0ca268b 65
152f6476
JA
66 struct io_stats io_stats;
67};
8fc0abbc 68
e7c9f3ff
NS
69struct per_dev_info {
70 dev_t id;
71 char *name;
72
73 int backwards;
74 unsigned long long events;
75 unsigned long long last_reported_time;
76 struct io_stats io_stats;
cb2a1a62 77 unsigned long last_sequence;
0aa8a62d 78 unsigned long skips;
e7c9f3ff
NS
79
80 int ncpus;
81 struct per_cpu_info *cpus;
82};
83
152f6476
JA
84struct per_process_info {
85 char name[16];
86 __u32 pid;
87 struct io_stats io_stats;
88 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
89
90 /*
91 * individual io stats
92 */
b9d40d6f
JA
93 unsigned long long longest_allocation_wait[2];
94 unsigned long long longest_dispatch_wait[2];
95 unsigned long long longest_completion_wait[2];
d0ca268b
JA
96};
97
152f6476
JA
98#define PPI_HASH_SHIFT (8)
99static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
100static struct per_process_info *ppi_list;
886ecf0e 101static int ppi_list_entries;
152f6476 102
ab197ca7 103#define S_OPTS "i:o:b:stqw:f:F:"
d5396421
JA
104static struct option l_opts[] = {
105 {
106 .name = "input",
428683db 107 .has_arg = required_argument,
d5396421
JA
108 .flag = NULL,
109 .val = 'i'
110 },
111 {
112 .name = "output",
428683db 113 .has_arg = required_argument,
d5396421
JA
114 .flag = NULL,
115 .val = 'o'
116 },
79f19470
JA
117 {
118 .name = "batch",
428683db 119 .has_arg = required_argument,
79f19470
JA
120 .flag = NULL,
121 .val = 'b'
122 },
152f6476
JA
123 {
124 .name = "per program stats",
428683db 125 .has_arg = no_argument,
152f6476
JA
126 .flag = NULL,
127 .val = 's'
128 },
7997c5b0
JA
129 {
130 .name = "track ios",
428683db 131 .has_arg = no_argument,
7997c5b0
JA
132 .flag = NULL,
133 .val = 't'
134 },
1e1c60f1
NS
135 {
136 .name = "quiet",
428683db 137 .has_arg = no_argument,
1e1c60f1
NS
138 .flag = NULL,
139 .val = 'q'
140 },
46e6968b
NS
141 {
142 .name = "stopwatch",
428683db 143 .has_arg = required_argument,
46e6968b
NS
144 .flag = NULL,
145 .val = 'w'
146 },
ab197ca7
AB
147 {
148 .name = "format",
428683db 149 .has_arg = required_argument,
ab197ca7
AB
150 .flag = NULL,
151 .val = 'f'
152 },
153 {
154 .name = "format-spec",
428683db 155 .has_arg = required_argument,
ab197ca7
AB
156 .flag = NULL,
157 .val = 'F'
158 },
d5396421
JA
159};
160
7997c5b0
JA
161/*
162 * for sorting the displayed output
163 */
8fc0abbc
JA
164struct trace {
165 struct blk_io_trace *bit;
166 struct rb_node rb_node;
cb2a1a62
JA
167 struct trace *next;
168 int skipped;
8fc0abbc
JA
169};
170
cb2a1a62
JA
171static struct rb_root rb_sort_root;
172static struct rb_root rb_track_root;
173
174static struct trace *trace_list;
175
7997c5b0
JA
176/*
177 * for tracking individual ios
178 */
179struct io_track {
180 struct rb_node rb_node;
181
e7c9f3ff 182 dev_t device;
7997c5b0
JA
183 __u64 sector;
184 __u32 pid;
95c15013 185 unsigned long long allocation_time;
7997c5b0
JA
186 unsigned long long queue_time;
187 unsigned long long dispatch_time;
188 unsigned long long completion_time;
189};
190
e7c9f3ff
NS
191static int ndevices;
192static struct per_dev_info *devices;
193static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 194
152f6476 195static FILE *ofp;
e7c9f3ff
NS
196static char *output_name;
197
198static unsigned long long genesis_time;
46e6968b
NS
199static unsigned long long stopwatch_start; /* start from zero by default */
200static unsigned long long stopwatch_end = ULONG_LONG_MAX; /* "infinity" */
152f6476
JA
201
202static int per_process_stats;
7997c5b0 203static int track_ios;
d0ca268b 204
79f19470
JA
205#define RB_BATCH_DEFAULT (1024)
206static int rb_batch = RB_BATCH_DEFAULT;
207
e7c9f3ff
NS
208static int pipeline;
209
412819ce
JA
210#define is_done() (*(volatile int *)(&done))
211static volatile int done;
212
152f6476
JA
213static inline unsigned long hash_long(unsigned long val)
214{
16ef714e
JA
215#if __WORDSIZE == 32
216 val *= 0x9e370001UL;
217#elif __WORDSIZE == 64
218 val *= 0x9e37fffffffc0001UL;
219#else
220#error unknown word size
221#endif
222
223 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
224}
225
226static inline void add_process_to_hash(struct per_process_info *ppi)
227{
228 const int hash_idx = hash_long(ppi->pid);
229
230 ppi->hash_next = ppi_hash[hash_idx];
231 ppi_hash[hash_idx] = ppi;
232}
233
234static inline void add_process_to_list(struct per_process_info *ppi)
235{
236 ppi->list_next = ppi_list;
237 ppi_list = ppi;
886ecf0e 238 ppi_list_entries++;
152f6476
JA
239}
240
241static struct per_process_info *find_process_by_pid(__u32 pid)
242{
243 const int hash_idx = hash_long(pid);
244 struct per_process_info *ppi;
245
246 ppi = ppi_hash[hash_idx];
247 while (ppi) {
248 if (ppi->pid == pid)
249 return ppi;
250
251 ppi = ppi->hash_next;
252 }
253
254 return NULL;
255}
256
7997c5b0
JA
257static inline int trace_rb_insert(struct trace *t)
258{
259 struct rb_node **p = &rb_sort_root.rb_node;
260 struct rb_node *parent = NULL;
261 struct trace *__t;
262
e7c9f3ff
NS
263 if (genesis_time == 0 || t->bit->time < genesis_time)
264 genesis_time = t->bit->time;
265
7997c5b0
JA
266 while (*p) {
267 parent = *p;
268 __t = rb_entry(parent, struct trace, rb_node);
269
e7c9f3ff
NS
270 if (t->bit->time < __t->bit->time)
271 p = &(*p)->rb_left;
272 else if (t->bit->time > __t->bit->time)
273 p = &(*p)->rb_right;
274 else if (t->bit->device < __t->bit->device)
275 p = &(*p)->rb_left;
276 else if (t->bit->device > __t->bit->device)
277 p = &(*p)->rb_right;
278 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
279 p = &(*p)->rb_left;
280 else if (t->bit->sequence > __t->bit->sequence)
281 p = &(*p)->rb_right;
e7c9f3ff
NS
282 else if (t->bit->device == __t->bit->device) {
283 fprintf(stderr,
284 "sequence alias (%d) on device %d,%d!\n",
285 t->bit->sequence,
286 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
287 return 1;
288 }
289 }
290
291 rb_link_node(&t->rb_node, parent, p);
292 rb_insert_color(&t->rb_node, &rb_sort_root);
293 return 0;
294}
295
296static inline int track_rb_insert(struct io_track *iot)
297{
298 struct rb_node **p = &rb_track_root.rb_node;
299 struct rb_node *parent = NULL;
300 struct io_track *__iot;
301
302 while (*p) {
303 parent = *p;
e7c9f3ff 304
7997c5b0
JA
305 __iot = rb_entry(parent, struct io_track, rb_node);
306
e7c9f3ff
NS
307 if (iot->device < __iot->device)
308 p = &(*p)->rb_left;
309 else if (iot->device > __iot->device)
310 p = &(*p)->rb_right;
311 else if (iot->sector < __iot->sector)
7997c5b0
JA
312 p = &(*p)->rb_left;
313 else if (iot->sector > __iot->sector)
314 p = &(*p)->rb_right;
315 else {
e7c9f3ff 316 fprintf(stderr,
ab197ca7
AB
317 "sector alias (%Lu) on device %d,%d!\n",
318 (unsigned long long) iot->sector,
e7c9f3ff 319 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
320 return 1;
321 }
322 }
323
324 rb_link_node(&iot->rb_node, parent, p);
325 rb_insert_color(&iot->rb_node, &rb_track_root);
326 return 0;
327}
328
e7c9f3ff 329static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
330{
331 struct rb_node **p = &rb_track_root.rb_node;
332 struct rb_node *parent = NULL;
333 struct io_track *__iot;
334
335 while (*p) {
336 parent = *p;
337
338 __iot = rb_entry(parent, struct io_track, rb_node);
339
e7c9f3ff
NS
340 if (device < __iot->device)
341 p = &(*p)->rb_left;
342 else if (device > __iot->device)
343 p = &(*p)->rb_right;
344 else if (sector < __iot->sector)
7997c5b0
JA
345 p = &(*p)->rb_left;
346 else if (sector > __iot->sector)
347 p = &(*p)->rb_right;
348 else
349 return __iot;
350 }
351
352 return NULL;
353}
354
e7c9f3ff 355static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 356{
916b5501 357 struct io_track *iot;
7997c5b0 358
e7c9f3ff 359 iot = __find_track(device, sector);
7997c5b0
JA
360 if (!iot) {
361 iot = malloc(sizeof(*iot));
50adc0ba 362 iot->pid = pid;
e7c9f3ff 363 iot->device = device;
7997c5b0
JA
364 iot->sector = sector;
365 track_rb_insert(iot);
366 }
367
368 return iot;
369}
370
a01516de 371static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
372{
373 struct io_track *iot;
374
375 if (!track_ios)
376 return;
2e3e8ded 377
a01516de 378 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62
JA
379 if (!iot) {
380 fprintf(stderr, "failed to find mergeable event\n");
381 return;
2e3e8ded 382 }
cb2a1a62
JA
383
384 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 385 iot->sector -= t->bytes >> 9;
cb2a1a62 386 track_rb_insert(iot);
2e3e8ded
JA
387}
388
95c15013 389static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
390{
391 struct io_track *iot;
392
393 if (!track_ios)
394 return;
395
e7c9f3ff 396 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
397 iot->allocation_time = t->time;
398}
399
400
401/*
b6076a9b 402 * return time between rq allocation and insertion
95c15013 403 */
b6076a9b 404static unsigned long long log_track_insert(struct blk_io_trace *t)
95c15013 405{
50adc0ba 406 unsigned long long elapsed;
95c15013
JA
407 struct io_track *iot;
408
409 if (!track_ios)
410 return -1;
411
e7c9f3ff 412 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 413 iot->queue_time = t->time;
50adc0ba
JA
414 elapsed = iot->queue_time - iot->allocation_time;
415
416 if (per_process_stats) {
417 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 418 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 419
b9d40d6f
JA
420 if (ppi && elapsed > ppi->longest_allocation_wait[w])
421 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
422 }
423
424 return elapsed;
2e3e8ded
JA
425}
426
427/*
428 * return time between queue and issue
429 */
430static unsigned long long log_track_issue(struct blk_io_trace *t)
431{
50adc0ba 432 unsigned long long elapsed;
2e3e8ded
JA
433 struct io_track *iot;
434
435 if (!track_ios)
436 return -1;
437 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
438 return -1;
439
e7c9f3ff 440 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
441 if (!iot) {
442 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 443 return -1;
cb2a1a62 444 }
2e3e8ded
JA
445
446 iot->dispatch_time = t->time;
50adc0ba
JA
447 elapsed = iot->dispatch_time - iot->queue_time;
448
449 if (per_process_stats) {
450 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 451 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 452
b9d40d6f
JA
453 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
454 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
455 }
456
457 return elapsed;
2e3e8ded
JA
458}
459
460/*
461 * return time between dispatch and complete
462 */
463static unsigned long long log_track_complete(struct blk_io_trace *t)
464{
465 unsigned long long elapsed;
466 struct io_track *iot;
467
468 if (!track_ios)
469 return -1;
470 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
471 return -1;
472
e7c9f3ff 473 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
474 if (!iot) {
475 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 476 return -1;
cb2a1a62 477 }
2e3e8ded
JA
478
479 iot->completion_time = t->time;
480 elapsed = iot->completion_time - iot->dispatch_time;
481
50adc0ba
JA
482 if (per_process_stats) {
483 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 484 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 485
b9d40d6f
JA
486 if (ppi && elapsed > ppi->longest_completion_wait[w])
487 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
488 }
489
2e3e8ded
JA
490 /*
491 * kill the trace, we don't need it after completion
492 */
493 rb_erase(&iot->rb_node, &rb_track_root);
494 free(iot);
495
496 return elapsed;
497}
498
499
152f6476
JA
500static struct io_stats *find_process_io_stats(__u32 pid, char *name)
501{
502 struct per_process_info *ppi = find_process_by_pid(pid);
503
504 if (!ppi) {
505 ppi = malloc(sizeof(*ppi));
506 memset(ppi, 0, sizeof(*ppi));
507 strncpy(ppi->name, name, sizeof(ppi->name));
508 ppi->pid = pid;
509 add_process_to_hash(ppi);
510 add_process_to_list(ppi);
511 }
512
513 return &ppi->io_stats;
514}
515
e7c9f3ff
NS
516
517static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 518{
e7c9f3ff
NS
519 struct per_cpu_info *cpus = pdi->cpus;
520 int ncpus = pdi->ncpus;
521 int new_count = cpu + 1;
522 int new_space, size;
a718bd37
NS
523 char *new_start;
524
e7c9f3ff
NS
525 size = new_count * sizeof(struct per_cpu_info);
526 cpus = realloc(cpus, size);
527 if (!cpus) {
528 char name[20];
529 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
530 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
531 exit(1);
532 }
533
e7c9f3ff
NS
534 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
535 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 536 memset(new_start, 0, new_space);
e7c9f3ff
NS
537
538 pdi->ncpus = new_count;
539 pdi->cpus = cpus;
540}
cb2a1a62 541
e7c9f3ff
NS
542static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
543{
cb2a1a62
JA
544 struct per_cpu_info *pci;
545
e7c9f3ff
NS
546 if (cpu >= pdi->ncpus)
547 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
548
549 pci = &pdi->cpus[cpu];
550 pci->cpu = cpu;
551 return pci;
a718bd37
NS
552}
553
e7c9f3ff
NS
554
555static int resize_devices(char *name)
a718bd37 556{
e7c9f3ff 557 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 558
e7c9f3ff
NS
559 devices = realloc(devices, size);
560 if (!devices) {
561 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
562 return 1;
563 }
564 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
565 devices[ndevices].name = name;
566 ndevices++;
567 return 0;
568}
a718bd37 569
cb2a1a62 570static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 571{
cb2a1a62 572 struct per_dev_info *pdi;
e7c9f3ff 573 int i;
c499bf38 574
e7c9f3ff
NS
575 for (i = 0; i < ndevices; i++)
576 if (devices[i].id == id)
577 return &devices[i];
cb2a1a62 578
e7c9f3ff
NS
579 if (resize_devices(NULL) != 0)
580 return NULL;
cb2a1a62
JA
581
582 pdi = &devices[ndevices - 1];
583 pdi->id = id;
584 return pdi;
a718bd37
NS
585}
586
e7c9f3ff
NS
587static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
588{
589 if (pdi->name)
590 snprintf(buffer, size, "%s", pdi->name);
591 else
592 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
593 return buffer;
594}
595
e7c9f3ff 596static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
597{
598 unsigned long long this = bit->time;
e7c9f3ff 599 unsigned long long last = pdi->last_reported_time;
cfab07eb 600
e7c9f3ff
NS
601 pdi->backwards = (this < last) ? 'B' : ' ';
602 pdi->last_reported_time = this;
cfab07eb
AB
603}
604
152f6476
JA
605static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
606 int rw)
d0ca268b
JA
607{
608 if (rw) {
152f6476
JA
609 ios->mwrites++;
610 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 611 } else {
152f6476
JA
612 ios->mreads++;
613 ios->qread_kb += t->bytes >> 10;
614 }
615}
616
617static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
618 int rw)
619{
620 __account_m(&pci->io_stats, t, rw);
621
622 if (per_process_stats) {
623 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
624
625 __account_m(ios, t, rw);
d0ca268b
JA
626 }
627}
628
b6076a9b
JA
629static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
630 int rw)
d0ca268b
JA
631{
632 if (rw) {
152f6476
JA
633 ios->qwrites++;
634 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 635 } else {
152f6476
JA
636 ios->qreads++;
637 ios->qread_kb += t->bytes >> 10;
638 }
639}
640
b6076a9b
JA
641static inline void account_queue(struct blk_io_trace *t,
642 struct per_cpu_info *pci, int rw)
152f6476 643{
b6076a9b 644 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
645
646 if (per_process_stats) {
647 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
648
b6076a9b 649 __account_queue(ios, t, rw);
d0ca268b
JA
650 }
651}
652
152f6476 653static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
654{
655 if (rw) {
152f6476
JA
656 ios->cwrites++;
657 ios->cwrite_kb += bytes >> 10;
d0ca268b 658 } else {
152f6476
JA
659 ios->creads++;
660 ios->cread_kb += bytes >> 10;
661 }
662}
663
664static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
665 int rw, int bytes)
666{
667 __account_c(&pci->io_stats, rw, bytes);
668
669 if (per_process_stats) {
670 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
671
672 __account_c(ios, rw, bytes);
d0ca268b
JA
673 }
674}
675
b6076a9b
JA
676static inline void __account_issue(struct io_stats *ios, int rw,
677 unsigned int bytes)
afd2d7ad 678{
679 if (rw) {
152f6476
JA
680 ios->iwrites++;
681 ios->iwrite_kb += bytes >> 10;
afd2d7ad 682 } else {
152f6476
JA
683 ios->ireads++;
684 ios->iread_kb += bytes >> 10;
afd2d7ad 685 }
686}
687
b6076a9b
JA
688static inline void account_issue(struct blk_io_trace *t,
689 struct per_cpu_info *pci, int rw)
d0ca268b 690{
b6076a9b 691 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
692
693 if (per_process_stats) {
694 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 695
b6076a9b 696 __account_issue(ios, rw, t->bytes);
152f6476
JA
697 }
698}
699
06639b27
JA
700static inline void __account_unplug(struct io_stats *ios, int timer)
701{
702 if (timer)
703 ios->timer_unplugs++;
704 else
705 ios->io_unplugs++;
706}
707
708static inline void account_unplug(struct blk_io_trace *t,
709 struct per_cpu_info *pci, int timer)
710{
711 __account_unplug(&pci->io_stats, timer);
712
713 if (per_process_stats) {
714 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
715
716 __account_unplug(ios, timer);
717 }
718}
719
ab197ca7
AB
720#define VALID_SPECS "BCDFGMPQRSTU"
721static char *override_format[256];
722static inline int valid_spec(int spec)
152f6476 723{
ab197ca7 724 return strchr(VALID_SPECS, spec) != NULL;
d0ca268b
JA
725}
726
ab197ca7 727static void set_all_format_specs(char *optarg)
d0ca268b 728{
ab197ca7 729 char *p;
d0ca268b 730
ab197ca7
AB
731 for (p = VALID_SPECS; *p; p++)
732 if (override_format[(int)(*p)] == NULL)
733 override_format[(int)(*p)] = strdup(optarg);
734}
d0ca268b 735
ab197ca7
AB
736static int add_format_spec(char *optarg)
737{
738 int spec = optarg[0];
d0ca268b 739
ab197ca7
AB
740 if (!valid_spec(spec)) {
741 fprintf(stderr,"Bad format specifier %c\n", spec);
742 return 1;
743 }
744 if (optarg[1] != ',') {
745 fprintf(stderr,"Bad format specifier - need ',' %s\n", optarg);
746 return 1;
747 }
748 optarg += 2;
749 if (*optarg == '\0') {
750 fprintf(stderr,"Bad format specifier - need fmt %s\n", optarg);
751 return 1;
752 }
753
754 /*
755 * Set both merges (front and back)
756 */
757 if (spec == 'M') {
758 override_format['B'] = strdup(optarg);
759 override_format['M'] = strdup(optarg);
760 } else
761 override_format[spec] = strdup(optarg);
d0ca268b 762
ab197ca7 763 return 0;
d0ca268b
JA
764}
765
ab197ca7
AB
766static void print_field(char *act, struct per_cpu_info *pci,
767 struct blk_io_trace *t, unsigned long long elapsed,
768 int pdu_len, unsigned char *pdu_buf, char field,
769 int minus, int has_w, int width)
d0ca268b 770{
ab197ca7 771 char format[64];
2e3e8ded 772
ab197ca7
AB
773 if (has_w) {
774 if (minus)
775 sprintf(format, "%%-%d", width);
776 else
777 sprintf(format, "%%%d", width);
778 } else
779 sprintf(format, "%%");
780
781 switch (field) {
782 case 'a':
783 fprintf(ofp, strcat(format, "s"), act);
784 break;
785 case 'c':
786 fprintf(ofp, strcat(format, "d"), pci->cpu);
787 break;
788 case 'C':
789 fprintf(ofp, strcat(format, "s"), t->comm);
790 break;
791 case 'd': {
792 char rwbs[4];
793 int i = 0;
794 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
795 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
796 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
797 if (w)
798 rwbs[i++] = 'W';
799 else
800 rwbs[i++] = 'R';
801 if (b)
802 rwbs[i++] = 'B';
803 if (s)
804 rwbs[i++] = 'S';
805 rwbs[i] = '\0';
806 fprintf(ofp, strcat(format, "s"), rwbs);
807 break;
808 }
809 case 'D': /* format width ignored */
810 fprintf(ofp,"%3d,%-3d", MAJOR(t->device), MINOR(t->device));
811 break;
812 case 'e':
813 fprintf(ofp, strcat(format, "d"), t->error);
814 break;
815 case 'M':
816 fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
817 break;
818 case 'm':
819 fprintf(ofp, strcat(format, "d"), MINOR(t->device));
820 break;
821 case 'n':
822 fprintf(ofp, strcat(format, "u"), t->bytes >> 9);
823 break;
824 case 'p':
825 fprintf(ofp, strcat(format, "u"), t->pid);
826 break;
827 case 'P': /* format width ignored */
828 if ((pdu_len > 0) && (pdu_buf != NULL)) {
829 int i;
830 unsigned char *p = pdu_buf;
182f2fcc
JA
831 for (i = 0; i < pdu_len; i++) {
832 if (i)
833 fprintf(ofp, " ");
834
835 fprintf(ofp, "%02x", *p++);
836 }
ab197ca7
AB
837 }
838 break;
839 case 's':
840 fprintf(ofp, strcat(format, "ld"), t->sequence);
841 break;
842 case 'S':
843 fprintf(ofp, strcat(format, "lu"), t->sector);
844 break;
845 case 't':
846 sprintf(format, "%%0%dlu", has_w ? width : 9);
847 fprintf(ofp, format, NANO_SECONDS(t->time));
848 break;
849 case 'T':
850 fprintf(ofp, strcat(format, "d"), SECONDS(t->time));
851 break;
852 case 'u':
853 if (elapsed == -1ULL) {
854 fprintf(stderr, "Expecting elapsed value\n");
855 exit(1);
856 }
857 fprintf(ofp, strcat(format, "llu"), elapsed / 1000);
858 break;
859 case 'U': {
860 __u64 *depth = (__u64 *) ((char *) t + sizeof(*t));
861 fprintf(ofp, strcat(format, "u"),
862 (unsigned int) be64_to_cpu(*depth));
863 break;
864 }
865 default:
866 fprintf(ofp,strcat(format, "c"), field);
867 break;
868 }
869}
2e3e8ded 870
ab197ca7
AB
871static char *parse_field(char *act, struct per_cpu_info *pci,
872 struct blk_io_trace *t, unsigned long long elapsed,
873 int pdu_len, unsigned char *pdu_buf,
874 char *master_format)
875{
876 int minus = 0;
877 int has_w = 0;
878 int width = 0;
879 char *p = master_format;
880
881 if (*p == '-') {
882 minus = 1;
883 p++;
2e3e8ded 884 }
ab197ca7
AB
885 if (isdigit(*p)) {
886 has_w = 1;
887 do {
888 width = (width * 10) + (*p++ - '0');
889 } while ((*p) && (isdigit(*p)));
890 }
891 if (*p) {
892 print_field(act, pci, t, elapsed, pdu_len, pdu_buf, *p++,
893 minus, has_w, width);
894 }
895 return p;
d0ca268b
JA
896}
897
ab197ca7
AB
898static char *fmt_select(int fmt_spec, struct blk_io_trace *t,
899 unsigned long long elapsed)
d0ca268b 900{
ab197ca7
AB
901 char *fmt;
902 char scratch_format[1024];
2e3e8ded 903
ab197ca7
AB
904 if (override_format[fmt_spec] != NULL)
905 return override_format[fmt_spec];
95c15013 906
ab197ca7
AB
907 switch (fmt_spec) {
908 case 'C': /* Complete */
909 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
910 strcpy(scratch_format, HEADER);
182f2fcc 911 strcat(scratch_format, "(%P) ");
ab197ca7
AB
912 } else {
913 strcpy(scratch_format, HEADER "%S + %n ");
914 if (elapsed != -1ULL)
915 strcat(scratch_format, "(%8u) ");
916 }
917 strcat(scratch_format, "[%e]\n");
918 fmt = scratch_format;
919 break;
920
921 case 'D': /* Issue */
922 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
923 strcpy(scratch_format, HEADER);
182f2fcc 924 strcat(scratch_format, "%n (%P) ");
ab197ca7
AB
925 } else {
926 strcpy(scratch_format, HEADER "%S + %n ");
927 if (elapsed != -1ULL)
928 strcat(scratch_format, "(%8u) ");
929 }
930 strcat(scratch_format,"[%C]\n");
931 fmt = scratch_format;
932 break;
933
b6076a9b
JA
934 case 'I': /* Insert */
935 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
936 strcpy(scratch_format, HEADER);
182f2fcc 937 strcat(scratch_format, "%n (%P) ");
b6076a9b
JA
938 } else {
939 strcpy(scratch_format, HEADER "%S + %n ");
940 if (elapsed != -1ULL)
941 strcat(scratch_format, "(%8u) ");
942 }
943 strcat(scratch_format,"[%C]\n");
944 fmt = scratch_format;
945 break;
946
ab197ca7 947 case 'Q': /* Queue */
93f1c611 948 case 'W': /* Bounce */
ab197ca7
AB
949 strcpy(scratch_format, HEADER "%S + %n ");
950 if (elapsed != -1ULL)
951 strcat(scratch_format, "(%8u) ");
952 strcat(scratch_format,"[%C]\n");
953 fmt = scratch_format;
954 break;
955
956 case 'B': /* Back merge */
957 case 'F': /* Front merge */
958 case 'M': /* Front or back merge */
959 fmt = HEADER "%S + %n [%C]\n";
960 break;
961
962 case 'P': /* Plug */
963 fmt = HEADER "[%C]\n";
964 break;
965
966 case 'G': /* Get request */
967 case 'S': /* Sleep request */
968 fmt = HEADER "%S + %n [%C]\n";
969 break;
970
971 case 'U': /* Unplug IO */
972 case 'T': /* Unplug timer */
973 fmt = HEADER "[%C] %U\n";
974 break;
975
93f1c611
JA
976 case 'X': /* Split */
977 strcpy(scratch_format, HEADER "%S / %U ");
978 strcat(scratch_format,"[%C]\n");
979 fmt = scratch_format;
980 break;
981
ab197ca7
AB
982 default:
983 fprintf(stderr,"FATAL: Invalid format spec %c\n", fmt_spec);
984 exit(1);
985 /*NOTREACHED*/
95c15013 986 }
ab197ca7
AB
987
988 return fmt;
d0ca268b
JA
989}
990
ab197ca7
AB
991static void process_fmt(char *act, struct per_cpu_info *pci,
992 struct blk_io_trace *t, unsigned long long elapsed,
993 int pdu_len, unsigned char *pdu_buf)
d0ca268b 994{
ab197ca7 995 char *p = fmt_select(act[0], t, elapsed);
2e3e8ded 996
ab197ca7
AB
997 while (*p) {
998 switch (*p) {
999 case '%': /* Field specifier */
1000 p++;
1001 if (*p == '%')
1002 fprintf(ofp, "%c", *p++);
1003 else if (!*p)
1004 fprintf(ofp, "%c", '%');
1005 else
1006 p = parse_field(act, pci, t, elapsed,
1007 pdu_len, pdu_buf, p);
1008 break;
1009 case '\\': { /* escape */
1010 switch (p[1]) {
1011 case 'b': fprintf(ofp, "\b"); break;
1012 case 'n': fprintf(ofp, "\n"); break;
1013 case 'r': fprintf(ofp, "\r"); break;
1014 case 't': fprintf(ofp, "\t"); break;
1015 default:
1016 fprintf(stderr,
1017 "Invalid escape char in format %c\n",
1018 p[1]);
1019 exit(1);
1020 /*NOTREACHED*/
1021 }
1022 p += 2;
1023 break;
1024 }
1025 default:
1026 fprintf(ofp, "%c", *p++);
1027 break;
1028 }
2e3e8ded 1029 }
ab197ca7
AB
1030}
1031
1032static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
1033 char *act)
1034{
1035 process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
1036}
1037
b6076a9b
JA
1038static void log_insert(struct per_cpu_info *pci, struct blk_io_trace *t,
1039 char *act)
1040{
1041 process_fmt(act, pci, t, log_track_insert(t), 0, NULL);
1042}
1043
ab197ca7
AB
1044static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1045 char *act)
1046{
b6076a9b 1047 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 1048}
2e3e8ded 1049
ab197ca7
AB
1050static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
1051 char *act)
1052{
1053 process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
d0ca268b
JA
1054}
1055
d5396421 1056static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1057 char *act)
d0ca268b 1058{
a01516de
JA
1059 if (act[0] == 'F')
1060 log_track_frontmerge(t);
2e3e8ded 1061
ab197ca7 1062 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1063}
1064
dfe34da1 1065static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1066 char *act)
dfe34da1 1067{
ab197ca7 1068 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1069}
1070
d5396421 1071static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1072 char *act)
d0ca268b 1073{
ab197ca7 1074 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1075}
1076
ab197ca7 1077static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1078 char *act)
67e14fdc 1079{
ab197ca7 1080 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1081}
1082
93f1c611
JA
1083static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
1084 char *act)
1085{
1086 process_fmt(act, pci, t, -1ULL, 0, NULL);
1087}
1088
ab197ca7 1089static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1090{
ab197ca7 1091 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1092
ab197ca7 1093 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1094}
1095
ff3a732c 1096static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 1097{
56f2af81
JA
1098 int act = t->action & 0xffff;
1099
1100 switch (act) {
d0ca268b 1101 case __BLK_TA_QUEUE:
3639a11e 1102 log_generic(pci, t, "Q");
d0ca268b
JA
1103 break;
1104 case __BLK_TA_GETRQ:
3639a11e 1105 log_generic(pci, t, "G");
d0ca268b
JA
1106 break;
1107 case __BLK_TA_SLEEPRQ:
3639a11e 1108 log_generic(pci, t, "S");
d0ca268b
JA
1109 break;
1110 case __BLK_TA_REQUEUE:
3639a11e 1111 log_generic(pci, t, "R");
d0ca268b
JA
1112 break;
1113 case __BLK_TA_ISSUE:
ab197ca7 1114 log_pc(pci, t, "D");
d0ca268b
JA
1115 break;
1116 case __BLK_TA_COMPLETE:
3639a11e 1117 log_pc(pci, t, "C");
d0ca268b 1118 break;
56f2af81
JA
1119 case __BLK_TA_INSERT:
1120 log_pc(pci, t, "I");
1121 break;
d0ca268b 1122 default:
56f2af81 1123 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 1124 break;
d0ca268b 1125 }
d0ca268b
JA
1126}
1127
d5396421 1128static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
1129{
1130 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 1131 int act = t->action & 0xffff;
d0ca268b 1132
7997c5b0 1133 switch (act) {
d0ca268b 1134 case __BLK_TA_QUEUE:
b6076a9b 1135 account_queue(t, pci, w);
3639a11e 1136 log_queue(pci, t, "Q");
d0ca268b 1137 break;
b6076a9b
JA
1138 case __BLK_TA_INSERT:
1139 log_insert(pci, t, "I");
1140 break;
d0ca268b 1141 case __BLK_TA_BACKMERGE:
152f6476 1142 account_m(t, pci, w);
3639a11e 1143 log_merge(pci, t, "M");
d0ca268b
JA
1144 break;
1145 case __BLK_TA_FRONTMERGE:
152f6476 1146 account_m(t, pci, w);
3639a11e 1147 log_merge(pci, t, "F");
d0ca268b
JA
1148 break;
1149 case __BLK_TA_GETRQ:
95c15013 1150 log_track_getrq(t);
3639a11e 1151 log_generic(pci, t, "G");
d0ca268b
JA
1152 break;
1153 case __BLK_TA_SLEEPRQ:
3639a11e 1154 log_generic(pci, t, "S");
d0ca268b
JA
1155 break;
1156 case __BLK_TA_REQUEUE:
152f6476 1157 account_c(t, pci, w, -t->bytes);
3639a11e 1158 log_queue(pci, t, "R");
d0ca268b
JA
1159 break;
1160 case __BLK_TA_ISSUE:
b6076a9b 1161 account_issue(t, pci, w);
3639a11e 1162 log_issue(pci, t, "D");
d0ca268b
JA
1163 break;
1164 case __BLK_TA_COMPLETE:
152f6476 1165 account_c(t, pci, w, t->bytes);
3639a11e 1166 log_complete(pci, t, "C");
d0ca268b 1167 break;
88b1a526 1168 case __BLK_TA_PLUG:
3639a11e 1169 log_action(pci, t, "P");
88b1a526 1170 break;
3639a11e 1171 case __BLK_TA_UNPLUG_IO:
06639b27 1172 account_unplug(t, pci, 0);
3639a11e
JA
1173 log_unplug(pci, t, "U");
1174 break;
1175 case __BLK_TA_UNPLUG_TIMER:
06639b27 1176 account_unplug(t, pci, 1);
3639a11e 1177 log_unplug(pci, t, "UT");
88b1a526 1178 break;
93f1c611
JA
1179 case __BLK_TA_SPLIT:
1180 log_split(pci, t, "X");
1181 break;
1182 case __BLK_TA_BOUNCE:
1183 log_generic(pci, t, "B");
1184 break;
d0ca268b
JA
1185 default:
1186 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1187 break;
d0ca268b 1188 }
d0ca268b
JA
1189}
1190
ff3a732c
JA
1191static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1192 struct per_dev_info *pdi)
d0ca268b
JA
1193{
1194 if (t->action & BLK_TC_ACT(BLK_TC_PC))
ff3a732c 1195 dump_trace_pc(t, pci);
d0ca268b 1196 else
d5396421 1197 dump_trace_fs(t, pci);
87b72777 1198
e7c9f3ff 1199 pdi->events++;
d0ca268b
JA
1200}
1201
152f6476 1202static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 1203{
152f6476
JA
1204 fprintf(ofp, "%s\n", msg);
1205
1206 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
1207 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 1208
152f6476
JA
1209 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
1210 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
1211 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
1212 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
1213 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 1214 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
1215 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1216 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1217}
1218
50adc0ba
JA
1219static void dump_wait_stats(struct per_process_info *ppi)
1220{
b9d40d6f
JA
1221 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1222 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1223 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1224 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1225 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1226 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1227
1228 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1229 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1230 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1231 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1232 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1233 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1234}
1235
886ecf0e
JA
1236static int ppi_name_compare(const void *p1, const void *p2)
1237{
1238 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1239 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1240 int res;
1241
1242 res = strverscmp(ppi1->name, ppi2->name);
1243 if (!res)
06e6f286 1244 res = ppi1->pid > ppi2->pid;
886ecf0e
JA
1245
1246 return res;
1247}
1248
1249static void sort_process_list(void)
1250{
1251 struct per_process_info **ppis;
1252 struct per_process_info *ppi;
1253 int i = 0;
1254
1255 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1256
1257 ppi = ppi_list;
1258 while (ppi) {
06e6f286 1259 ppis[i++] = ppi;
886ecf0e
JA
1260 ppi = ppi->list_next;
1261 }
1262
06e6f286 1263 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
886ecf0e
JA
1264
1265 i = ppi_list_entries - 1;
1266 ppi_list = NULL;
1267 while (i >= 0) {
1268 ppi = ppis[i];
1269
1270 ppi->list_next = ppi_list;
1271 ppi_list = ppi;
1272 i--;
1273 }
50c38702
JA
1274
1275 free(ppis);
886ecf0e
JA
1276}
1277
152f6476
JA
1278static void show_process_stats(void)
1279{
1280 struct per_process_info *ppi;
1281
886ecf0e
JA
1282 sort_process_list();
1283
152f6476
JA
1284 ppi = ppi_list;
1285 while (ppi) {
ce8b6b4f
JA
1286 char name[64];
1287
1288 snprintf(name, sizeof(name)-1, "%s (%u)", ppi->name, ppi->pid);
1289 dump_io_stats(&ppi->io_stats, name);
50adc0ba 1290 dump_wait_stats(ppi);
152f6476
JA
1291 ppi = ppi->list_next;
1292 }
1293
1294 fprintf(ofp, "\n");
1295}
1296
e7c9f3ff 1297static void show_device_and_cpu_stats(void)
d0ca268b 1298{
e7c9f3ff
NS
1299 struct per_dev_info *pdi;
1300 struct per_cpu_info *pci;
1301 struct io_stats total, *ios;
1302 int i, j, pci_events;
1303 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1304 char name[32];
1305
1306 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1307
1308 memset(&total, 0, sizeof(total));
1309 pci_events = 0;
1310
1311 if (i > 0)
1312 fprintf(ofp, "\n");
1313
1314 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1315 if (!pci->nelems)
1316 continue;
1317
1318 ios = &pci->io_stats;
1319 total.qreads += ios->qreads;
1320 total.qwrites += ios->qwrites;
1321 total.creads += ios->creads;
1322 total.cwrites += ios->cwrites;
1323 total.mreads += ios->mreads;
1324 total.mwrites += ios->mwrites;
1325 total.ireads += ios->ireads;
1326 total.iwrites += ios->iwrites;
1327 total.qread_kb += ios->qread_kb;
1328 total.qwrite_kb += ios->qwrite_kb;
1329 total.cread_kb += ios->cread_kb;
1330 total.cwrite_kb += ios->cwrite_kb;
1331 total.iread_kb += ios->iread_kb;
1332 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1333 total.timer_unplugs += ios->timer_unplugs;
1334 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1335
1336 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1337 j, get_dev_name(pdi, name, sizeof(name)));
1338 dump_io_stats(ios, line);
1339 pci_events++;
1340 }
5c017e4b 1341
e7c9f3ff
NS
1342 if (pci_events > 1) {
1343 fprintf(ofp, "\n");
1344 snprintf(line, sizeof(line) - 1, "Total (%s):",
1345 get_dev_name(pdi, name, sizeof(name)));
1346 dump_io_stats(&total, line);
1347 }
d0ca268b 1348
0aa8a62d
JA
1349 fprintf(ofp, "\nEvents (%s): %'Lu entries, %'lu skips\n",
1350 get_dev_name(pdi, line, sizeof(line)), pdi->events,
1351 pdi->skips);
e7c9f3ff 1352 }
d0ca268b
JA
1353}
1354
cb2a1a62 1355static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1356{
cb2a1a62 1357 unsigned long max_offset = offset;
2ff323b0
JA
1358 unsigned long off;
1359 struct blk_io_trace *bit;
1360 __u32 magic;
1361
1362 for (off = 0; off < max_offset; off++) {
1363 bit = p + off;
1364
1365 magic = be32_to_cpu(bit->magic);
1366 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1367 return bit;
1368 }
1369
1370 return NULL;
1371}
1372
cb2a1a62 1373static int sort_entries(void)
8fc0abbc 1374{
e7c9f3ff 1375 struct per_dev_info *pdi;
412819ce 1376 struct per_cpu_info *pci;
8fc0abbc
JA
1377 struct blk_io_trace *bit;
1378 struct trace *t;
cb2a1a62 1379 int nr = 0;
8fc0abbc 1380
cb2a1a62 1381 while ((t = trace_list) != NULL) {
412819ce 1382
cb2a1a62
JA
1383 trace_list = t->next;
1384 bit = t->bit;
6fe4709e 1385
8fc0abbc
JA
1386 memset(&t->rb_node, 0, sizeof(t->rb_node));
1387
cb2a1a62 1388 if (verify_trace(bit))
66fa7233 1389 break;
cb2a1a62
JA
1390 if (trace_rb_insert(t))
1391 return -1;
66fa7233 1392
cb2a1a62
JA
1393 pdi = get_dev_info(bit->device);
1394 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1395 pci->nelems++;
1396
cb2a1a62 1397 nr++;
6fe4709e 1398 }
8fc0abbc 1399
cb2a1a62 1400 return nr;
412819ce
JA
1401}
1402
f1327cc5 1403static void show_entries_rb(int piped)
8fc0abbc 1404{
e7c9f3ff 1405 struct per_dev_info *pdi;
8fc0abbc 1406 struct blk_io_trace *bit;
3aabcd89 1407 struct rb_node *n;
8fc0abbc
JA
1408 struct trace *t;
1409 int cpu;
1410
cb2a1a62 1411 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1412
1413 t = rb_entry(n, struct trace, rb_node);
1414 bit = t->bit;
1415
cb2a1a62 1416 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1417 if (!pdi) {
1418 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1419 MAJOR(bit->device), MINOR(bit->device));
1420 break;
1421 }
d5396421 1422 cpu = bit->cpu;
e7c9f3ff
NS
1423 if (cpu > pdi->ncpus) {
1424 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1425 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1426 break;
8fc0abbc
JA
1427 }
1428
cb2a1a62
JA
1429 /*
1430 * back off displaying more info if we are out of sync
1431 * on SMP systems. to prevent stalling on lost events,
0aa8a62d 1432 * only allow an event to us a few times
cb2a1a62 1433 */
a3983763
JA
1434 if (bit->sequence != (pdi->last_sequence + 1)) {
1435 if (piped && t->skipped < 5) {
0aa8a62d 1436 t->skipped++;
cb2a1a62 1437 break;
0aa8a62d
JA
1438 } else {
1439 fprintf(stderr, "skipping from %lu to %u\n", pdi->last_sequence, bit->sequence);
1440 pdi->skips++;
cb2a1a62
JA
1441 }
1442 }
1443
1444 pdi->last_sequence = bit->sequence;
1445
cfab07eb 1446 bit->time -= genesis_time;
46e6968b
NS
1447 if (bit->time < stopwatch_start)
1448 continue;
1449 if (bit->time >= stopwatch_end)
1450 break;
8fc0abbc 1451
e7c9f3ff 1452 check_time(pdi, bit);
8fc0abbc 1453
ff3a732c 1454 dump_trace(bit, &pdi->cpus[cpu], pdi);
87b72777 1455
cb2a1a62 1456 rb_erase(&t->rb_node, &rb_sort_root);
e8741a4a 1457
f1327cc5 1458 if (piped) {
e8741a4a
JA
1459 free(bit);
1460 free(t);
1461 }
cb2a1a62 1462 }
8fc0abbc
JA
1463}
1464
1f79c4a0
JA
1465static int read_data(int fd, void *buffer, int bytes, int block)
1466{
1467 int ret, bytes_left, fl;
1468 void *p;
1469
1470 fl = fcntl(fd, F_GETFL);
1471
1472 if (!block)
1473 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1474 else
1475 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1476
1477 bytes_left = bytes;
1478 p = buffer;
1479 while (bytes_left > 0) {
1480 ret = read(fd, p, bytes_left);
1481 if (!ret)
1482 return 1;
1483 else if (ret < 0) {
1484 if (errno != EAGAIN)
1485 perror("read");
1486 return -1;
1487 } else {
1488 p += ret;
1489 bytes_left -= ret;
1490 }
1491 }
1492
1493 return 0;
1494}
1495
cb2a1a62
JA
1496/*
1497 * Find the traces in 'tb' and add them to the list for sorting and
1498 * displaying
1499 */
1500static int find_entries(void *tb, unsigned long size)
1501{
1502 struct blk_io_trace *bit;
1503 struct trace *t;
1504 void *start = tb;
1505
1506 while (tb - start <= size - sizeof(*bit)) {
1507 bit = find_trace(tb, size - (tb - start));
1508 if (!bit)
1509 break;
1510
1511 t = malloc(sizeof(*t));
1512 memset(t, 0, sizeof(*t));
1513 t->bit = bit;
1514
f1327cc5 1515 trace_to_cpu(bit);
cb2a1a62
JA
1516 t->next = trace_list;
1517 trace_list = t;
1518
1519 tb += sizeof(*bit) + bit->pdu_len;
1520 }
1521
1522 return 0;
1523}
1524
d5396421 1525static int do_file(void)
d0ca268b 1526{
cb2a1a62 1527 int i, j, nfiles = 0, nelems;
d0ca268b 1528
e8741a4a 1529 for (i = 0; i < ndevices; i++) {
e7c9f3ff 1530 for (j = 0;; j++, nfiles++) {
e8741a4a 1531 struct per_dev_info *pdi;
e7c9f3ff
NS
1532 struct per_cpu_info *pci;
1533 struct stat st;
1534 void *tb;
87b72777 1535
e8741a4a 1536 pdi = &devices[i];
e7c9f3ff
NS
1537 pci = get_cpu_info(pdi, j);
1538 pci->cpu = j;
d0ca268b 1539
e7c9f3ff 1540 snprintf(pci->fname, sizeof(pci->fname)-1,
9f6486bd 1541 "%s.blktrace.%d", pdi->name, j);
e7c9f3ff
NS
1542 if (stat(pci->fname, &st) < 0)
1543 break;
1544 if (!st.st_size)
1545 continue;
1546
1547 printf("Processing %s\n", pci->fname);
1548
1549 tb = malloc(st.st_size);
1550 if (!tb) {
1551 fprintf(stderr, "Out of memory, skip file %s\n",
1552 pci->fname);
1553 continue;
1554 }
1555
1556 pci->fd = open(pci->fname, O_RDONLY);
1557 if (pci->fd < 0) {
1558 perror(pci->fname);
1559 free(tb);
1560 continue;
1561 }
1562
1563 if (read_data(pci->fd, tb, st.st_size, 1)) {
1564 close(pci->fd);
1565 free(tb);
1566 continue;
1567 }
1568
cb2a1a62
JA
1569 if (find_entries(tb, st.st_size)) {
1570 close(pci->fd);
1571 free(tb);
1572 }
1573
1574 nelems = sort_entries();
1575 if (nelems == -1) {
e7c9f3ff
NS
1576 close(pci->fd);
1577 free(tb);
1578 continue;
1579 }
1580
1581 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1582 pci->fname, j, nelems);
e7c9f3ff 1583 close(pci->fd);
d0ca268b 1584 }
d5396421
JA
1585 }
1586
1587 if (!nfiles) {
1588 fprintf(stderr, "No files found\n");
1589 return 1;
1590 }
1591
e8741a4a 1592 show_entries_rb(0);
d5396421
JA
1593 return 0;
1594}
1595
cb2a1a62 1596static int read_sort_events(int fd)
d5396421 1597{
cb2a1a62 1598 int events = 0;
d5396421 1599
412819ce 1600 do {
cb2a1a62
JA
1601 struct blk_io_trace *bit;
1602 struct trace *t;
412819ce 1603 int pdu_len;
51128a28 1604 __u32 magic;
d5396421 1605
cb2a1a62 1606 bit = malloc(sizeof(*bit));
d5396421 1607
cb2a1a62 1608 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1609 break;
d5396421 1610
cb2a1a62 1611 magic = be32_to_cpu(bit->magic);
51128a28
JA
1612 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1613 fprintf(stderr, "Bad magic %x\n", magic);
1614 break;
1615 }
1616
cb2a1a62 1617 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1618 if (pdu_len) {
cb2a1a62 1619 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1620
cb2a1a62 1621 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1622 break;
d5396421 1623
cb2a1a62 1624 bit = ptr;
2ff323b0 1625 }
d5396421 1626
cb2a1a62
JA
1627 t = malloc(sizeof(*t));
1628 memset(t, 0, sizeof(*t));
1629 t->bit = bit;
f1327cc5
JA
1630
1631 trace_to_cpu(bit);
cb2a1a62
JA
1632 t->next = trace_list;
1633 trace_list = t;
1634
412819ce 1635 events++;
79f19470 1636 } while (!is_done() && events < rb_batch);
d5396421 1637
412819ce
JA
1638 return events;
1639}
d5396421 1640
412819ce
JA
1641static int do_stdin(void)
1642{
1643 int fd;
d5396421 1644
1f79c4a0 1645 fd = dup(STDIN_FILENO);
412819ce
JA
1646 do {
1647 int events;
d5396421 1648
cb2a1a62 1649 events = read_sort_events(fd);
412819ce
JA
1650 if (!events)
1651 break;
1652
cb2a1a62 1653 if (sort_entries() == -1)
2ff323b0
JA
1654 break;
1655
e8741a4a 1656 show_entries_rb(1);
d5396421
JA
1657 } while (1);
1658
1659 close(fd);
d5396421
JA
1660 return 0;
1661}
d0ca268b 1662
1f79c4a0 1663static void flush_output(void)
412819ce 1664{
152f6476 1665 fflush(ofp);
412819ce
JA
1666}
1667
1f79c4a0 1668static void handle_sigint(int sig)
412819ce
JA
1669{
1670 done = 1;
1671 flush_output();
1672}
1673
46e6968b
NS
1674/*
1675 * Extract start and duration times from a string, allowing
1676 * us to specify a time interval of interest within a trace.
1677 * Format: "duration" (start is zero) or "start:duration".
1678 */
1679static int find_stopwatch_interval(char *string)
1680{
1681 double value;
1682 char *sp;
1683
1684 value = strtod(string, &sp);
1685 if (sp == string) {
1686 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1687 return 1;
1688 }
1689 if (*sp == ':') {
1690 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1691 string = sp + 1;
1692 value = strtod(string, &sp);
1693 if (sp == string || *sp != '\0') {
1694 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1695 string);
1696 return 1;
1697 }
1698 } else if (*sp != '\0') {
1699 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1700 return 1;
1701 }
1702 stopwatch_end = stopwatch_start + DOUBLE_TO_NANO_ULL(value);
1703 return 0;
1704}
1705
1f79c4a0
JA
1706static void usage(char *prog)
1707{
46e6968b
NS
1708 fprintf(stderr, "Usage: %s "
1709 "[-i <name>] [-o <output>] [-s] [-w N[:n]] <name>...\n",
1710 prog);
1f79c4a0
JA
1711}
1712
d5396421
JA
1713int main(int argc, char *argv[])
1714{
152f6476 1715 char *ofp_buffer;
a66877e6 1716 int c, ret, mode;
1e1c60f1 1717 int per_device_and_cpu_stats = 1;
d5396421
JA
1718
1719 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1720 switch (c) {
1721 case 'i':
e7c9f3ff
NS
1722 if (!strcmp(optarg, "-") && !pipeline)
1723 pipeline = 1;
1724 else if (resize_devices(optarg) != 0)
1725 return 1;
d5396421
JA
1726 break;
1727 case 'o':
66efebf8 1728 output_name = optarg;
d5396421 1729 break;
79f19470
JA
1730 case 'b':
1731 rb_batch = atoi(optarg);
1732 if (rb_batch <= 0)
1733 rb_batch = RB_BATCH_DEFAULT;
1734 break;
152f6476
JA
1735 case 's':
1736 per_process_stats = 1;
1737 break;
7997c5b0
JA
1738 case 't':
1739 track_ios = 1;
1740 break;
1e1c60f1
NS
1741 case 'q':
1742 per_device_and_cpu_stats = 0;
1743 break;
46e6968b
NS
1744 case 'w':
1745 if (find_stopwatch_interval(optarg) != 0)
1746 return 1;
1747 break;
ab197ca7
AB
1748 case 'f':
1749 set_all_format_specs(optarg);
1750 break;
1751 case 'F':
1752 if (add_format_spec(optarg) != 0)
1753 return 1;
1754 break;
d5396421 1755 default:
1f79c4a0 1756 usage(argv[0]);
d5396421
JA
1757 return 1;
1758 }
d0ca268b
JA
1759 }
1760
e7c9f3ff
NS
1761 while (optind < argc) {
1762 if (!strcmp(argv[optind], "-") && !pipeline)
1763 pipeline = 1;
1764 else if (resize_devices(argv[optind]) != 0)
1765 return 1;
1766 optind++;
1767 }
1768
1769 if (!pipeline && !ndevices) {
1f79c4a0 1770 usage(argv[0]);
d5396421
JA
1771 return 1;
1772 }
1773
7997c5b0
JA
1774 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1775 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1776
1777 signal(SIGINT, handle_sigint);
1778 signal(SIGHUP, handle_sigint);
1779 signal(SIGTERM, handle_sigint);
d5396421 1780
d69db225
JA
1781 setlocale(LC_NUMERIC, "en_US");
1782
a66877e6 1783 if (!output_name) {
152f6476 1784 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1785 mode = _IOLBF;
1786 } else {
152f6476
JA
1787 char ofname[128];
1788
1789 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1790 ofp = fopen(ofname, "w");
a66877e6 1791 mode = _IOFBF;
152f6476
JA
1792 }
1793
1794 if (!ofp) {
1795 perror("fopen");
1796 return 1;
1797 }
1798
1799 ofp_buffer = malloc(4096);
a66877e6 1800 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1801 perror("setvbuf");
1802 return 1;
1803 }
1804
e7c9f3ff 1805 if (pipeline)
d5396421
JA
1806 ret = do_stdin();
1807 else
1808 ret = do_file();
1809
152f6476
JA
1810 if (per_process_stats)
1811 show_process_stats();
1812
1e1c60f1
NS
1813 if (per_device_and_cpu_stats)
1814 show_device_and_cpu_stats();
152f6476 1815
412819ce 1816 flush_output();
d5396421 1817 return ret;
d0ca268b 1818}