[PATCH] README: typo
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
ab197ca7 33#include <ctype.h>
d0ca268b 34
8fc0abbc
JA
35#include "blktrace.h"
36#include "rbtree.h"
d0ca268b 37
2e3e8ded
JA
38#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
39#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
46e6968b 40#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
cfab07eb 41
e7c9f3ff
NS
42#define MINORBITS 20
43#define MINORMASK ((1U << MINORBITS) - 1)
44#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
45#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
46
47#define min(a, b) ((a) < (b) ? (a) : (b))
d5396421 48
ab197ca7
AB
49#define HEADER "%D %2c %8s %5T.%9t %5p %2a %3d "
50
152f6476
JA
51struct io_stats {
52 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53 unsigned long ireads, iwrites;
54 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55 unsigned long long iread_kb, iwrite_kb;
06639b27 56 unsigned long io_unplugs, timer_unplugs;
152f6476
JA
57};
58
d5396421 59struct per_cpu_info {
d0ca268b
JA
60 int cpu;
61 int nelems;
d0ca268b
JA
62
63 int fd;
87b72777 64 char fname[128];
d0ca268b 65
152f6476
JA
66 struct io_stats io_stats;
67};
8fc0abbc 68
e7c9f3ff
NS
69struct per_dev_info {
70 dev_t id;
71 char *name;
72
73 int backwards;
74 unsigned long long events;
75 unsigned long long last_reported_time;
76 struct io_stats io_stats;
cb2a1a62 77 unsigned long last_sequence;
0aa8a62d 78 unsigned long skips;
e7c9f3ff
NS
79
80 int ncpus;
81 struct per_cpu_info *cpus;
82};
83
152f6476
JA
84struct per_process_info {
85 char name[16];
86 __u32 pid;
87 struct io_stats io_stats;
88 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
89
90 /*
91 * individual io stats
92 */
b9d40d6f
JA
93 unsigned long long longest_allocation_wait[2];
94 unsigned long long longest_dispatch_wait[2];
95 unsigned long long longest_completion_wait[2];
d0ca268b
JA
96};
97
152f6476
JA
98#define PPI_HASH_SHIFT (8)
99static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
100static struct per_process_info *ppi_list;
886ecf0e 101static int ppi_list_entries;
152f6476 102
ab197ca7 103#define S_OPTS "i:o:b:stqw:f:F:"
d5396421
JA
104static struct option l_opts[] = {
105 {
106 .name = "input",
428683db 107 .has_arg = required_argument,
d5396421
JA
108 .flag = NULL,
109 .val = 'i'
110 },
111 {
112 .name = "output",
428683db 113 .has_arg = required_argument,
d5396421
JA
114 .flag = NULL,
115 .val = 'o'
116 },
79f19470
JA
117 {
118 .name = "batch",
428683db 119 .has_arg = required_argument,
79f19470
JA
120 .flag = NULL,
121 .val = 'b'
122 },
152f6476
JA
123 {
124 .name = "per program stats",
428683db 125 .has_arg = no_argument,
152f6476
JA
126 .flag = NULL,
127 .val = 's'
128 },
7997c5b0
JA
129 {
130 .name = "track ios",
428683db 131 .has_arg = no_argument,
7997c5b0
JA
132 .flag = NULL,
133 .val = 't'
134 },
1e1c60f1
NS
135 {
136 .name = "quiet",
428683db 137 .has_arg = no_argument,
1e1c60f1
NS
138 .flag = NULL,
139 .val = 'q'
140 },
46e6968b
NS
141 {
142 .name = "stopwatch",
428683db 143 .has_arg = required_argument,
46e6968b
NS
144 .flag = NULL,
145 .val = 'w'
146 },
ab197ca7
AB
147 {
148 .name = "format",
428683db 149 .has_arg = required_argument,
ab197ca7
AB
150 .flag = NULL,
151 .val = 'f'
152 },
153 {
154 .name = "format-spec",
428683db 155 .has_arg = required_argument,
ab197ca7
AB
156 .flag = NULL,
157 .val = 'F'
158 },
d5396421
JA
159};
160
7997c5b0
JA
161/*
162 * for sorting the displayed output
163 */
8fc0abbc
JA
164struct trace {
165 struct blk_io_trace *bit;
166 struct rb_node rb_node;
cb2a1a62
JA
167 struct trace *next;
168 int skipped;
8fc0abbc
JA
169};
170
cb2a1a62
JA
171static struct rb_root rb_sort_root;
172static struct rb_root rb_track_root;
173
174static struct trace *trace_list;
175
7997c5b0
JA
176/*
177 * for tracking individual ios
178 */
179struct io_track {
180 struct rb_node rb_node;
181
e7c9f3ff 182 dev_t device;
7997c5b0
JA
183 __u64 sector;
184 __u32 pid;
95c15013 185 unsigned long long allocation_time;
7997c5b0
JA
186 unsigned long long queue_time;
187 unsigned long long dispatch_time;
188 unsigned long long completion_time;
189};
190
e7c9f3ff
NS
191static int ndevices;
192static struct per_dev_info *devices;
193static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 194
152f6476 195static FILE *ofp;
e7c9f3ff
NS
196static char *output_name;
197
198static unsigned long long genesis_time;
46e6968b
NS
199static unsigned long long stopwatch_start; /* start from zero by default */
200static unsigned long long stopwatch_end = ULONG_LONG_MAX; /* "infinity" */
152f6476
JA
201
202static int per_process_stats;
7997c5b0 203static int track_ios;
d0ca268b 204
79f19470
JA
205#define RB_BATCH_DEFAULT (1024)
206static int rb_batch = RB_BATCH_DEFAULT;
207
e7c9f3ff
NS
208static int pipeline;
209
412819ce
JA
210#define is_done() (*(volatile int *)(&done))
211static volatile int done;
212
152f6476
JA
213static inline unsigned long hash_long(unsigned long val)
214{
16ef714e
JA
215#if __WORDSIZE == 32
216 val *= 0x9e370001UL;
217#elif __WORDSIZE == 64
218 val *= 0x9e37fffffffc0001UL;
219#else
220#error unknown word size
221#endif
222
223 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
224}
225
226static inline void add_process_to_hash(struct per_process_info *ppi)
227{
228 const int hash_idx = hash_long(ppi->pid);
229
230 ppi->hash_next = ppi_hash[hash_idx];
231 ppi_hash[hash_idx] = ppi;
232}
233
234static inline void add_process_to_list(struct per_process_info *ppi)
235{
236 ppi->list_next = ppi_list;
237 ppi_list = ppi;
886ecf0e 238 ppi_list_entries++;
152f6476
JA
239}
240
241static struct per_process_info *find_process_by_pid(__u32 pid)
242{
243 const int hash_idx = hash_long(pid);
244 struct per_process_info *ppi;
245
246 ppi = ppi_hash[hash_idx];
247 while (ppi) {
248 if (ppi->pid == pid)
249 return ppi;
250
251 ppi = ppi->hash_next;
252 }
253
254 return NULL;
255}
256
7997c5b0
JA
257static inline int trace_rb_insert(struct trace *t)
258{
259 struct rb_node **p = &rb_sort_root.rb_node;
260 struct rb_node *parent = NULL;
261 struct trace *__t;
262
e7c9f3ff
NS
263 if (genesis_time == 0 || t->bit->time < genesis_time)
264 genesis_time = t->bit->time;
265
7997c5b0
JA
266 while (*p) {
267 parent = *p;
268 __t = rb_entry(parent, struct trace, rb_node);
269
e7c9f3ff
NS
270 if (t->bit->time < __t->bit->time)
271 p = &(*p)->rb_left;
272 else if (t->bit->time > __t->bit->time)
273 p = &(*p)->rb_right;
274 else if (t->bit->device < __t->bit->device)
275 p = &(*p)->rb_left;
276 else if (t->bit->device > __t->bit->device)
277 p = &(*p)->rb_right;
278 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
279 p = &(*p)->rb_left;
280 else if (t->bit->sequence > __t->bit->sequence)
281 p = &(*p)->rb_right;
e7c9f3ff
NS
282 else if (t->bit->device == __t->bit->device) {
283 fprintf(stderr,
284 "sequence alias (%d) on device %d,%d!\n",
285 t->bit->sequence,
286 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
287 return 1;
288 }
289 }
290
291 rb_link_node(&t->rb_node, parent, p);
292 rb_insert_color(&t->rb_node, &rb_sort_root);
293 return 0;
294}
295
296static inline int track_rb_insert(struct io_track *iot)
297{
298 struct rb_node **p = &rb_track_root.rb_node;
299 struct rb_node *parent = NULL;
300 struct io_track *__iot;
301
302 while (*p) {
303 parent = *p;
e7c9f3ff 304
7997c5b0
JA
305 __iot = rb_entry(parent, struct io_track, rb_node);
306
e7c9f3ff
NS
307 if (iot->device < __iot->device)
308 p = &(*p)->rb_left;
309 else if (iot->device > __iot->device)
310 p = &(*p)->rb_right;
311 else if (iot->sector < __iot->sector)
7997c5b0
JA
312 p = &(*p)->rb_left;
313 else if (iot->sector > __iot->sector)
314 p = &(*p)->rb_right;
315 else {
e7c9f3ff 316 fprintf(stderr,
ab197ca7
AB
317 "sector alias (%Lu) on device %d,%d!\n",
318 (unsigned long long) iot->sector,
e7c9f3ff 319 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
320 return 1;
321 }
322 }
323
324 rb_link_node(&iot->rb_node, parent, p);
325 rb_insert_color(&iot->rb_node, &rb_track_root);
326 return 0;
327}
328
e7c9f3ff 329static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
330{
331 struct rb_node **p = &rb_track_root.rb_node;
332 struct rb_node *parent = NULL;
333 struct io_track *__iot;
334
335 while (*p) {
336 parent = *p;
337
338 __iot = rb_entry(parent, struct io_track, rb_node);
339
e7c9f3ff
NS
340 if (device < __iot->device)
341 p = &(*p)->rb_left;
342 else if (device > __iot->device)
343 p = &(*p)->rb_right;
344 else if (sector < __iot->sector)
7997c5b0
JA
345 p = &(*p)->rb_left;
346 else if (sector > __iot->sector)
347 p = &(*p)->rb_right;
348 else
349 return __iot;
350 }
351
352 return NULL;
353}
354
e7c9f3ff 355static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 356{
916b5501 357 struct io_track *iot;
7997c5b0 358
e7c9f3ff 359 iot = __find_track(device, sector);
7997c5b0
JA
360 if (!iot) {
361 iot = malloc(sizeof(*iot));
50adc0ba 362 iot->pid = pid;
e7c9f3ff 363 iot->device = device;
7997c5b0
JA
364 iot->sector = sector;
365 track_rb_insert(iot);
366 }
367
368 return iot;
369}
370
a01516de 371static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
372{
373 struct io_track *iot;
374
375 if (!track_ios)
376 return;
2e3e8ded 377
a01516de 378 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62
JA
379 if (!iot) {
380 fprintf(stderr, "failed to find mergeable event\n");
381 return;
2e3e8ded 382 }
cb2a1a62
JA
383
384 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 385 iot->sector -= t->bytes >> 9;
cb2a1a62 386 track_rb_insert(iot);
2e3e8ded
JA
387}
388
95c15013 389static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
390{
391 struct io_track *iot;
392
393 if (!track_ios)
394 return;
395
e7c9f3ff 396 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
397 iot->allocation_time = t->time;
398}
399
400
401/*
b6076a9b 402 * return time between rq allocation and insertion
95c15013 403 */
b6076a9b 404static unsigned long long log_track_insert(struct blk_io_trace *t)
95c15013 405{
50adc0ba 406 unsigned long long elapsed;
95c15013
JA
407 struct io_track *iot;
408
409 if (!track_ios)
410 return -1;
411
e7c9f3ff 412 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 413 iot->queue_time = t->time;
50adc0ba
JA
414 elapsed = iot->queue_time - iot->allocation_time;
415
416 if (per_process_stats) {
417 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 418 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 419
b9d40d6f
JA
420 if (ppi && elapsed > ppi->longest_allocation_wait[w])
421 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
422 }
423
424 return elapsed;
2e3e8ded
JA
425}
426
427/*
428 * return time between queue and issue
429 */
430static unsigned long long log_track_issue(struct blk_io_trace *t)
431{
50adc0ba 432 unsigned long long elapsed;
2e3e8ded
JA
433 struct io_track *iot;
434
435 if (!track_ios)
436 return -1;
437 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
438 return -1;
439
e7c9f3ff 440 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
441 if (!iot) {
442 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 443 return -1;
cb2a1a62 444 }
2e3e8ded
JA
445
446 iot->dispatch_time = t->time;
50adc0ba
JA
447 elapsed = iot->dispatch_time - iot->queue_time;
448
449 if (per_process_stats) {
450 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 451 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 452
b9d40d6f
JA
453 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
454 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
455 }
456
457 return elapsed;
2e3e8ded
JA
458}
459
460/*
461 * return time between dispatch and complete
462 */
463static unsigned long long log_track_complete(struct blk_io_trace *t)
464{
465 unsigned long long elapsed;
466 struct io_track *iot;
467
468 if (!track_ios)
469 return -1;
470 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
471 return -1;
472
e7c9f3ff 473 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
474 if (!iot) {
475 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 476 return -1;
cb2a1a62 477 }
2e3e8ded
JA
478
479 iot->completion_time = t->time;
480 elapsed = iot->completion_time - iot->dispatch_time;
481
50adc0ba
JA
482 if (per_process_stats) {
483 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 484 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 485
b9d40d6f
JA
486 if (ppi && elapsed > ppi->longest_completion_wait[w])
487 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
488 }
489
2e3e8ded
JA
490 /*
491 * kill the trace, we don't need it after completion
492 */
493 rb_erase(&iot->rb_node, &rb_track_root);
494 free(iot);
495
496 return elapsed;
497}
498
499
152f6476
JA
500static struct io_stats *find_process_io_stats(__u32 pid, char *name)
501{
502 struct per_process_info *ppi = find_process_by_pid(pid);
503
504 if (!ppi) {
505 ppi = malloc(sizeof(*ppi));
506 memset(ppi, 0, sizeof(*ppi));
507 strncpy(ppi->name, name, sizeof(ppi->name));
508 ppi->pid = pid;
509 add_process_to_hash(ppi);
510 add_process_to_list(ppi);
511 }
512
513 return &ppi->io_stats;
514}
515
e7c9f3ff
NS
516
517static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 518{
e7c9f3ff
NS
519 struct per_cpu_info *cpus = pdi->cpus;
520 int ncpus = pdi->ncpus;
521 int new_count = cpu + 1;
522 int new_space, size;
a718bd37
NS
523 char *new_start;
524
e7c9f3ff
NS
525 size = new_count * sizeof(struct per_cpu_info);
526 cpus = realloc(cpus, size);
527 if (!cpus) {
528 char name[20];
529 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
530 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
531 exit(1);
532 }
533
e7c9f3ff
NS
534 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
535 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 536 memset(new_start, 0, new_space);
e7c9f3ff
NS
537
538 pdi->ncpus = new_count;
539 pdi->cpus = cpus;
540}
cb2a1a62 541
e7c9f3ff
NS
542static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
543{
cb2a1a62
JA
544 struct per_cpu_info *pci;
545
e7c9f3ff
NS
546 if (cpu >= pdi->ncpus)
547 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
548
549 pci = &pdi->cpus[cpu];
550 pci->cpu = cpu;
551 return pci;
a718bd37
NS
552}
553
e7c9f3ff
NS
554
555static int resize_devices(char *name)
a718bd37 556{
e7c9f3ff 557 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 558
e7c9f3ff
NS
559 devices = realloc(devices, size);
560 if (!devices) {
561 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
562 return 1;
563 }
564 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
565 devices[ndevices].name = name;
566 ndevices++;
567 return 0;
568}
a718bd37 569
cb2a1a62 570static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 571{
cb2a1a62 572 struct per_dev_info *pdi;
e7c9f3ff 573 int i;
c499bf38 574
e7c9f3ff
NS
575 for (i = 0; i < ndevices; i++)
576 if (devices[i].id == id)
577 return &devices[i];
cb2a1a62 578
e7c9f3ff
NS
579 if (resize_devices(NULL) != 0)
580 return NULL;
cb2a1a62
JA
581
582 pdi = &devices[ndevices - 1];
583 pdi->id = id;
584 return pdi;
a718bd37
NS
585}
586
e7c9f3ff
NS
587static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
588{
589 if (pdi->name)
590 snprintf(buffer, size, "%s", pdi->name);
591 else
592 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
593 return buffer;
594}
595
e7c9f3ff 596static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
597{
598 unsigned long long this = bit->time;
e7c9f3ff 599 unsigned long long last = pdi->last_reported_time;
cfab07eb 600
e7c9f3ff
NS
601 pdi->backwards = (this < last) ? 'B' : ' ';
602 pdi->last_reported_time = this;
cfab07eb
AB
603}
604
152f6476
JA
605static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
606 int rw)
d0ca268b
JA
607{
608 if (rw) {
152f6476
JA
609 ios->mwrites++;
610 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 611 } else {
152f6476
JA
612 ios->mreads++;
613 ios->qread_kb += t->bytes >> 10;
614 }
615}
616
617static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
618 int rw)
619{
620 __account_m(&pci->io_stats, t, rw);
621
622 if (per_process_stats) {
623 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
624
625 __account_m(ios, t, rw);
d0ca268b
JA
626 }
627}
628
b6076a9b
JA
629static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
630 int rw)
d0ca268b
JA
631{
632 if (rw) {
152f6476
JA
633 ios->qwrites++;
634 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 635 } else {
152f6476
JA
636 ios->qreads++;
637 ios->qread_kb += t->bytes >> 10;
638 }
639}
640
b6076a9b
JA
641static inline void account_queue(struct blk_io_trace *t,
642 struct per_cpu_info *pci, int rw)
152f6476 643{
b6076a9b 644 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
645
646 if (per_process_stats) {
647 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
648
b6076a9b 649 __account_queue(ios, t, rw);
d0ca268b
JA
650 }
651}
652
152f6476 653static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
654{
655 if (rw) {
152f6476
JA
656 ios->cwrites++;
657 ios->cwrite_kb += bytes >> 10;
d0ca268b 658 } else {
152f6476
JA
659 ios->creads++;
660 ios->cread_kb += bytes >> 10;
661 }
662}
663
664static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
665 int rw, int bytes)
666{
667 __account_c(&pci->io_stats, rw, bytes);
668
669 if (per_process_stats) {
670 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
671
672 __account_c(ios, rw, bytes);
d0ca268b
JA
673 }
674}
675
b6076a9b
JA
676static inline void __account_issue(struct io_stats *ios, int rw,
677 unsigned int bytes)
afd2d7ad 678{
679 if (rw) {
152f6476
JA
680 ios->iwrites++;
681 ios->iwrite_kb += bytes >> 10;
afd2d7ad 682 } else {
152f6476
JA
683 ios->ireads++;
684 ios->iread_kb += bytes >> 10;
afd2d7ad 685 }
686}
687
b6076a9b
JA
688static inline void account_issue(struct blk_io_trace *t,
689 struct per_cpu_info *pci, int rw)
d0ca268b 690{
b6076a9b 691 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
692
693 if (per_process_stats) {
694 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 695
b6076a9b 696 __account_issue(ios, rw, t->bytes);
152f6476
JA
697 }
698}
699
06639b27
JA
700static inline void __account_unplug(struct io_stats *ios, int timer)
701{
702 if (timer)
703 ios->timer_unplugs++;
704 else
705 ios->io_unplugs++;
706}
707
708static inline void account_unplug(struct blk_io_trace *t,
709 struct per_cpu_info *pci, int timer)
710{
711 __account_unplug(&pci->io_stats, timer);
712
713 if (per_process_stats) {
714 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
715
716 __account_unplug(ios, timer);
717 }
718}
719
ab197ca7
AB
720#define VALID_SPECS "BCDFGMPQRSTU"
721static char *override_format[256];
722static inline int valid_spec(int spec)
152f6476 723{
ab197ca7 724 return strchr(VALID_SPECS, spec) != NULL;
d0ca268b
JA
725}
726
ab197ca7 727static void set_all_format_specs(char *optarg)
d0ca268b 728{
ab197ca7 729 char *p;
d0ca268b 730
ab197ca7
AB
731 for (p = VALID_SPECS; *p; p++)
732 if (override_format[(int)(*p)] == NULL)
733 override_format[(int)(*p)] = strdup(optarg);
734}
d0ca268b 735
ab197ca7
AB
736static int add_format_spec(char *optarg)
737{
738 int spec = optarg[0];
d0ca268b 739
ab197ca7
AB
740 if (!valid_spec(spec)) {
741 fprintf(stderr,"Bad format specifier %c\n", spec);
742 return 1;
743 }
744 if (optarg[1] != ',') {
745 fprintf(stderr,"Bad format specifier - need ',' %s\n", optarg);
746 return 1;
747 }
748 optarg += 2;
749 if (*optarg == '\0') {
750 fprintf(stderr,"Bad format specifier - need fmt %s\n", optarg);
751 return 1;
752 }
753
754 /*
755 * Set both merges (front and back)
756 */
757 if (spec == 'M') {
758 override_format['B'] = strdup(optarg);
759 override_format['M'] = strdup(optarg);
760 } else
761 override_format[spec] = strdup(optarg);
d0ca268b 762
ab197ca7 763 return 0;
d0ca268b
JA
764}
765
ab197ca7
AB
766static void print_field(char *act, struct per_cpu_info *pci,
767 struct blk_io_trace *t, unsigned long long elapsed,
768 int pdu_len, unsigned char *pdu_buf, char field,
769 int minus, int has_w, int width)
d0ca268b 770{
ab197ca7 771 char format[64];
2e3e8ded 772
ab197ca7
AB
773 if (has_w) {
774 if (minus)
775 sprintf(format, "%%-%d", width);
776 else
777 sprintf(format, "%%%d", width);
778 } else
779 sprintf(format, "%%");
780
781 switch (field) {
782 case 'a':
783 fprintf(ofp, strcat(format, "s"), act);
784 break;
785 case 'c':
786 fprintf(ofp, strcat(format, "d"), pci->cpu);
787 break;
788 case 'C':
789 fprintf(ofp, strcat(format, "s"), t->comm);
790 break;
791 case 'd': {
792 char rwbs[4];
793 int i = 0;
794 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
795 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
796 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
797 if (w)
798 rwbs[i++] = 'W';
799 else
800 rwbs[i++] = 'R';
801 if (b)
802 rwbs[i++] = 'B';
803 if (s)
804 rwbs[i++] = 'S';
805 rwbs[i] = '\0';
806 fprintf(ofp, strcat(format, "s"), rwbs);
807 break;
808 }
809 case 'D': /* format width ignored */
810 fprintf(ofp,"%3d,%-3d", MAJOR(t->device), MINOR(t->device));
811 break;
812 case 'e':
813 fprintf(ofp, strcat(format, "d"), t->error);
814 break;
815 case 'M':
816 fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
817 break;
818 case 'm':
819 fprintf(ofp, strcat(format, "d"), MINOR(t->device));
820 break;
821 case 'n':
822 fprintf(ofp, strcat(format, "u"), t->bytes >> 9);
823 break;
824 case 'p':
825 fprintf(ofp, strcat(format, "u"), t->pid);
826 break;
827 case 'P': /* format width ignored */
828 if ((pdu_len > 0) && (pdu_buf != NULL)) {
829 int i;
830 unsigned char *p = pdu_buf;
831 for (i = 0; i < pdu_len; i++)
832 fprintf(ofp, "%02x ", *p++);
833 }
834 break;
835 case 's':
836 fprintf(ofp, strcat(format, "ld"), t->sequence);
837 break;
838 case 'S':
839 fprintf(ofp, strcat(format, "lu"), t->sector);
840 break;
841 case 't':
842 sprintf(format, "%%0%dlu", has_w ? width : 9);
843 fprintf(ofp, format, NANO_SECONDS(t->time));
844 break;
845 case 'T':
846 fprintf(ofp, strcat(format, "d"), SECONDS(t->time));
847 break;
848 case 'u':
849 if (elapsed == -1ULL) {
850 fprintf(stderr, "Expecting elapsed value\n");
851 exit(1);
852 }
853 fprintf(ofp, strcat(format, "llu"), elapsed / 1000);
854 break;
855 case 'U': {
856 __u64 *depth = (__u64 *) ((char *) t + sizeof(*t));
857 fprintf(ofp, strcat(format, "u"),
858 (unsigned int) be64_to_cpu(*depth));
859 break;
860 }
861 default:
862 fprintf(ofp,strcat(format, "c"), field);
863 break;
864 }
865}
2e3e8ded 866
ab197ca7
AB
867static char *parse_field(char *act, struct per_cpu_info *pci,
868 struct blk_io_trace *t, unsigned long long elapsed,
869 int pdu_len, unsigned char *pdu_buf,
870 char *master_format)
871{
872 int minus = 0;
873 int has_w = 0;
874 int width = 0;
875 char *p = master_format;
876
877 if (*p == '-') {
878 minus = 1;
879 p++;
2e3e8ded 880 }
ab197ca7
AB
881 if (isdigit(*p)) {
882 has_w = 1;
883 do {
884 width = (width * 10) + (*p++ - '0');
885 } while ((*p) && (isdigit(*p)));
886 }
887 if (*p) {
888 print_field(act, pci, t, elapsed, pdu_len, pdu_buf, *p++,
889 minus, has_w, width);
890 }
891 return p;
d0ca268b
JA
892}
893
ab197ca7
AB
894static char *fmt_select(int fmt_spec, struct blk_io_trace *t,
895 unsigned long long elapsed)
d0ca268b 896{
ab197ca7
AB
897 char *fmt;
898 char scratch_format[1024];
2e3e8ded 899
ab197ca7
AB
900 if (override_format[fmt_spec] != NULL)
901 return override_format[fmt_spec];
95c15013 902
ab197ca7
AB
903 switch (fmt_spec) {
904 case 'C': /* Complete */
905 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
906 strcpy(scratch_format, HEADER);
907 strcat(scratch_format, "%P");
908 } else {
909 strcpy(scratch_format, HEADER "%S + %n ");
910 if (elapsed != -1ULL)
911 strcat(scratch_format, "(%8u) ");
912 }
913 strcat(scratch_format, "[%e]\n");
914 fmt = scratch_format;
915 break;
916
917 case 'D': /* Issue */
918 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
919 strcpy(scratch_format, HEADER);
920 strcat(scratch_format, "%P");
921 } else {
922 strcpy(scratch_format, HEADER "%S + %n ");
923 if (elapsed != -1ULL)
924 strcat(scratch_format, "(%8u) ");
925 }
926 strcat(scratch_format,"[%C]\n");
927 fmt = scratch_format;
928 break;
929
b6076a9b
JA
930 case 'I': /* Insert */
931 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
932 strcpy(scratch_format, HEADER);
933 strcat(scratch_format, "%P");
934 } else {
935 strcpy(scratch_format, HEADER "%S + %n ");
936 if (elapsed != -1ULL)
937 strcat(scratch_format, "(%8u) ");
938 }
939 strcat(scratch_format,"[%C]\n");
940 fmt = scratch_format;
941 break;
942
ab197ca7
AB
943 case 'Q': /* Queue */
944 strcpy(scratch_format, HEADER "%S + %n ");
945 if (elapsed != -1ULL)
946 strcat(scratch_format, "(%8u) ");
947 strcat(scratch_format,"[%C]\n");
948 fmt = scratch_format;
949 break;
950
951 case 'B': /* Back merge */
952 case 'F': /* Front merge */
953 case 'M': /* Front or back merge */
954 fmt = HEADER "%S + %n [%C]\n";
955 break;
956
957 case 'P': /* Plug */
958 fmt = HEADER "[%C]\n";
959 break;
960
961 case 'G': /* Get request */
962 case 'S': /* Sleep request */
963 fmt = HEADER "%S + %n [%C]\n";
964 break;
965
966 case 'U': /* Unplug IO */
967 case 'T': /* Unplug timer */
968 fmt = HEADER "[%C] %U\n";
969 break;
970
971 default:
972 fprintf(stderr,"FATAL: Invalid format spec %c\n", fmt_spec);
973 exit(1);
974 /*NOTREACHED*/
95c15013 975 }
ab197ca7
AB
976
977 return fmt;
d0ca268b
JA
978}
979
ab197ca7
AB
980static void process_fmt(char *act, struct per_cpu_info *pci,
981 struct blk_io_trace *t, unsigned long long elapsed,
982 int pdu_len, unsigned char *pdu_buf)
d0ca268b 983{
ab197ca7 984 char *p = fmt_select(act[0], t, elapsed);
2e3e8ded 985
ab197ca7
AB
986 while (*p) {
987 switch (*p) {
988 case '%': /* Field specifier */
989 p++;
990 if (*p == '%')
991 fprintf(ofp, "%c", *p++);
992 else if (!*p)
993 fprintf(ofp, "%c", '%');
994 else
995 p = parse_field(act, pci, t, elapsed,
996 pdu_len, pdu_buf, p);
997 break;
998 case '\\': { /* escape */
999 switch (p[1]) {
1000 case 'b': fprintf(ofp, "\b"); break;
1001 case 'n': fprintf(ofp, "\n"); break;
1002 case 'r': fprintf(ofp, "\r"); break;
1003 case 't': fprintf(ofp, "\t"); break;
1004 default:
1005 fprintf(stderr,
1006 "Invalid escape char in format %c\n",
1007 p[1]);
1008 exit(1);
1009 /*NOTREACHED*/
1010 }
1011 p += 2;
1012 break;
1013 }
1014 default:
1015 fprintf(ofp, "%c", *p++);
1016 break;
1017 }
2e3e8ded 1018 }
ab197ca7
AB
1019}
1020
1021static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
1022 char *act)
1023{
1024 process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
1025}
1026
b6076a9b
JA
1027static void log_insert(struct per_cpu_info *pci, struct blk_io_trace *t,
1028 char *act)
1029{
1030 process_fmt(act, pci, t, log_track_insert(t), 0, NULL);
1031}
1032
ab197ca7
AB
1033static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1034 char *act)
1035{
b6076a9b 1036 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 1037}
2e3e8ded 1038
ab197ca7
AB
1039static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
1040 char *act)
1041{
1042 process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
d0ca268b
JA
1043}
1044
d5396421 1045static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1046 char *act)
d0ca268b 1047{
a01516de
JA
1048 if (act[0] == 'F')
1049 log_track_frontmerge(t);
2e3e8ded 1050
ab197ca7 1051 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1052}
1053
dfe34da1 1054static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1055 char *act)
dfe34da1 1056{
ab197ca7 1057 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1058}
1059
d5396421 1060static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1061 char *act)
d0ca268b 1062{
ab197ca7 1063 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1064}
1065
ab197ca7 1066static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1067 char *act)
67e14fdc 1068{
ab197ca7 1069 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1070}
1071
ab197ca7 1072static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1073{
ab197ca7 1074 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1075
ab197ca7 1076 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1077}
1078
ff3a732c 1079static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
1080{
1081 switch (t->action & 0xffff) {
1082 case __BLK_TA_QUEUE:
3639a11e 1083 log_generic(pci, t, "Q");
d0ca268b
JA
1084 break;
1085 case __BLK_TA_GETRQ:
3639a11e 1086 log_generic(pci, t, "G");
d0ca268b
JA
1087 break;
1088 case __BLK_TA_SLEEPRQ:
3639a11e 1089 log_generic(pci, t, "S");
d0ca268b
JA
1090 break;
1091 case __BLK_TA_REQUEUE:
3639a11e 1092 log_generic(pci, t, "R");
d0ca268b
JA
1093 break;
1094 case __BLK_TA_ISSUE:
ab197ca7 1095 log_pc(pci, t, "D");
d0ca268b
JA
1096 break;
1097 case __BLK_TA_COMPLETE:
3639a11e 1098 log_pc(pci, t, "C");
d0ca268b
JA
1099 break;
1100 default:
1101 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777 1102 break;
d0ca268b 1103 }
d0ca268b
JA
1104}
1105
d5396421 1106static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
1107{
1108 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 1109 int act = t->action & 0xffff;
d0ca268b 1110
7997c5b0 1111 switch (act) {
d0ca268b 1112 case __BLK_TA_QUEUE:
b6076a9b 1113 account_queue(t, pci, w);
3639a11e 1114 log_queue(pci, t, "Q");
d0ca268b 1115 break;
b6076a9b
JA
1116 case __BLK_TA_INSERT:
1117 log_insert(pci, t, "I");
1118 break;
d0ca268b 1119 case __BLK_TA_BACKMERGE:
152f6476 1120 account_m(t, pci, w);
3639a11e 1121 log_merge(pci, t, "M");
d0ca268b
JA
1122 break;
1123 case __BLK_TA_FRONTMERGE:
152f6476 1124 account_m(t, pci, w);
3639a11e 1125 log_merge(pci, t, "F");
d0ca268b
JA
1126 break;
1127 case __BLK_TA_GETRQ:
95c15013 1128 log_track_getrq(t);
3639a11e 1129 log_generic(pci, t, "G");
d0ca268b
JA
1130 break;
1131 case __BLK_TA_SLEEPRQ:
3639a11e 1132 log_generic(pci, t, "S");
d0ca268b
JA
1133 break;
1134 case __BLK_TA_REQUEUE:
152f6476 1135 account_c(t, pci, w, -t->bytes);
3639a11e 1136 log_queue(pci, t, "R");
d0ca268b
JA
1137 break;
1138 case __BLK_TA_ISSUE:
b6076a9b 1139 account_issue(t, pci, w);
3639a11e 1140 log_issue(pci, t, "D");
d0ca268b
JA
1141 break;
1142 case __BLK_TA_COMPLETE:
152f6476 1143 account_c(t, pci, w, t->bytes);
3639a11e 1144 log_complete(pci, t, "C");
d0ca268b 1145 break;
88b1a526 1146 case __BLK_TA_PLUG:
3639a11e 1147 log_action(pci, t, "P");
88b1a526 1148 break;
3639a11e 1149 case __BLK_TA_UNPLUG_IO:
06639b27 1150 account_unplug(t, pci, 0);
3639a11e
JA
1151 log_unplug(pci, t, "U");
1152 break;
1153 case __BLK_TA_UNPLUG_TIMER:
06639b27 1154 account_unplug(t, pci, 1);
3639a11e 1155 log_unplug(pci, t, "UT");
88b1a526 1156 break;
d0ca268b
JA
1157 default:
1158 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1159 break;
d0ca268b 1160 }
d0ca268b
JA
1161}
1162
ff3a732c
JA
1163static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1164 struct per_dev_info *pdi)
d0ca268b
JA
1165{
1166 if (t->action & BLK_TC_ACT(BLK_TC_PC))
ff3a732c 1167 dump_trace_pc(t, pci);
d0ca268b 1168 else
d5396421 1169 dump_trace_fs(t, pci);
87b72777 1170
e7c9f3ff 1171 pdi->events++;
d0ca268b
JA
1172}
1173
152f6476 1174static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 1175{
152f6476
JA
1176 fprintf(ofp, "%s\n", msg);
1177
1178 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
1179 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 1180
152f6476
JA
1181 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
1182 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
1183 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
1184 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
1185 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 1186 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
1187 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1188 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1189}
1190
50adc0ba
JA
1191static void dump_wait_stats(struct per_process_info *ppi)
1192{
b9d40d6f
JA
1193 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1194 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1195 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1196 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1197 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1198 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1199
1200 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1201 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1202 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1203 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1204 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1205 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1206}
1207
886ecf0e
JA
1208static int ppi_name_compare(const void *p1, const void *p2)
1209{
1210 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1211 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1212 int res;
1213
1214 res = strverscmp(ppi1->name, ppi2->name);
1215 if (!res)
06e6f286 1216 res = ppi1->pid > ppi2->pid;
886ecf0e
JA
1217
1218 return res;
1219}
1220
1221static void sort_process_list(void)
1222{
1223 struct per_process_info **ppis;
1224 struct per_process_info *ppi;
1225 int i = 0;
1226
1227 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1228
1229 ppi = ppi_list;
1230 while (ppi) {
06e6f286 1231 ppis[i++] = ppi;
886ecf0e
JA
1232 ppi = ppi->list_next;
1233 }
1234
06e6f286 1235 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
886ecf0e
JA
1236
1237 i = ppi_list_entries - 1;
1238 ppi_list = NULL;
1239 while (i >= 0) {
1240 ppi = ppis[i];
1241
1242 ppi->list_next = ppi_list;
1243 ppi_list = ppi;
1244 i--;
1245 }
50c38702
JA
1246
1247 free(ppis);
886ecf0e
JA
1248}
1249
152f6476
JA
1250static void show_process_stats(void)
1251{
1252 struct per_process_info *ppi;
1253
886ecf0e
JA
1254 sort_process_list();
1255
152f6476
JA
1256 ppi = ppi_list;
1257 while (ppi) {
ce8b6b4f
JA
1258 char name[64];
1259
1260 snprintf(name, sizeof(name)-1, "%s (%u)", ppi->name, ppi->pid);
1261 dump_io_stats(&ppi->io_stats, name);
50adc0ba 1262 dump_wait_stats(ppi);
152f6476
JA
1263 ppi = ppi->list_next;
1264 }
1265
1266 fprintf(ofp, "\n");
1267}
1268
e7c9f3ff 1269static void show_device_and_cpu_stats(void)
d0ca268b 1270{
e7c9f3ff
NS
1271 struct per_dev_info *pdi;
1272 struct per_cpu_info *pci;
1273 struct io_stats total, *ios;
1274 int i, j, pci_events;
1275 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1276 char name[32];
1277
1278 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1279
1280 memset(&total, 0, sizeof(total));
1281 pci_events = 0;
1282
1283 if (i > 0)
1284 fprintf(ofp, "\n");
1285
1286 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1287 if (!pci->nelems)
1288 continue;
1289
1290 ios = &pci->io_stats;
1291 total.qreads += ios->qreads;
1292 total.qwrites += ios->qwrites;
1293 total.creads += ios->creads;
1294 total.cwrites += ios->cwrites;
1295 total.mreads += ios->mreads;
1296 total.mwrites += ios->mwrites;
1297 total.ireads += ios->ireads;
1298 total.iwrites += ios->iwrites;
1299 total.qread_kb += ios->qread_kb;
1300 total.qwrite_kb += ios->qwrite_kb;
1301 total.cread_kb += ios->cread_kb;
1302 total.cwrite_kb += ios->cwrite_kb;
1303 total.iread_kb += ios->iread_kb;
1304 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1305 total.timer_unplugs += ios->timer_unplugs;
1306 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1307
1308 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1309 j, get_dev_name(pdi, name, sizeof(name)));
1310 dump_io_stats(ios, line);
1311 pci_events++;
1312 }
5c017e4b 1313
e7c9f3ff
NS
1314 if (pci_events > 1) {
1315 fprintf(ofp, "\n");
1316 snprintf(line, sizeof(line) - 1, "Total (%s):",
1317 get_dev_name(pdi, name, sizeof(name)));
1318 dump_io_stats(&total, line);
1319 }
d0ca268b 1320
0aa8a62d
JA
1321 fprintf(ofp, "\nEvents (%s): %'Lu entries, %'lu skips\n",
1322 get_dev_name(pdi, line, sizeof(line)), pdi->events,
1323 pdi->skips);
e7c9f3ff 1324 }
d0ca268b
JA
1325}
1326
cb2a1a62 1327static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1328{
cb2a1a62 1329 unsigned long max_offset = offset;
2ff323b0
JA
1330 unsigned long off;
1331 struct blk_io_trace *bit;
1332 __u32 magic;
1333
1334 for (off = 0; off < max_offset; off++) {
1335 bit = p + off;
1336
1337 magic = be32_to_cpu(bit->magic);
1338 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1339 return bit;
1340 }
1341
1342 return NULL;
1343}
1344
cb2a1a62 1345static int sort_entries(void)
8fc0abbc 1346{
e7c9f3ff 1347 struct per_dev_info *pdi;
412819ce 1348 struct per_cpu_info *pci;
8fc0abbc
JA
1349 struct blk_io_trace *bit;
1350 struct trace *t;
cb2a1a62 1351 int nr = 0;
8fc0abbc 1352
cb2a1a62 1353 while ((t = trace_list) != NULL) {
412819ce 1354
cb2a1a62
JA
1355 trace_list = t->next;
1356 bit = t->bit;
6fe4709e 1357
8fc0abbc
JA
1358 memset(&t->rb_node, 0, sizeof(t->rb_node));
1359
cb2a1a62 1360 if (verify_trace(bit))
66fa7233 1361 break;
cb2a1a62
JA
1362 if (trace_rb_insert(t))
1363 return -1;
66fa7233 1364
cb2a1a62
JA
1365 pdi = get_dev_info(bit->device);
1366 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1367 pci->nelems++;
1368
cb2a1a62 1369 nr++;
6fe4709e 1370 }
8fc0abbc 1371
cb2a1a62 1372 return nr;
412819ce
JA
1373}
1374
f1327cc5 1375static void show_entries_rb(int piped)
8fc0abbc 1376{
e7c9f3ff 1377 struct per_dev_info *pdi;
8fc0abbc 1378 struct blk_io_trace *bit;
3aabcd89 1379 struct rb_node *n;
8fc0abbc
JA
1380 struct trace *t;
1381 int cpu;
1382
cb2a1a62 1383 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1384
1385 t = rb_entry(n, struct trace, rb_node);
1386 bit = t->bit;
1387
cb2a1a62 1388 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1389 if (!pdi) {
1390 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1391 MAJOR(bit->device), MINOR(bit->device));
1392 break;
1393 }
d5396421 1394 cpu = bit->cpu;
e7c9f3ff
NS
1395 if (cpu > pdi->ncpus) {
1396 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1397 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1398 break;
8fc0abbc
JA
1399 }
1400
cb2a1a62
JA
1401 /*
1402 * back off displaying more info if we are out of sync
1403 * on SMP systems. to prevent stalling on lost events,
0aa8a62d 1404 * only allow an event to us a few times
cb2a1a62 1405 */
a3983763
JA
1406 if (bit->sequence != (pdi->last_sequence + 1)) {
1407 if (piped && t->skipped < 5) {
0aa8a62d 1408 t->skipped++;
cb2a1a62 1409 break;
0aa8a62d
JA
1410 } else {
1411 fprintf(stderr, "skipping from %lu to %u\n", pdi->last_sequence, bit->sequence);
1412 pdi->skips++;
cb2a1a62
JA
1413 }
1414 }
1415
1416 pdi->last_sequence = bit->sequence;
1417
cfab07eb 1418 bit->time -= genesis_time;
46e6968b
NS
1419 if (bit->time < stopwatch_start)
1420 continue;
1421 if (bit->time >= stopwatch_end)
1422 break;
8fc0abbc 1423
e7c9f3ff 1424 check_time(pdi, bit);
8fc0abbc 1425
ff3a732c 1426 dump_trace(bit, &pdi->cpus[cpu], pdi);
87b72777 1427
cb2a1a62 1428 rb_erase(&t->rb_node, &rb_sort_root);
e8741a4a 1429
f1327cc5 1430 if (piped) {
e8741a4a
JA
1431 free(bit);
1432 free(t);
1433 }
cb2a1a62 1434 }
8fc0abbc
JA
1435}
1436
1f79c4a0
JA
1437static int read_data(int fd, void *buffer, int bytes, int block)
1438{
1439 int ret, bytes_left, fl;
1440 void *p;
1441
1442 fl = fcntl(fd, F_GETFL);
1443
1444 if (!block)
1445 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1446 else
1447 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1448
1449 bytes_left = bytes;
1450 p = buffer;
1451 while (bytes_left > 0) {
1452 ret = read(fd, p, bytes_left);
1453 if (!ret)
1454 return 1;
1455 else if (ret < 0) {
1456 if (errno != EAGAIN)
1457 perror("read");
1458 return -1;
1459 } else {
1460 p += ret;
1461 bytes_left -= ret;
1462 }
1463 }
1464
1465 return 0;
1466}
1467
cb2a1a62
JA
1468/*
1469 * Find the traces in 'tb' and add them to the list for sorting and
1470 * displaying
1471 */
1472static int find_entries(void *tb, unsigned long size)
1473{
1474 struct blk_io_trace *bit;
1475 struct trace *t;
1476 void *start = tb;
1477
1478 while (tb - start <= size - sizeof(*bit)) {
1479 bit = find_trace(tb, size - (tb - start));
1480 if (!bit)
1481 break;
1482
1483 t = malloc(sizeof(*t));
1484 memset(t, 0, sizeof(*t));
1485 t->bit = bit;
1486
f1327cc5 1487 trace_to_cpu(bit);
cb2a1a62
JA
1488 t->next = trace_list;
1489 trace_list = t;
1490
1491 tb += sizeof(*bit) + bit->pdu_len;
1492 }
1493
1494 return 0;
1495}
1496
d5396421 1497static int do_file(void)
d0ca268b 1498{
cb2a1a62 1499 int i, j, nfiles = 0, nelems;
d0ca268b 1500
e8741a4a 1501 for (i = 0; i < ndevices; i++) {
e7c9f3ff 1502 for (j = 0;; j++, nfiles++) {
e8741a4a 1503 struct per_dev_info *pdi;
e7c9f3ff
NS
1504 struct per_cpu_info *pci;
1505 struct stat st;
1506 void *tb;
87b72777 1507
e8741a4a 1508 pdi = &devices[i];
e7c9f3ff
NS
1509 pci = get_cpu_info(pdi, j);
1510 pci->cpu = j;
d0ca268b 1511
e7c9f3ff 1512 snprintf(pci->fname, sizeof(pci->fname)-1,
9f6486bd 1513 "%s.blktrace.%d", pdi->name, j);
e7c9f3ff
NS
1514 if (stat(pci->fname, &st) < 0)
1515 break;
1516 if (!st.st_size)
1517 continue;
1518
1519 printf("Processing %s\n", pci->fname);
1520
1521 tb = malloc(st.st_size);
1522 if (!tb) {
1523 fprintf(stderr, "Out of memory, skip file %s\n",
1524 pci->fname);
1525 continue;
1526 }
1527
1528 pci->fd = open(pci->fname, O_RDONLY);
1529 if (pci->fd < 0) {
1530 perror(pci->fname);
1531 free(tb);
1532 continue;
1533 }
1534
1535 if (read_data(pci->fd, tb, st.st_size, 1)) {
1536 close(pci->fd);
1537 free(tb);
1538 continue;
1539 }
1540
cb2a1a62
JA
1541 if (find_entries(tb, st.st_size)) {
1542 close(pci->fd);
1543 free(tb);
1544 }
1545
1546 nelems = sort_entries();
1547 if (nelems == -1) {
e7c9f3ff
NS
1548 close(pci->fd);
1549 free(tb);
1550 continue;
1551 }
1552
1553 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1554 pci->fname, j, nelems);
e7c9f3ff 1555 close(pci->fd);
d0ca268b 1556 }
d5396421
JA
1557 }
1558
1559 if (!nfiles) {
1560 fprintf(stderr, "No files found\n");
1561 return 1;
1562 }
1563
e8741a4a 1564 show_entries_rb(0);
d5396421
JA
1565 return 0;
1566}
1567
cb2a1a62 1568static int read_sort_events(int fd)
d5396421 1569{
cb2a1a62 1570 int events = 0;
d5396421 1571
412819ce 1572 do {
cb2a1a62
JA
1573 struct blk_io_trace *bit;
1574 struct trace *t;
412819ce 1575 int pdu_len;
51128a28 1576 __u32 magic;
d5396421 1577
cb2a1a62 1578 bit = malloc(sizeof(*bit));
d5396421 1579
cb2a1a62 1580 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1581 break;
d5396421 1582
cb2a1a62 1583 magic = be32_to_cpu(bit->magic);
51128a28
JA
1584 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1585 fprintf(stderr, "Bad magic %x\n", magic);
1586 break;
1587 }
1588
cb2a1a62 1589 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1590 if (pdu_len) {
cb2a1a62 1591 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1592
cb2a1a62 1593 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1594 break;
d5396421 1595
cb2a1a62 1596 bit = ptr;
2ff323b0 1597 }
d5396421 1598
cb2a1a62
JA
1599 t = malloc(sizeof(*t));
1600 memset(t, 0, sizeof(*t));
1601 t->bit = bit;
f1327cc5
JA
1602
1603 trace_to_cpu(bit);
cb2a1a62
JA
1604 t->next = trace_list;
1605 trace_list = t;
1606
412819ce 1607 events++;
79f19470 1608 } while (!is_done() && events < rb_batch);
d5396421 1609
412819ce
JA
1610 return events;
1611}
d5396421 1612
412819ce
JA
1613static int do_stdin(void)
1614{
1615 int fd;
d5396421 1616
1f79c4a0 1617 fd = dup(STDIN_FILENO);
412819ce
JA
1618 do {
1619 int events;
d5396421 1620
cb2a1a62 1621 events = read_sort_events(fd);
412819ce
JA
1622 if (!events)
1623 break;
1624
cb2a1a62 1625 if (sort_entries() == -1)
2ff323b0
JA
1626 break;
1627
e8741a4a 1628 show_entries_rb(1);
d5396421
JA
1629 } while (1);
1630
1631 close(fd);
d5396421
JA
1632 return 0;
1633}
d0ca268b 1634
1f79c4a0 1635static void flush_output(void)
412819ce 1636{
152f6476 1637 fflush(ofp);
412819ce
JA
1638}
1639
1f79c4a0 1640static void handle_sigint(int sig)
412819ce
JA
1641{
1642 done = 1;
1643 flush_output();
1644}
1645
46e6968b
NS
1646/*
1647 * Extract start and duration times from a string, allowing
1648 * us to specify a time interval of interest within a trace.
1649 * Format: "duration" (start is zero) or "start:duration".
1650 */
1651static int find_stopwatch_interval(char *string)
1652{
1653 double value;
1654 char *sp;
1655
1656 value = strtod(string, &sp);
1657 if (sp == string) {
1658 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1659 return 1;
1660 }
1661 if (*sp == ':') {
1662 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1663 string = sp + 1;
1664 value = strtod(string, &sp);
1665 if (sp == string || *sp != '\0') {
1666 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1667 string);
1668 return 1;
1669 }
1670 } else if (*sp != '\0') {
1671 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1672 return 1;
1673 }
1674 stopwatch_end = stopwatch_start + DOUBLE_TO_NANO_ULL(value);
1675 return 0;
1676}
1677
1f79c4a0
JA
1678static void usage(char *prog)
1679{
46e6968b
NS
1680 fprintf(stderr, "Usage: %s "
1681 "[-i <name>] [-o <output>] [-s] [-w N[:n]] <name>...\n",
1682 prog);
1f79c4a0
JA
1683}
1684
d5396421
JA
1685int main(int argc, char *argv[])
1686{
152f6476 1687 char *ofp_buffer;
a66877e6 1688 int c, ret, mode;
1e1c60f1 1689 int per_device_and_cpu_stats = 1;
d5396421
JA
1690
1691 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1692 switch (c) {
1693 case 'i':
e7c9f3ff
NS
1694 if (!strcmp(optarg, "-") && !pipeline)
1695 pipeline = 1;
1696 else if (resize_devices(optarg) != 0)
1697 return 1;
d5396421
JA
1698 break;
1699 case 'o':
66efebf8 1700 output_name = optarg;
d5396421 1701 break;
79f19470
JA
1702 case 'b':
1703 rb_batch = atoi(optarg);
1704 if (rb_batch <= 0)
1705 rb_batch = RB_BATCH_DEFAULT;
1706 break;
152f6476
JA
1707 case 's':
1708 per_process_stats = 1;
1709 break;
7997c5b0
JA
1710 case 't':
1711 track_ios = 1;
1712 break;
1e1c60f1
NS
1713 case 'q':
1714 per_device_and_cpu_stats = 0;
1715 break;
46e6968b
NS
1716 case 'w':
1717 if (find_stopwatch_interval(optarg) != 0)
1718 return 1;
1719 break;
ab197ca7
AB
1720 case 'f':
1721 set_all_format_specs(optarg);
1722 break;
1723 case 'F':
1724 if (add_format_spec(optarg) != 0)
1725 return 1;
1726 break;
d5396421 1727 default:
1f79c4a0 1728 usage(argv[0]);
d5396421
JA
1729 return 1;
1730 }
d0ca268b
JA
1731 }
1732
e7c9f3ff
NS
1733 while (optind < argc) {
1734 if (!strcmp(argv[optind], "-") && !pipeline)
1735 pipeline = 1;
1736 else if (resize_devices(argv[optind]) != 0)
1737 return 1;
1738 optind++;
1739 }
1740
1741 if (!pipeline && !ndevices) {
1f79c4a0 1742 usage(argv[0]);
d5396421
JA
1743 return 1;
1744 }
1745
7997c5b0
JA
1746 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1747 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1748
1749 signal(SIGINT, handle_sigint);
1750 signal(SIGHUP, handle_sigint);
1751 signal(SIGTERM, handle_sigint);
d5396421 1752
d69db225
JA
1753 setlocale(LC_NUMERIC, "en_US");
1754
a66877e6 1755 if (!output_name) {
152f6476 1756 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1757 mode = _IOLBF;
1758 } else {
152f6476
JA
1759 char ofname[128];
1760
1761 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1762 ofp = fopen(ofname, "w");
a66877e6 1763 mode = _IOFBF;
152f6476
JA
1764 }
1765
1766 if (!ofp) {
1767 perror("fopen");
1768 return 1;
1769 }
1770
1771 ofp_buffer = malloc(4096);
a66877e6 1772 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1773 perror("setvbuf");
1774 return 1;
1775 }
1776
e7c9f3ff 1777 if (pipeline)
d5396421
JA
1778 ret = do_stdin();
1779 else
1780 ret = do_file();
1781
152f6476
JA
1782 if (per_process_stats)
1783 show_process_stats();
1784
1e1c60f1
NS
1785 if (per_device_and_cpu_stats)
1786 show_device_and_cpu_stats();
152f6476 1787
412819ce 1788 flush_output();
d5396421 1789 return ret;
d0ca268b 1790}