[PATCH] blkparse: Fix stopwatch
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
ab197ca7 33#include <ctype.h>
d0ca268b 34
8fc0abbc
JA
35#include "blktrace.h"
36#include "rbtree.h"
d0ca268b 37
2e3e8ded
JA
38#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
39#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
46e6968b 40#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
cfab07eb 41
e7c9f3ff
NS
42#define MINORBITS 20
43#define MINORMASK ((1U << MINORBITS) - 1)
44#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
45#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
46
47#define min(a, b) ((a) < (b) ? (a) : (b))
d5396421 48
ab197ca7
AB
49#define HEADER "%D %2c %8s %5T.%9t %5p %2a %3d "
50
152f6476
JA
51struct io_stats {
52 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53 unsigned long ireads, iwrites;
54 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55 unsigned long long iread_kb, iwrite_kb;
06639b27 56 unsigned long io_unplugs, timer_unplugs;
152f6476
JA
57};
58
d5396421 59struct per_cpu_info {
d0ca268b
JA
60 int cpu;
61 int nelems;
d0ca268b
JA
62
63 int fd;
87b72777 64 char fname[128];
d0ca268b 65
152f6476
JA
66 struct io_stats io_stats;
67};
8fc0abbc 68
e7c9f3ff
NS
69struct per_dev_info {
70 dev_t id;
71 char *name;
72
73 int backwards;
74 unsigned long long events;
75 unsigned long long last_reported_time;
76 struct io_stats io_stats;
cb2a1a62 77 unsigned long last_sequence;
0aa8a62d 78 unsigned long skips;
e7c9f3ff
NS
79
80 int ncpus;
81 struct per_cpu_info *cpus;
82};
83
152f6476
JA
84struct per_process_info {
85 char name[16];
86 __u32 pid;
87 struct io_stats io_stats;
88 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
89
90 /*
91 * individual io stats
92 */
b9d40d6f
JA
93 unsigned long long longest_allocation_wait[2];
94 unsigned long long longest_dispatch_wait[2];
95 unsigned long long longest_completion_wait[2];
d0ca268b
JA
96};
97
152f6476
JA
98#define PPI_HASH_SHIFT (8)
99static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
100static struct per_process_info *ppi_list;
886ecf0e 101static int ppi_list_entries;
152f6476 102
ab197ca7 103#define S_OPTS "i:o:b:stqw:f:F:"
d5396421
JA
104static struct option l_opts[] = {
105 {
106 .name = "input",
428683db 107 .has_arg = required_argument,
d5396421
JA
108 .flag = NULL,
109 .val = 'i'
110 },
111 {
112 .name = "output",
428683db 113 .has_arg = required_argument,
d5396421
JA
114 .flag = NULL,
115 .val = 'o'
116 },
79f19470
JA
117 {
118 .name = "batch",
428683db 119 .has_arg = required_argument,
79f19470
JA
120 .flag = NULL,
121 .val = 'b'
122 },
152f6476
JA
123 {
124 .name = "per program stats",
428683db 125 .has_arg = no_argument,
152f6476
JA
126 .flag = NULL,
127 .val = 's'
128 },
7997c5b0
JA
129 {
130 .name = "track ios",
428683db 131 .has_arg = no_argument,
7997c5b0
JA
132 .flag = NULL,
133 .val = 't'
134 },
1e1c60f1
NS
135 {
136 .name = "quiet",
428683db 137 .has_arg = no_argument,
1e1c60f1
NS
138 .flag = NULL,
139 .val = 'q'
140 },
46e6968b
NS
141 {
142 .name = "stopwatch",
428683db 143 .has_arg = required_argument,
46e6968b
NS
144 .flag = NULL,
145 .val = 'w'
146 },
ab197ca7
AB
147 {
148 .name = "format",
428683db 149 .has_arg = required_argument,
ab197ca7
AB
150 .flag = NULL,
151 .val = 'f'
152 },
153 {
154 .name = "format-spec",
428683db 155 .has_arg = required_argument,
ab197ca7
AB
156 .flag = NULL,
157 .val = 'F'
158 },
d5396421
JA
159};
160
7997c5b0
JA
161/*
162 * for sorting the displayed output
163 */
8fc0abbc
JA
164struct trace {
165 struct blk_io_trace *bit;
166 struct rb_node rb_node;
cb2a1a62
JA
167 struct trace *next;
168 int skipped;
8fc0abbc
JA
169};
170
cb2a1a62
JA
171static struct rb_root rb_sort_root;
172static struct rb_root rb_track_root;
173
174static struct trace *trace_list;
175
7997c5b0
JA
176/*
177 * for tracking individual ios
178 */
179struct io_track {
180 struct rb_node rb_node;
181
e7c9f3ff 182 dev_t device;
7997c5b0
JA
183 __u64 sector;
184 __u32 pid;
95c15013 185 unsigned long long allocation_time;
7997c5b0
JA
186 unsigned long long queue_time;
187 unsigned long long dispatch_time;
188 unsigned long long completion_time;
189};
190
e7c9f3ff
NS
191static int ndevices;
192static struct per_dev_info *devices;
193static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 194
152f6476 195static FILE *ofp;
e7c9f3ff
NS
196static char *output_name;
197
198static unsigned long long genesis_time;
46e6968b
NS
199static unsigned long long stopwatch_start; /* start from zero by default */
200static unsigned long long stopwatch_end = ULONG_LONG_MAX; /* "infinity" */
152f6476
JA
201
202static int per_process_stats;
7997c5b0 203static int track_ios;
d0ca268b 204
79f19470
JA
205#define RB_BATCH_DEFAULT (1024)
206static int rb_batch = RB_BATCH_DEFAULT;
207
e7c9f3ff
NS
208static int pipeline;
209
412819ce
JA
210#define is_done() (*(volatile int *)(&done))
211static volatile int done;
212
152f6476
JA
213static inline unsigned long hash_long(unsigned long val)
214{
16ef714e
JA
215#if __WORDSIZE == 32
216 val *= 0x9e370001UL;
217#elif __WORDSIZE == 64
218 val *= 0x9e37fffffffc0001UL;
219#else
220#error unknown word size
221#endif
222
223 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
224}
225
226static inline void add_process_to_hash(struct per_process_info *ppi)
227{
228 const int hash_idx = hash_long(ppi->pid);
229
230 ppi->hash_next = ppi_hash[hash_idx];
231 ppi_hash[hash_idx] = ppi;
232}
233
234static inline void add_process_to_list(struct per_process_info *ppi)
235{
236 ppi->list_next = ppi_list;
237 ppi_list = ppi;
886ecf0e 238 ppi_list_entries++;
152f6476
JA
239}
240
241static struct per_process_info *find_process_by_pid(__u32 pid)
242{
243 const int hash_idx = hash_long(pid);
244 struct per_process_info *ppi;
245
246 ppi = ppi_hash[hash_idx];
247 while (ppi) {
248 if (ppi->pid == pid)
249 return ppi;
250
251 ppi = ppi->hash_next;
252 }
253
254 return NULL;
255}
256
7997c5b0
JA
257static inline int trace_rb_insert(struct trace *t)
258{
259 struct rb_node **p = &rb_sort_root.rb_node;
260 struct rb_node *parent = NULL;
261 struct trace *__t;
262
e7c9f3ff
NS
263 if (genesis_time == 0 || t->bit->time < genesis_time)
264 genesis_time = t->bit->time;
265
7997c5b0
JA
266 while (*p) {
267 parent = *p;
268 __t = rb_entry(parent, struct trace, rb_node);
269
e7c9f3ff
NS
270 if (t->bit->time < __t->bit->time)
271 p = &(*p)->rb_left;
272 else if (t->bit->time > __t->bit->time)
273 p = &(*p)->rb_right;
274 else if (t->bit->device < __t->bit->device)
275 p = &(*p)->rb_left;
276 else if (t->bit->device > __t->bit->device)
277 p = &(*p)->rb_right;
278 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
279 p = &(*p)->rb_left;
280 else if (t->bit->sequence > __t->bit->sequence)
281 p = &(*p)->rb_right;
e7c9f3ff
NS
282 else if (t->bit->device == __t->bit->device) {
283 fprintf(stderr,
284 "sequence alias (%d) on device %d,%d!\n",
285 t->bit->sequence,
286 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
287 return 1;
288 }
289 }
290
291 rb_link_node(&t->rb_node, parent, p);
292 rb_insert_color(&t->rb_node, &rb_sort_root);
293 return 0;
294}
295
296static inline int track_rb_insert(struct io_track *iot)
297{
298 struct rb_node **p = &rb_track_root.rb_node;
299 struct rb_node *parent = NULL;
300 struct io_track *__iot;
301
302 while (*p) {
303 parent = *p;
e7c9f3ff 304
7997c5b0
JA
305 __iot = rb_entry(parent, struct io_track, rb_node);
306
e7c9f3ff
NS
307 if (iot->device < __iot->device)
308 p = &(*p)->rb_left;
309 else if (iot->device > __iot->device)
310 p = &(*p)->rb_right;
311 else if (iot->sector < __iot->sector)
7997c5b0
JA
312 p = &(*p)->rb_left;
313 else if (iot->sector > __iot->sector)
314 p = &(*p)->rb_right;
315 else {
e7c9f3ff 316 fprintf(stderr,
ab197ca7
AB
317 "sector alias (%Lu) on device %d,%d!\n",
318 (unsigned long long) iot->sector,
e7c9f3ff 319 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
320 return 1;
321 }
322 }
323
324 rb_link_node(&iot->rb_node, parent, p);
325 rb_insert_color(&iot->rb_node, &rb_track_root);
326 return 0;
327}
328
e7c9f3ff 329static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
330{
331 struct rb_node **p = &rb_track_root.rb_node;
332 struct rb_node *parent = NULL;
333 struct io_track *__iot;
334
335 while (*p) {
336 parent = *p;
337
338 __iot = rb_entry(parent, struct io_track, rb_node);
339
e7c9f3ff
NS
340 if (device < __iot->device)
341 p = &(*p)->rb_left;
342 else if (device > __iot->device)
343 p = &(*p)->rb_right;
344 else if (sector < __iot->sector)
7997c5b0
JA
345 p = &(*p)->rb_left;
346 else if (sector > __iot->sector)
347 p = &(*p)->rb_right;
348 else
349 return __iot;
350 }
351
352 return NULL;
353}
354
e7c9f3ff 355static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 356{
916b5501 357 struct io_track *iot;
7997c5b0 358
e7c9f3ff 359 iot = __find_track(device, sector);
7997c5b0
JA
360 if (!iot) {
361 iot = malloc(sizeof(*iot));
50adc0ba 362 iot->pid = pid;
e7c9f3ff 363 iot->device = device;
7997c5b0
JA
364 iot->sector = sector;
365 track_rb_insert(iot);
366 }
367
368 return iot;
369}
370
a01516de 371static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
372{
373 struct io_track *iot;
374
375 if (!track_ios)
376 return;
2e3e8ded 377
a01516de 378 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62
JA
379 if (!iot) {
380 fprintf(stderr, "failed to find mergeable event\n");
381 return;
2e3e8ded 382 }
cb2a1a62
JA
383
384 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 385 iot->sector -= t->bytes >> 9;
cb2a1a62 386 track_rb_insert(iot);
2e3e8ded
JA
387}
388
95c15013 389static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
390{
391 struct io_track *iot;
392
393 if (!track_ios)
394 return;
395
e7c9f3ff 396 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
397 iot->allocation_time = t->time;
398}
399
400
401/*
b6076a9b 402 * return time between rq allocation and insertion
95c15013 403 */
b6076a9b 404static unsigned long long log_track_insert(struct blk_io_trace *t)
95c15013 405{
50adc0ba 406 unsigned long long elapsed;
95c15013
JA
407 struct io_track *iot;
408
409 if (!track_ios)
410 return -1;
411
e7c9f3ff 412 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 413 iot->queue_time = t->time;
50adc0ba
JA
414 elapsed = iot->queue_time - iot->allocation_time;
415
416 if (per_process_stats) {
417 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 418 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 419
b9d40d6f
JA
420 if (ppi && elapsed > ppi->longest_allocation_wait[w])
421 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
422 }
423
424 return elapsed;
2e3e8ded
JA
425}
426
427/*
428 * return time between queue and issue
429 */
430static unsigned long long log_track_issue(struct blk_io_trace *t)
431{
50adc0ba 432 unsigned long long elapsed;
2e3e8ded
JA
433 struct io_track *iot;
434
435 if (!track_ios)
436 return -1;
437 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
438 return -1;
439
e7c9f3ff 440 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
441 if (!iot) {
442 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 443 return -1;
cb2a1a62 444 }
2e3e8ded
JA
445
446 iot->dispatch_time = t->time;
50adc0ba
JA
447 elapsed = iot->dispatch_time - iot->queue_time;
448
449 if (per_process_stats) {
450 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 451 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 452
b9d40d6f
JA
453 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
454 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
455 }
456
457 return elapsed;
2e3e8ded
JA
458}
459
460/*
461 * return time between dispatch and complete
462 */
463static unsigned long long log_track_complete(struct blk_io_trace *t)
464{
465 unsigned long long elapsed;
466 struct io_track *iot;
467
468 if (!track_ios)
469 return -1;
470 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
471 return -1;
472
e7c9f3ff 473 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
474 if (!iot) {
475 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 476 return -1;
cb2a1a62 477 }
2e3e8ded
JA
478
479 iot->completion_time = t->time;
480 elapsed = iot->completion_time - iot->dispatch_time;
481
50adc0ba
JA
482 if (per_process_stats) {
483 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 484 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 485
b9d40d6f
JA
486 if (ppi && elapsed > ppi->longest_completion_wait[w])
487 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
488 }
489
2e3e8ded
JA
490 /*
491 * kill the trace, we don't need it after completion
492 */
493 rb_erase(&iot->rb_node, &rb_track_root);
494 free(iot);
495
496 return elapsed;
497}
498
499
152f6476
JA
500static struct io_stats *find_process_io_stats(__u32 pid, char *name)
501{
502 struct per_process_info *ppi = find_process_by_pid(pid);
503
504 if (!ppi) {
505 ppi = malloc(sizeof(*ppi));
506 memset(ppi, 0, sizeof(*ppi));
507 strncpy(ppi->name, name, sizeof(ppi->name));
508 ppi->pid = pid;
509 add_process_to_hash(ppi);
510 add_process_to_list(ppi);
511 }
512
513 return &ppi->io_stats;
514}
515
e7c9f3ff
NS
516
517static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 518{
e7c9f3ff
NS
519 struct per_cpu_info *cpus = pdi->cpus;
520 int ncpus = pdi->ncpus;
521 int new_count = cpu + 1;
522 int new_space, size;
a718bd37
NS
523 char *new_start;
524
e7c9f3ff
NS
525 size = new_count * sizeof(struct per_cpu_info);
526 cpus = realloc(cpus, size);
527 if (!cpus) {
528 char name[20];
529 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
530 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
531 exit(1);
532 }
533
e7c9f3ff
NS
534 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
535 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 536 memset(new_start, 0, new_space);
e7c9f3ff
NS
537
538 pdi->ncpus = new_count;
539 pdi->cpus = cpus;
540}
cb2a1a62 541
e7c9f3ff
NS
542static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
543{
cb2a1a62
JA
544 struct per_cpu_info *pci;
545
e7c9f3ff
NS
546 if (cpu >= pdi->ncpus)
547 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
548
549 pci = &pdi->cpus[cpu];
550 pci->cpu = cpu;
551 return pci;
a718bd37
NS
552}
553
e7c9f3ff
NS
554
555static int resize_devices(char *name)
a718bd37 556{
e7c9f3ff 557 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 558
e7c9f3ff
NS
559 devices = realloc(devices, size);
560 if (!devices) {
561 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
562 return 1;
563 }
564 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
565 devices[ndevices].name = name;
566 ndevices++;
567 return 0;
568}
a718bd37 569
cb2a1a62 570static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 571{
cb2a1a62 572 struct per_dev_info *pdi;
e7c9f3ff 573 int i;
c499bf38 574
e7c9f3ff
NS
575 for (i = 0; i < ndevices; i++)
576 if (devices[i].id == id)
577 return &devices[i];
cb2a1a62 578
e7c9f3ff
NS
579 if (resize_devices(NULL) != 0)
580 return NULL;
cb2a1a62
JA
581
582 pdi = &devices[ndevices - 1];
583 pdi->id = id;
1b928247 584 pdi->last_sequence = -1;
cb2a1a62 585 return pdi;
a718bd37
NS
586}
587
e7c9f3ff
NS
588static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
589{
590 if (pdi->name)
591 snprintf(buffer, size, "%s", pdi->name);
592 else
593 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
594 return buffer;
595}
596
e7c9f3ff 597static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
598{
599 unsigned long long this = bit->time;
e7c9f3ff 600 unsigned long long last = pdi->last_reported_time;
cfab07eb 601
e7c9f3ff
NS
602 pdi->backwards = (this < last) ? 'B' : ' ';
603 pdi->last_reported_time = this;
cfab07eb
AB
604}
605
152f6476
JA
606static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
607 int rw)
d0ca268b
JA
608{
609 if (rw) {
152f6476
JA
610 ios->mwrites++;
611 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 612 } else {
152f6476
JA
613 ios->mreads++;
614 ios->qread_kb += t->bytes >> 10;
615 }
616}
617
618static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
619 int rw)
620{
621 __account_m(&pci->io_stats, t, rw);
622
623 if (per_process_stats) {
624 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
625
626 __account_m(ios, t, rw);
d0ca268b
JA
627 }
628}
629
b6076a9b
JA
630static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
631 int rw)
d0ca268b
JA
632{
633 if (rw) {
152f6476
JA
634 ios->qwrites++;
635 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 636 } else {
152f6476
JA
637 ios->qreads++;
638 ios->qread_kb += t->bytes >> 10;
639 }
640}
641
b6076a9b
JA
642static inline void account_queue(struct blk_io_trace *t,
643 struct per_cpu_info *pci, int rw)
152f6476 644{
b6076a9b 645 __account_queue(&pci->io_stats, t, rw);
152f6476
JA
646
647 if (per_process_stats) {
648 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
649
b6076a9b 650 __account_queue(ios, t, rw);
d0ca268b
JA
651 }
652}
653
152f6476 654static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
655{
656 if (rw) {
152f6476
JA
657 ios->cwrites++;
658 ios->cwrite_kb += bytes >> 10;
d0ca268b 659 } else {
152f6476
JA
660 ios->creads++;
661 ios->cread_kb += bytes >> 10;
662 }
663}
664
665static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
666 int rw, int bytes)
667{
668 __account_c(&pci->io_stats, rw, bytes);
669
670 if (per_process_stats) {
671 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
672
673 __account_c(ios, rw, bytes);
d0ca268b
JA
674 }
675}
676
b6076a9b
JA
677static inline void __account_issue(struct io_stats *ios, int rw,
678 unsigned int bytes)
afd2d7ad 679{
680 if (rw) {
152f6476
JA
681 ios->iwrites++;
682 ios->iwrite_kb += bytes >> 10;
afd2d7ad 683 } else {
152f6476
JA
684 ios->ireads++;
685 ios->iread_kb += bytes >> 10;
afd2d7ad 686 }
687}
688
b6076a9b
JA
689static inline void account_issue(struct blk_io_trace *t,
690 struct per_cpu_info *pci, int rw)
d0ca268b 691{
b6076a9b 692 __account_issue(&pci->io_stats, rw, t->bytes);
152f6476
JA
693
694 if (per_process_stats) {
695 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 696
b6076a9b 697 __account_issue(ios, rw, t->bytes);
152f6476
JA
698 }
699}
700
06639b27
JA
701static inline void __account_unplug(struct io_stats *ios, int timer)
702{
703 if (timer)
704 ios->timer_unplugs++;
705 else
706 ios->io_unplugs++;
707}
708
709static inline void account_unplug(struct blk_io_trace *t,
710 struct per_cpu_info *pci, int timer)
711{
712 __account_unplug(&pci->io_stats, timer);
713
714 if (per_process_stats) {
715 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
716
717 __account_unplug(ios, timer);
718 }
719}
720
ab197ca7
AB
721#define VALID_SPECS "BCDFGMPQRSTU"
722static char *override_format[256];
723static inline int valid_spec(int spec)
152f6476 724{
ab197ca7 725 return strchr(VALID_SPECS, spec) != NULL;
d0ca268b
JA
726}
727
ab197ca7 728static void set_all_format_specs(char *optarg)
d0ca268b 729{
ab197ca7 730 char *p;
d0ca268b 731
ab197ca7
AB
732 for (p = VALID_SPECS; *p; p++)
733 if (override_format[(int)(*p)] == NULL)
734 override_format[(int)(*p)] = strdup(optarg);
735}
d0ca268b 736
ab197ca7
AB
737static int add_format_spec(char *optarg)
738{
739 int spec = optarg[0];
d0ca268b 740
ab197ca7
AB
741 if (!valid_spec(spec)) {
742 fprintf(stderr,"Bad format specifier %c\n", spec);
743 return 1;
744 }
745 if (optarg[1] != ',') {
746 fprintf(stderr,"Bad format specifier - need ',' %s\n", optarg);
747 return 1;
748 }
749 optarg += 2;
750 if (*optarg == '\0') {
751 fprintf(stderr,"Bad format specifier - need fmt %s\n", optarg);
752 return 1;
753 }
754
755 /*
756 * Set both merges (front and back)
757 */
758 if (spec == 'M') {
759 override_format['B'] = strdup(optarg);
760 override_format['M'] = strdup(optarg);
761 } else
762 override_format[spec] = strdup(optarg);
d0ca268b 763
ab197ca7 764 return 0;
d0ca268b
JA
765}
766
ab197ca7
AB
767static void print_field(char *act, struct per_cpu_info *pci,
768 struct blk_io_trace *t, unsigned long long elapsed,
769 int pdu_len, unsigned char *pdu_buf, char field,
770 int minus, int has_w, int width)
d0ca268b 771{
ab197ca7 772 char format[64];
2e3e8ded 773
ab197ca7
AB
774 if (has_w) {
775 if (minus)
776 sprintf(format, "%%-%d", width);
777 else
778 sprintf(format, "%%%d", width);
779 } else
780 sprintf(format, "%%");
781
782 switch (field) {
783 case 'a':
784 fprintf(ofp, strcat(format, "s"), act);
785 break;
786 case 'c':
787 fprintf(ofp, strcat(format, "d"), pci->cpu);
788 break;
789 case 'C':
790 fprintf(ofp, strcat(format, "s"), t->comm);
791 break;
792 case 'd': {
793 char rwbs[4];
794 int i = 0;
795 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
796 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
797 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
798 if (w)
799 rwbs[i++] = 'W';
800 else
801 rwbs[i++] = 'R';
802 if (b)
803 rwbs[i++] = 'B';
804 if (s)
805 rwbs[i++] = 'S';
806 rwbs[i] = '\0';
807 fprintf(ofp, strcat(format, "s"), rwbs);
808 break;
809 }
810 case 'D': /* format width ignored */
811 fprintf(ofp,"%3d,%-3d", MAJOR(t->device), MINOR(t->device));
812 break;
813 case 'e':
814 fprintf(ofp, strcat(format, "d"), t->error);
815 break;
816 case 'M':
817 fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
818 break;
819 case 'm':
820 fprintf(ofp, strcat(format, "d"), MINOR(t->device));
821 break;
822 case 'n':
823 fprintf(ofp, strcat(format, "u"), t->bytes >> 9);
824 break;
825 case 'p':
826 fprintf(ofp, strcat(format, "u"), t->pid);
827 break;
828 case 'P': /* format width ignored */
829 if ((pdu_len > 0) && (pdu_buf != NULL)) {
830 int i;
831 unsigned char *p = pdu_buf;
182f2fcc
JA
832 for (i = 0; i < pdu_len; i++) {
833 if (i)
834 fprintf(ofp, " ");
835
836 fprintf(ofp, "%02x", *p++);
837 }
ab197ca7
AB
838 }
839 break;
840 case 's':
841 fprintf(ofp, strcat(format, "ld"), t->sequence);
842 break;
843 case 'S':
844 fprintf(ofp, strcat(format, "lu"), t->sector);
845 break;
846 case 't':
847 sprintf(format, "%%0%dlu", has_w ? width : 9);
848 fprintf(ofp, format, NANO_SECONDS(t->time));
849 break;
850 case 'T':
851 fprintf(ofp, strcat(format, "d"), SECONDS(t->time));
852 break;
853 case 'u':
854 if (elapsed == -1ULL) {
855 fprintf(stderr, "Expecting elapsed value\n");
856 exit(1);
857 }
858 fprintf(ofp, strcat(format, "llu"), elapsed / 1000);
859 break;
860 case 'U': {
861 __u64 *depth = (__u64 *) ((char *) t + sizeof(*t));
862 fprintf(ofp, strcat(format, "u"),
863 (unsigned int) be64_to_cpu(*depth));
864 break;
865 }
866 default:
867 fprintf(ofp,strcat(format, "c"), field);
868 break;
869 }
870}
2e3e8ded 871
ab197ca7
AB
872static char *parse_field(char *act, struct per_cpu_info *pci,
873 struct blk_io_trace *t, unsigned long long elapsed,
874 int pdu_len, unsigned char *pdu_buf,
875 char *master_format)
876{
877 int minus = 0;
878 int has_w = 0;
879 int width = 0;
880 char *p = master_format;
881
882 if (*p == '-') {
883 minus = 1;
884 p++;
2e3e8ded 885 }
ab197ca7
AB
886 if (isdigit(*p)) {
887 has_w = 1;
888 do {
889 width = (width * 10) + (*p++ - '0');
890 } while ((*p) && (isdigit(*p)));
891 }
892 if (*p) {
893 print_field(act, pci, t, elapsed, pdu_len, pdu_buf, *p++,
894 minus, has_w, width);
895 }
896 return p;
d0ca268b
JA
897}
898
ab197ca7
AB
899static char *fmt_select(int fmt_spec, struct blk_io_trace *t,
900 unsigned long long elapsed)
d0ca268b 901{
ab197ca7
AB
902 char *fmt;
903 char scratch_format[1024];
2e3e8ded 904
ab197ca7
AB
905 if (override_format[fmt_spec] != NULL)
906 return override_format[fmt_spec];
95c15013 907
ab197ca7
AB
908 switch (fmt_spec) {
909 case 'C': /* Complete */
910 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
911 strcpy(scratch_format, HEADER);
182f2fcc 912 strcat(scratch_format, "(%P) ");
ab197ca7
AB
913 } else {
914 strcpy(scratch_format, HEADER "%S + %n ");
915 if (elapsed != -1ULL)
916 strcat(scratch_format, "(%8u) ");
917 }
918 strcat(scratch_format, "[%e]\n");
919 fmt = scratch_format;
920 break;
921
922 case 'D': /* Issue */
923 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
924 strcpy(scratch_format, HEADER);
182f2fcc 925 strcat(scratch_format, "%n (%P) ");
ab197ca7
AB
926 } else {
927 strcpy(scratch_format, HEADER "%S + %n ");
928 if (elapsed != -1ULL)
929 strcat(scratch_format, "(%8u) ");
930 }
931 strcat(scratch_format,"[%C]\n");
932 fmt = scratch_format;
933 break;
934
b6076a9b
JA
935 case 'I': /* Insert */
936 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
937 strcpy(scratch_format, HEADER);
182f2fcc 938 strcat(scratch_format, "%n (%P) ");
b6076a9b
JA
939 } else {
940 strcpy(scratch_format, HEADER "%S + %n ");
941 if (elapsed != -1ULL)
942 strcat(scratch_format, "(%8u) ");
943 }
944 strcat(scratch_format,"[%C]\n");
945 fmt = scratch_format;
946 break;
947
ab197ca7 948 case 'Q': /* Queue */
93f1c611 949 case 'W': /* Bounce */
ab197ca7
AB
950 strcpy(scratch_format, HEADER "%S + %n ");
951 if (elapsed != -1ULL)
952 strcat(scratch_format, "(%8u) ");
953 strcat(scratch_format,"[%C]\n");
954 fmt = scratch_format;
955 break;
956
957 case 'B': /* Back merge */
958 case 'F': /* Front merge */
959 case 'M': /* Front or back merge */
960 fmt = HEADER "%S + %n [%C]\n";
961 break;
962
963 case 'P': /* Plug */
964 fmt = HEADER "[%C]\n";
965 break;
966
967 case 'G': /* Get request */
968 case 'S': /* Sleep request */
969 fmt = HEADER "%S + %n [%C]\n";
970 break;
971
972 case 'U': /* Unplug IO */
973 case 'T': /* Unplug timer */
974 fmt = HEADER "[%C] %U\n";
975 break;
976
93f1c611
JA
977 case 'X': /* Split */
978 strcpy(scratch_format, HEADER "%S / %U ");
979 strcat(scratch_format,"[%C]\n");
980 fmt = scratch_format;
981 break;
982
ab197ca7
AB
983 default:
984 fprintf(stderr,"FATAL: Invalid format spec %c\n", fmt_spec);
985 exit(1);
986 /*NOTREACHED*/
95c15013 987 }
ab197ca7
AB
988
989 return fmt;
d0ca268b
JA
990}
991
ab197ca7
AB
992static void process_fmt(char *act, struct per_cpu_info *pci,
993 struct blk_io_trace *t, unsigned long long elapsed,
994 int pdu_len, unsigned char *pdu_buf)
d0ca268b 995{
ab197ca7 996 char *p = fmt_select(act[0], t, elapsed);
2e3e8ded 997
ab197ca7
AB
998 while (*p) {
999 switch (*p) {
1000 case '%': /* Field specifier */
1001 p++;
1002 if (*p == '%')
1003 fprintf(ofp, "%c", *p++);
1004 else if (!*p)
1005 fprintf(ofp, "%c", '%');
1006 else
1007 p = parse_field(act, pci, t, elapsed,
1008 pdu_len, pdu_buf, p);
1009 break;
1010 case '\\': { /* escape */
1011 switch (p[1]) {
1012 case 'b': fprintf(ofp, "\b"); break;
1013 case 'n': fprintf(ofp, "\n"); break;
1014 case 'r': fprintf(ofp, "\r"); break;
1015 case 't': fprintf(ofp, "\t"); break;
1016 default:
1017 fprintf(stderr,
1018 "Invalid escape char in format %c\n",
1019 p[1]);
1020 exit(1);
1021 /*NOTREACHED*/
1022 }
1023 p += 2;
1024 break;
1025 }
1026 default:
1027 fprintf(ofp, "%c", *p++);
1028 break;
1029 }
2e3e8ded 1030 }
ab197ca7
AB
1031}
1032
1033static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
1034 char *act)
1035{
1036 process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
1037}
1038
b6076a9b
JA
1039static void log_insert(struct per_cpu_info *pci, struct blk_io_trace *t,
1040 char *act)
1041{
1042 process_fmt(act, pci, t, log_track_insert(t), 0, NULL);
1043}
1044
ab197ca7
AB
1045static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1046 char *act)
1047{
b6076a9b 1048 process_fmt(act, pci, t, -1, 0, NULL);
ab197ca7 1049}
2e3e8ded 1050
ab197ca7
AB
1051static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
1052 char *act)
1053{
1054 process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
d0ca268b
JA
1055}
1056
d5396421 1057static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1058 char *act)
d0ca268b 1059{
a01516de
JA
1060 if (act[0] == 'F')
1061 log_track_frontmerge(t);
2e3e8ded 1062
ab197ca7 1063 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1064}
1065
dfe34da1 1066static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1067 char *act)
dfe34da1 1068{
ab197ca7 1069 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1070}
1071
d5396421 1072static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1073 char *act)
d0ca268b 1074{
ab197ca7 1075 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1076}
1077
ab197ca7 1078static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1079 char *act)
67e14fdc 1080{
ab197ca7 1081 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1082}
1083
93f1c611
JA
1084static void log_split(struct per_cpu_info *pci, struct blk_io_trace *t,
1085 char *act)
1086{
1087 process_fmt(act, pci, t, -1ULL, 0, NULL);
1088}
1089
ab197ca7 1090static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1091{
ab197ca7 1092 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1093
ab197ca7 1094 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1095}
1096
ff3a732c 1097static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 1098{
56f2af81
JA
1099 int act = t->action & 0xffff;
1100
1101 switch (act) {
d0ca268b 1102 case __BLK_TA_QUEUE:
3639a11e 1103 log_generic(pci, t, "Q");
d0ca268b
JA
1104 break;
1105 case __BLK_TA_GETRQ:
3639a11e 1106 log_generic(pci, t, "G");
d0ca268b
JA
1107 break;
1108 case __BLK_TA_SLEEPRQ:
3639a11e 1109 log_generic(pci, t, "S");
d0ca268b
JA
1110 break;
1111 case __BLK_TA_REQUEUE:
3639a11e 1112 log_generic(pci, t, "R");
d0ca268b
JA
1113 break;
1114 case __BLK_TA_ISSUE:
ab197ca7 1115 log_pc(pci, t, "D");
d0ca268b
JA
1116 break;
1117 case __BLK_TA_COMPLETE:
3639a11e 1118 log_pc(pci, t, "C");
d0ca268b 1119 break;
56f2af81
JA
1120 case __BLK_TA_INSERT:
1121 log_pc(pci, t, "I");
1122 break;
d0ca268b 1123 default:
56f2af81 1124 fprintf(stderr, "Bad pc action %x\n", act);
87b72777 1125 break;
d0ca268b 1126 }
d0ca268b
JA
1127}
1128
d5396421 1129static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
1130{
1131 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 1132 int act = t->action & 0xffff;
d0ca268b 1133
7997c5b0 1134 switch (act) {
d0ca268b 1135 case __BLK_TA_QUEUE:
b6076a9b 1136 account_queue(t, pci, w);
3639a11e 1137 log_queue(pci, t, "Q");
d0ca268b 1138 break;
b6076a9b
JA
1139 case __BLK_TA_INSERT:
1140 log_insert(pci, t, "I");
1141 break;
d0ca268b 1142 case __BLK_TA_BACKMERGE:
152f6476 1143 account_m(t, pci, w);
3639a11e 1144 log_merge(pci, t, "M");
d0ca268b
JA
1145 break;
1146 case __BLK_TA_FRONTMERGE:
152f6476 1147 account_m(t, pci, w);
3639a11e 1148 log_merge(pci, t, "F");
d0ca268b
JA
1149 break;
1150 case __BLK_TA_GETRQ:
95c15013 1151 log_track_getrq(t);
3639a11e 1152 log_generic(pci, t, "G");
d0ca268b
JA
1153 break;
1154 case __BLK_TA_SLEEPRQ:
3639a11e 1155 log_generic(pci, t, "S");
d0ca268b
JA
1156 break;
1157 case __BLK_TA_REQUEUE:
152f6476 1158 account_c(t, pci, w, -t->bytes);
3639a11e 1159 log_queue(pci, t, "R");
d0ca268b
JA
1160 break;
1161 case __BLK_TA_ISSUE:
b6076a9b 1162 account_issue(t, pci, w);
3639a11e 1163 log_issue(pci, t, "D");
d0ca268b
JA
1164 break;
1165 case __BLK_TA_COMPLETE:
152f6476 1166 account_c(t, pci, w, t->bytes);
3639a11e 1167 log_complete(pci, t, "C");
d0ca268b 1168 break;
88b1a526 1169 case __BLK_TA_PLUG:
3639a11e 1170 log_action(pci, t, "P");
88b1a526 1171 break;
3639a11e 1172 case __BLK_TA_UNPLUG_IO:
06639b27 1173 account_unplug(t, pci, 0);
3639a11e
JA
1174 log_unplug(pci, t, "U");
1175 break;
1176 case __BLK_TA_UNPLUG_TIMER:
06639b27 1177 account_unplug(t, pci, 1);
3639a11e 1178 log_unplug(pci, t, "UT");
88b1a526 1179 break;
93f1c611
JA
1180 case __BLK_TA_SPLIT:
1181 log_split(pci, t, "X");
1182 break;
1183 case __BLK_TA_BOUNCE:
1184 log_generic(pci, t, "B");
1185 break;
d0ca268b
JA
1186 default:
1187 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1188 break;
d0ca268b 1189 }
d0ca268b
JA
1190}
1191
ff3a732c
JA
1192static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1193 struct per_dev_info *pdi)
d0ca268b
JA
1194{
1195 if (t->action & BLK_TC_ACT(BLK_TC_PC))
ff3a732c 1196 dump_trace_pc(t, pci);
d0ca268b 1197 else
d5396421 1198 dump_trace_fs(t, pci);
87b72777 1199
e7c9f3ff 1200 pdi->events++;
d0ca268b
JA
1201}
1202
152f6476 1203static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 1204{
152f6476
JA
1205 fprintf(ofp, "%s\n", msg);
1206
1207 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
1208 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 1209
152f6476
JA
1210 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
1211 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
1212 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
1213 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
1214 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 1215 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
1216 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1217 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1218}
1219
50adc0ba
JA
1220static void dump_wait_stats(struct per_process_info *ppi)
1221{
b9d40d6f
JA
1222 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1223 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1224 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1225 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1226 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1227 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1228
1229 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1230 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1231 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1232 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1233 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1234 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1235}
1236
886ecf0e
JA
1237static int ppi_name_compare(const void *p1, const void *p2)
1238{
1239 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1240 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1241 int res;
1242
1243 res = strverscmp(ppi1->name, ppi2->name);
1244 if (!res)
06e6f286 1245 res = ppi1->pid > ppi2->pid;
886ecf0e
JA
1246
1247 return res;
1248}
1249
1250static void sort_process_list(void)
1251{
1252 struct per_process_info **ppis;
1253 struct per_process_info *ppi;
1254 int i = 0;
1255
1256 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1257
1258 ppi = ppi_list;
1259 while (ppi) {
06e6f286 1260 ppis[i++] = ppi;
886ecf0e
JA
1261 ppi = ppi->list_next;
1262 }
1263
06e6f286 1264 qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare);
886ecf0e
JA
1265
1266 i = ppi_list_entries - 1;
1267 ppi_list = NULL;
1268 while (i >= 0) {
1269 ppi = ppis[i];
1270
1271 ppi->list_next = ppi_list;
1272 ppi_list = ppi;
1273 i--;
1274 }
50c38702
JA
1275
1276 free(ppis);
886ecf0e
JA
1277}
1278
152f6476
JA
1279static void show_process_stats(void)
1280{
1281 struct per_process_info *ppi;
1282
886ecf0e
JA
1283 sort_process_list();
1284
152f6476
JA
1285 ppi = ppi_list;
1286 while (ppi) {
ce8b6b4f
JA
1287 char name[64];
1288
1289 snprintf(name, sizeof(name)-1, "%s (%u)", ppi->name, ppi->pid);
1290 dump_io_stats(&ppi->io_stats, name);
50adc0ba 1291 dump_wait_stats(ppi);
152f6476
JA
1292 ppi = ppi->list_next;
1293 }
1294
1295 fprintf(ofp, "\n");
1296}
1297
e7c9f3ff 1298static void show_device_and_cpu_stats(void)
d0ca268b 1299{
e7c9f3ff
NS
1300 struct per_dev_info *pdi;
1301 struct per_cpu_info *pci;
1302 struct io_stats total, *ios;
1303 int i, j, pci_events;
1304 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1305 char name[32];
1306
1307 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1308
1309 memset(&total, 0, sizeof(total));
1310 pci_events = 0;
1311
1312 if (i > 0)
1313 fprintf(ofp, "\n");
1314
1315 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1316 if (!pci->nelems)
1317 continue;
1318
1319 ios = &pci->io_stats;
1320 total.qreads += ios->qreads;
1321 total.qwrites += ios->qwrites;
1322 total.creads += ios->creads;
1323 total.cwrites += ios->cwrites;
1324 total.mreads += ios->mreads;
1325 total.mwrites += ios->mwrites;
1326 total.ireads += ios->ireads;
1327 total.iwrites += ios->iwrites;
1328 total.qread_kb += ios->qread_kb;
1329 total.qwrite_kb += ios->qwrite_kb;
1330 total.cread_kb += ios->cread_kb;
1331 total.cwrite_kb += ios->cwrite_kb;
1332 total.iread_kb += ios->iread_kb;
1333 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1334 total.timer_unplugs += ios->timer_unplugs;
1335 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1336
1337 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1338 j, get_dev_name(pdi, name, sizeof(name)));
1339 dump_io_stats(ios, line);
1340 pci_events++;
1341 }
5c017e4b 1342
e7c9f3ff
NS
1343 if (pci_events > 1) {
1344 fprintf(ofp, "\n");
1345 snprintf(line, sizeof(line) - 1, "Total (%s):",
1346 get_dev_name(pdi, name, sizeof(name)));
1347 dump_io_stats(&total, line);
1348 }
d0ca268b 1349
0aa8a62d
JA
1350 fprintf(ofp, "\nEvents (%s): %'Lu entries, %'lu skips\n",
1351 get_dev_name(pdi, line, sizeof(line)), pdi->events,
1352 pdi->skips);
e7c9f3ff 1353 }
d0ca268b
JA
1354}
1355
cb2a1a62 1356static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1357{
cb2a1a62 1358 unsigned long max_offset = offset;
2ff323b0
JA
1359 unsigned long off;
1360 struct blk_io_trace *bit;
1361 __u32 magic;
1362
1363 for (off = 0; off < max_offset; off++) {
1364 bit = p + off;
1365
1366 magic = be32_to_cpu(bit->magic);
1367 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1368 return bit;
1369 }
1370
1371 return NULL;
1372}
1373
cb2a1a62 1374static int sort_entries(void)
8fc0abbc 1375{
e7c9f3ff 1376 struct per_dev_info *pdi;
412819ce 1377 struct per_cpu_info *pci;
8fc0abbc
JA
1378 struct blk_io_trace *bit;
1379 struct trace *t;
cb2a1a62 1380 int nr = 0;
8fc0abbc 1381
cb2a1a62 1382 while ((t = trace_list) != NULL) {
412819ce 1383
cb2a1a62
JA
1384 trace_list = t->next;
1385 bit = t->bit;
6fe4709e 1386
8fc0abbc
JA
1387 memset(&t->rb_node, 0, sizeof(t->rb_node));
1388
cb2a1a62 1389 if (verify_trace(bit))
66fa7233 1390 break;
cb2a1a62
JA
1391 if (trace_rb_insert(t))
1392 return -1;
66fa7233 1393
cb2a1a62
JA
1394 pdi = get_dev_info(bit->device);
1395 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1396 pci->nelems++;
1397
cb2a1a62 1398 nr++;
6fe4709e 1399 }
8fc0abbc 1400
cb2a1a62 1401 return nr;
412819ce
JA
1402}
1403
f1327cc5 1404static void show_entries_rb(int piped)
8fc0abbc 1405{
e7c9f3ff 1406 struct per_dev_info *pdi;
8fc0abbc 1407 struct blk_io_trace *bit;
3aabcd89 1408 struct rb_node *n;
8fc0abbc
JA
1409 struct trace *t;
1410 int cpu;
1411
cb2a1a62 1412 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1413
1414 t = rb_entry(n, struct trace, rb_node);
1415 bit = t->bit;
1416
cb2a1a62 1417 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1418 if (!pdi) {
1419 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1420 MAJOR(bit->device), MINOR(bit->device));
1421 break;
1422 }
d5396421 1423 cpu = bit->cpu;
e7c9f3ff
NS
1424 if (cpu > pdi->ncpus) {
1425 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1426 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1427 break;
8fc0abbc
JA
1428 }
1429
cb2a1a62
JA
1430 /*
1431 * back off displaying more info if we are out of sync
1432 * on SMP systems. to prevent stalling on lost events,
0aa8a62d 1433 * only allow an event to us a few times
cb2a1a62 1434 */
1b928247
JA
1435 if (bit->sequence != (pdi->last_sequence + 1)
1436 && pdi->last_sequence != -1) {
a3983763 1437 if (piped && t->skipped < 5) {
0aa8a62d 1438 t->skipped++;
cb2a1a62 1439 break;
0aa8a62d
JA
1440 } else {
1441 fprintf(stderr, "skipping from %lu to %u\n", pdi->last_sequence, bit->sequence);
1442 pdi->skips++;
cb2a1a62
JA
1443 }
1444 }
1445
1446 pdi->last_sequence = bit->sequence;
1447
cfab07eb 1448 bit->time -= genesis_time;
46e6968b
NS
1449 if (bit->time >= stopwatch_end)
1450 break;
8fc0abbc 1451
1b928247
JA
1452 if (bit->time >= stopwatch_start) {
1453 check_time(pdi, bit);
8fc0abbc 1454
1b928247
JA
1455 dump_trace(bit, &pdi->cpus[cpu], pdi);
1456 }
87b72777 1457
cb2a1a62 1458 rb_erase(&t->rb_node, &rb_sort_root);
e8741a4a 1459
f1327cc5 1460 if (piped) {
e8741a4a
JA
1461 free(bit);
1462 free(t);
1463 }
cb2a1a62 1464 }
8fc0abbc
JA
1465}
1466
1f79c4a0
JA
1467static int read_data(int fd, void *buffer, int bytes, int block)
1468{
1469 int ret, bytes_left, fl;
1470 void *p;
1471
1472 fl = fcntl(fd, F_GETFL);
1473
1474 if (!block)
1475 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1476 else
1477 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1478
1479 bytes_left = bytes;
1480 p = buffer;
1481 while (bytes_left > 0) {
1482 ret = read(fd, p, bytes_left);
1483 if (!ret)
1484 return 1;
1485 else if (ret < 0) {
1486 if (errno != EAGAIN)
1487 perror("read");
1488 return -1;
1489 } else {
1490 p += ret;
1491 bytes_left -= ret;
1492 }
1493 }
1494
1495 return 0;
1496}
1497
cb2a1a62
JA
1498/*
1499 * Find the traces in 'tb' and add them to the list for sorting and
1500 * displaying
1501 */
1502static int find_entries(void *tb, unsigned long size)
1503{
1504 struct blk_io_trace *bit;
1505 struct trace *t;
1506 void *start = tb;
1507
1508 while (tb - start <= size - sizeof(*bit)) {
1509 bit = find_trace(tb, size - (tb - start));
1510 if (!bit)
1511 break;
1512
1513 t = malloc(sizeof(*t));
1514 memset(t, 0, sizeof(*t));
1515 t->bit = bit;
1516
f1327cc5 1517 trace_to_cpu(bit);
cb2a1a62
JA
1518 t->next = trace_list;
1519 trace_list = t;
1520
1521 tb += sizeof(*bit) + bit->pdu_len;
1522 }
1523
1524 return 0;
1525}
1526
d5396421 1527static int do_file(void)
d0ca268b 1528{
cb2a1a62 1529 int i, j, nfiles = 0, nelems;
d0ca268b 1530
e8741a4a 1531 for (i = 0; i < ndevices; i++) {
e7c9f3ff 1532 for (j = 0;; j++, nfiles++) {
e8741a4a 1533 struct per_dev_info *pdi;
e7c9f3ff
NS
1534 struct per_cpu_info *pci;
1535 struct stat st;
1536 void *tb;
87b72777 1537
e8741a4a 1538 pdi = &devices[i];
1b928247 1539 pdi->last_sequence = -1;
e7c9f3ff
NS
1540 pci = get_cpu_info(pdi, j);
1541 pci->cpu = j;
d0ca268b 1542
e7c9f3ff 1543 snprintf(pci->fname, sizeof(pci->fname)-1,
9f6486bd 1544 "%s.blktrace.%d", pdi->name, j);
e7c9f3ff
NS
1545 if (stat(pci->fname, &st) < 0)
1546 break;
1547 if (!st.st_size)
1548 continue;
1549
1550 printf("Processing %s\n", pci->fname);
1551
1552 tb = malloc(st.st_size);
1553 if (!tb) {
1554 fprintf(stderr, "Out of memory, skip file %s\n",
1555 pci->fname);
1556 continue;
1557 }
1558
1559 pci->fd = open(pci->fname, O_RDONLY);
1560 if (pci->fd < 0) {
1561 perror(pci->fname);
1562 free(tb);
1563 continue;
1564 }
1565
1566 if (read_data(pci->fd, tb, st.st_size, 1)) {
1567 close(pci->fd);
1568 free(tb);
1569 continue;
1570 }
1571
cb2a1a62
JA
1572 if (find_entries(tb, st.st_size)) {
1573 close(pci->fd);
1574 free(tb);
1575 }
1576
1577 nelems = sort_entries();
1578 if (nelems == -1) {
e7c9f3ff
NS
1579 close(pci->fd);
1580 free(tb);
1581 continue;
1582 }
1583
1584 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1585 pci->fname, j, nelems);
e7c9f3ff 1586 close(pci->fd);
d0ca268b 1587 }
d5396421
JA
1588 }
1589
1590 if (!nfiles) {
1591 fprintf(stderr, "No files found\n");
1592 return 1;
1593 }
1594
e8741a4a 1595 show_entries_rb(0);
d5396421
JA
1596 return 0;
1597}
1598
cb2a1a62 1599static int read_sort_events(int fd)
d5396421 1600{
cb2a1a62 1601 int events = 0;
d5396421 1602
412819ce 1603 do {
cb2a1a62
JA
1604 struct blk_io_trace *bit;
1605 struct trace *t;
412819ce 1606 int pdu_len;
51128a28 1607 __u32 magic;
d5396421 1608
cb2a1a62 1609 bit = malloc(sizeof(*bit));
d5396421 1610
cb2a1a62 1611 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1612 break;
d5396421 1613
cb2a1a62 1614 magic = be32_to_cpu(bit->magic);
51128a28
JA
1615 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1616 fprintf(stderr, "Bad magic %x\n", magic);
1617 break;
1618 }
1619
cb2a1a62 1620 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1621 if (pdu_len) {
cb2a1a62 1622 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1623
cb2a1a62 1624 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1625 break;
d5396421 1626
cb2a1a62 1627 bit = ptr;
2ff323b0 1628 }
d5396421 1629
cb2a1a62
JA
1630 t = malloc(sizeof(*t));
1631 memset(t, 0, sizeof(*t));
1632 t->bit = bit;
f1327cc5
JA
1633
1634 trace_to_cpu(bit);
cb2a1a62
JA
1635 t->next = trace_list;
1636 trace_list = t;
1637
412819ce 1638 events++;
79f19470 1639 } while (!is_done() && events < rb_batch);
d5396421 1640
412819ce
JA
1641 return events;
1642}
d5396421 1643
412819ce
JA
1644static int do_stdin(void)
1645{
1646 int fd;
d5396421 1647
1f79c4a0 1648 fd = dup(STDIN_FILENO);
412819ce
JA
1649 do {
1650 int events;
d5396421 1651
cb2a1a62 1652 events = read_sort_events(fd);
412819ce
JA
1653 if (!events)
1654 break;
1655
cb2a1a62 1656 if (sort_entries() == -1)
2ff323b0
JA
1657 break;
1658
e8741a4a 1659 show_entries_rb(1);
d5396421
JA
1660 } while (1);
1661
1662 close(fd);
d5396421
JA
1663 return 0;
1664}
d0ca268b 1665
1f79c4a0 1666static void flush_output(void)
412819ce 1667{
152f6476 1668 fflush(ofp);
412819ce
JA
1669}
1670
1f79c4a0 1671static void handle_sigint(int sig)
412819ce
JA
1672{
1673 done = 1;
1674 flush_output();
1675}
1676
46e6968b
NS
1677/*
1678 * Extract start and duration times from a string, allowing
1679 * us to specify a time interval of interest within a trace.
1680 * Format: "duration" (start is zero) or "start:duration".
1681 */
1682static int find_stopwatch_interval(char *string)
1683{
1684 double value;
1685 char *sp;
1686
1687 value = strtod(string, &sp);
1688 if (sp == string) {
1689 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1690 return 1;
1691 }
1692 if (*sp == ':') {
1693 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1694 string = sp + 1;
1695 value = strtod(string, &sp);
1696 if (sp == string || *sp != '\0') {
1697 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1698 string);
1699 return 1;
1700 }
1701 } else if (*sp != '\0') {
1702 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1703 return 1;
1704 }
1b928247
JA
1705 stopwatch_end = DOUBLE_TO_NANO_ULL(value);
1706 if (stopwatch_end <= stopwatch_start) {
1707 fprintf(stderr, "Invalid stopwatch interval: %Lu -> %Lu\n",
1708 stopwatch_start, stopwatch_end);
1709 return 1;
1710 }
1711
46e6968b
NS
1712 return 0;
1713}
1714
1f79c4a0
JA
1715static void usage(char *prog)
1716{
46e6968b
NS
1717 fprintf(stderr, "Usage: %s "
1718 "[-i <name>] [-o <output>] [-s] [-w N[:n]] <name>...\n",
1719 prog);
1f79c4a0
JA
1720}
1721
d5396421
JA
1722int main(int argc, char *argv[])
1723{
152f6476 1724 char *ofp_buffer;
a66877e6 1725 int c, ret, mode;
1e1c60f1 1726 int per_device_and_cpu_stats = 1;
d5396421
JA
1727
1728 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1729 switch (c) {
1730 case 'i':
e7c9f3ff
NS
1731 if (!strcmp(optarg, "-") && !pipeline)
1732 pipeline = 1;
1733 else if (resize_devices(optarg) != 0)
1734 return 1;
d5396421
JA
1735 break;
1736 case 'o':
66efebf8 1737 output_name = optarg;
d5396421 1738 break;
79f19470
JA
1739 case 'b':
1740 rb_batch = atoi(optarg);
1741 if (rb_batch <= 0)
1742 rb_batch = RB_BATCH_DEFAULT;
1743 break;
152f6476
JA
1744 case 's':
1745 per_process_stats = 1;
1746 break;
7997c5b0
JA
1747 case 't':
1748 track_ios = 1;
1749 break;
1e1c60f1
NS
1750 case 'q':
1751 per_device_and_cpu_stats = 0;
1752 break;
46e6968b
NS
1753 case 'w':
1754 if (find_stopwatch_interval(optarg) != 0)
1755 return 1;
1756 break;
ab197ca7
AB
1757 case 'f':
1758 set_all_format_specs(optarg);
1759 break;
1760 case 'F':
1761 if (add_format_spec(optarg) != 0)
1762 return 1;
1763 break;
d5396421 1764 default:
1f79c4a0 1765 usage(argv[0]);
d5396421
JA
1766 return 1;
1767 }
d0ca268b
JA
1768 }
1769
e7c9f3ff
NS
1770 while (optind < argc) {
1771 if (!strcmp(argv[optind], "-") && !pipeline)
1772 pipeline = 1;
1773 else if (resize_devices(argv[optind]) != 0)
1774 return 1;
1775 optind++;
1776 }
1777
1778 if (!pipeline && !ndevices) {
1f79c4a0 1779 usage(argv[0]);
d5396421
JA
1780 return 1;
1781 }
1782
7997c5b0
JA
1783 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1784 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1785
1786 signal(SIGINT, handle_sigint);
1787 signal(SIGHUP, handle_sigint);
1788 signal(SIGTERM, handle_sigint);
d5396421 1789
d69db225
JA
1790 setlocale(LC_NUMERIC, "en_US");
1791
a66877e6 1792 if (!output_name) {
152f6476 1793 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1794 mode = _IOLBF;
1795 } else {
152f6476
JA
1796 char ofname[128];
1797
1798 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1799 ofp = fopen(ofname, "w");
a66877e6 1800 mode = _IOFBF;
152f6476
JA
1801 }
1802
1803 if (!ofp) {
1804 perror("fopen");
1805 return 1;
1806 }
1807
1808 ofp_buffer = malloc(4096);
a66877e6 1809 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1810 perror("setvbuf");
1811 return 1;
1812 }
1813
e7c9f3ff 1814 if (pipeline)
d5396421
JA
1815 ret = do_stdin();
1816 else
1817 ret = do_file();
1818
152f6476
JA
1819 if (per_process_stats)
1820 show_process_stats();
1821
1e1c60f1
NS
1822 if (per_device_and_cpu_stats)
1823 show_device_and_cpu_stats();
152f6476 1824
412819ce 1825 flush_output();
d5396421 1826 return ret;
d0ca268b 1827}