[PATCH] blkparse: sort the processes by name in stat output
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
ab197ca7 33#include <ctype.h>
d0ca268b 34
8fc0abbc
JA
35#include "blktrace.h"
36#include "rbtree.h"
d0ca268b 37
2e3e8ded
JA
38#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
39#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
46e6968b 40#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
cfab07eb 41
e7c9f3ff
NS
42#define MINORBITS 20
43#define MINORMASK ((1U << MINORBITS) - 1)
44#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
45#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
46
47#define min(a, b) ((a) < (b) ? (a) : (b))
d5396421 48
ab197ca7
AB
49#define HEADER "%D %2c %8s %5T.%9t %5p %2a %3d "
50
152f6476
JA
51struct io_stats {
52 unsigned long qreads, qwrites, creads, cwrites, mreads, mwrites;
53 unsigned long ireads, iwrites;
54 unsigned long long qread_kb, qwrite_kb, cread_kb, cwrite_kb;
55 unsigned long long iread_kb, iwrite_kb;
06639b27 56 unsigned long io_unplugs, timer_unplugs;
152f6476
JA
57};
58
d5396421 59struct per_cpu_info {
d0ca268b
JA
60 int cpu;
61 int nelems;
d0ca268b
JA
62
63 int fd;
87b72777 64 char fname[128];
d0ca268b 65
152f6476
JA
66 struct io_stats io_stats;
67};
8fc0abbc 68
e7c9f3ff
NS
69struct per_dev_info {
70 dev_t id;
71 char *name;
72
73 int backwards;
74 unsigned long long events;
75 unsigned long long last_reported_time;
76 struct io_stats io_stats;
cb2a1a62 77 unsigned long last_sequence;
0aa8a62d 78 unsigned long skips;
e7c9f3ff
NS
79
80 int ncpus;
81 struct per_cpu_info *cpus;
82};
83
152f6476
JA
84struct per_process_info {
85 char name[16];
86 __u32 pid;
87 struct io_stats io_stats;
88 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
89
90 /*
91 * individual io stats
92 */
b9d40d6f
JA
93 unsigned long long longest_allocation_wait[2];
94 unsigned long long longest_dispatch_wait[2];
95 unsigned long long longest_completion_wait[2];
d0ca268b
JA
96};
97
152f6476
JA
98#define PPI_HASH_SHIFT (8)
99static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
100static struct per_process_info *ppi_list;
886ecf0e 101static int ppi_list_entries;
152f6476 102
ab197ca7 103#define S_OPTS "i:o:b:stqw:f:F:"
d5396421
JA
104static struct option l_opts[] = {
105 {
106 .name = "input",
428683db 107 .has_arg = required_argument,
d5396421
JA
108 .flag = NULL,
109 .val = 'i'
110 },
111 {
112 .name = "output",
428683db 113 .has_arg = required_argument,
d5396421
JA
114 .flag = NULL,
115 .val = 'o'
116 },
79f19470
JA
117 {
118 .name = "batch",
428683db 119 .has_arg = required_argument,
79f19470
JA
120 .flag = NULL,
121 .val = 'b'
122 },
152f6476
JA
123 {
124 .name = "per program stats",
428683db 125 .has_arg = no_argument,
152f6476
JA
126 .flag = NULL,
127 .val = 's'
128 },
7997c5b0
JA
129 {
130 .name = "track ios",
428683db 131 .has_arg = no_argument,
7997c5b0
JA
132 .flag = NULL,
133 .val = 't'
134 },
1e1c60f1
NS
135 {
136 .name = "quiet",
428683db 137 .has_arg = no_argument,
1e1c60f1
NS
138 .flag = NULL,
139 .val = 'q'
140 },
46e6968b
NS
141 {
142 .name = "stopwatch",
428683db 143 .has_arg = required_argument,
46e6968b
NS
144 .flag = NULL,
145 .val = 'w'
146 },
ab197ca7
AB
147 {
148 .name = "format",
428683db 149 .has_arg = required_argument,
ab197ca7
AB
150 .flag = NULL,
151 .val = 'f'
152 },
153 {
154 .name = "format-spec",
428683db 155 .has_arg = required_argument,
ab197ca7
AB
156 .flag = NULL,
157 .val = 'F'
158 },
d5396421
JA
159};
160
7997c5b0
JA
161/*
162 * for sorting the displayed output
163 */
8fc0abbc
JA
164struct trace {
165 struct blk_io_trace *bit;
166 struct rb_node rb_node;
cb2a1a62
JA
167 struct trace *next;
168 int skipped;
8fc0abbc
JA
169};
170
cb2a1a62
JA
171static struct rb_root rb_sort_root;
172static struct rb_root rb_track_root;
173
174static struct trace *trace_list;
175
7997c5b0
JA
176/*
177 * for tracking individual ios
178 */
179struct io_track {
180 struct rb_node rb_node;
181
e7c9f3ff 182 dev_t device;
7997c5b0
JA
183 __u64 sector;
184 __u32 pid;
95c15013 185 unsigned long long allocation_time;
7997c5b0
JA
186 unsigned long long queue_time;
187 unsigned long long dispatch_time;
188 unsigned long long completion_time;
189};
190
e7c9f3ff
NS
191static int ndevices;
192static struct per_dev_info *devices;
193static char *get_dev_name(struct per_dev_info *, char *, int);
d0ca268b 194
152f6476 195static FILE *ofp;
e7c9f3ff
NS
196static char *output_name;
197
198static unsigned long long genesis_time;
46e6968b
NS
199static unsigned long long stopwatch_start; /* start from zero by default */
200static unsigned long long stopwatch_end = ULONG_LONG_MAX; /* "infinity" */
152f6476
JA
201
202static int per_process_stats;
7997c5b0 203static int track_ios;
d0ca268b 204
79f19470
JA
205#define RB_BATCH_DEFAULT (1024)
206static int rb_batch = RB_BATCH_DEFAULT;
207
e7c9f3ff
NS
208static int pipeline;
209
412819ce
JA
210#define is_done() (*(volatile int *)(&done))
211static volatile int done;
212
152f6476
JA
213static inline unsigned long hash_long(unsigned long val)
214{
16ef714e
JA
215#if __WORDSIZE == 32
216 val *= 0x9e370001UL;
217#elif __WORDSIZE == 64
218 val *= 0x9e37fffffffc0001UL;
219#else
220#error unknown word size
221#endif
222
223 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
224}
225
226static inline void add_process_to_hash(struct per_process_info *ppi)
227{
228 const int hash_idx = hash_long(ppi->pid);
229
230 ppi->hash_next = ppi_hash[hash_idx];
231 ppi_hash[hash_idx] = ppi;
232}
233
234static inline void add_process_to_list(struct per_process_info *ppi)
235{
236 ppi->list_next = ppi_list;
237 ppi_list = ppi;
886ecf0e 238 ppi_list_entries++;
152f6476
JA
239}
240
241static struct per_process_info *find_process_by_pid(__u32 pid)
242{
243 const int hash_idx = hash_long(pid);
244 struct per_process_info *ppi;
245
246 ppi = ppi_hash[hash_idx];
247 while (ppi) {
248 if (ppi->pid == pid)
249 return ppi;
250
251 ppi = ppi->hash_next;
252 }
253
254 return NULL;
255}
256
7997c5b0
JA
257static inline int trace_rb_insert(struct trace *t)
258{
259 struct rb_node **p = &rb_sort_root.rb_node;
260 struct rb_node *parent = NULL;
261 struct trace *__t;
262
e7c9f3ff
NS
263 if (genesis_time == 0 || t->bit->time < genesis_time)
264 genesis_time = t->bit->time;
265
7997c5b0
JA
266 while (*p) {
267 parent = *p;
268 __t = rb_entry(parent, struct trace, rb_node);
269
e7c9f3ff
NS
270 if (t->bit->time < __t->bit->time)
271 p = &(*p)->rb_left;
272 else if (t->bit->time > __t->bit->time)
273 p = &(*p)->rb_right;
274 else if (t->bit->device < __t->bit->device)
275 p = &(*p)->rb_left;
276 else if (t->bit->device > __t->bit->device)
277 p = &(*p)->rb_right;
278 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
279 p = &(*p)->rb_left;
280 else if (t->bit->sequence > __t->bit->sequence)
281 p = &(*p)->rb_right;
e7c9f3ff
NS
282 else if (t->bit->device == __t->bit->device) {
283 fprintf(stderr,
284 "sequence alias (%d) on device %d,%d!\n",
285 t->bit->sequence,
286 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
287 return 1;
288 }
289 }
290
291 rb_link_node(&t->rb_node, parent, p);
292 rb_insert_color(&t->rb_node, &rb_sort_root);
293 return 0;
294}
295
296static inline int track_rb_insert(struct io_track *iot)
297{
298 struct rb_node **p = &rb_track_root.rb_node;
299 struct rb_node *parent = NULL;
300 struct io_track *__iot;
301
302 while (*p) {
303 parent = *p;
e7c9f3ff 304
7997c5b0
JA
305 __iot = rb_entry(parent, struct io_track, rb_node);
306
e7c9f3ff
NS
307 if (iot->device < __iot->device)
308 p = &(*p)->rb_left;
309 else if (iot->device > __iot->device)
310 p = &(*p)->rb_right;
311 else if (iot->sector < __iot->sector)
7997c5b0
JA
312 p = &(*p)->rb_left;
313 else if (iot->sector > __iot->sector)
314 p = &(*p)->rb_right;
315 else {
e7c9f3ff 316 fprintf(stderr,
ab197ca7
AB
317 "sector alias (%Lu) on device %d,%d!\n",
318 (unsigned long long) iot->sector,
e7c9f3ff 319 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
320 return 1;
321 }
322 }
323
324 rb_link_node(&iot->rb_node, parent, p);
325 rb_insert_color(&iot->rb_node, &rb_track_root);
326 return 0;
327}
328
e7c9f3ff 329static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
330{
331 struct rb_node **p = &rb_track_root.rb_node;
332 struct rb_node *parent = NULL;
333 struct io_track *__iot;
334
335 while (*p) {
336 parent = *p;
337
338 __iot = rb_entry(parent, struct io_track, rb_node);
339
e7c9f3ff
NS
340 if (device < __iot->device)
341 p = &(*p)->rb_left;
342 else if (device > __iot->device)
343 p = &(*p)->rb_right;
344 else if (sector < __iot->sector)
7997c5b0
JA
345 p = &(*p)->rb_left;
346 else if (sector > __iot->sector)
347 p = &(*p)->rb_right;
348 else
349 return __iot;
350 }
351
352 return NULL;
353}
354
e7c9f3ff 355static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 356{
916b5501 357 struct io_track *iot;
7997c5b0 358
e7c9f3ff 359 iot = __find_track(device, sector);
7997c5b0
JA
360 if (!iot) {
361 iot = malloc(sizeof(*iot));
50adc0ba 362 iot->pid = pid;
e7c9f3ff 363 iot->device = device;
7997c5b0
JA
364 iot->sector = sector;
365 track_rb_insert(iot);
366 }
367
368 return iot;
369}
370
a01516de 371static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
372{
373 struct io_track *iot;
374
375 if (!track_ios)
376 return;
2e3e8ded 377
a01516de 378 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62
JA
379 if (!iot) {
380 fprintf(stderr, "failed to find mergeable event\n");
381 return;
2e3e8ded 382 }
cb2a1a62
JA
383
384 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 385 iot->sector -= t->bytes >> 9;
cb2a1a62 386 track_rb_insert(iot);
2e3e8ded
JA
387}
388
95c15013 389static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
390{
391 struct io_track *iot;
392
393 if (!track_ios)
394 return;
395
e7c9f3ff 396 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
397 iot->allocation_time = t->time;
398}
399
400
401/*
402 * return time between rq allocation and queue
403 */
404static unsigned long long log_track_queue(struct blk_io_trace *t)
405{
50adc0ba 406 unsigned long long elapsed;
95c15013
JA
407 struct io_track *iot;
408
409 if (!track_ios)
410 return -1;
411
e7c9f3ff 412 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 413 iot->queue_time = t->time;
50adc0ba
JA
414 elapsed = iot->queue_time - iot->allocation_time;
415
416 if (per_process_stats) {
417 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 418 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 419
b9d40d6f
JA
420 if (ppi && elapsed > ppi->longest_allocation_wait[w])
421 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
422 }
423
424 return elapsed;
2e3e8ded
JA
425}
426
427/*
428 * return time between queue and issue
429 */
430static unsigned long long log_track_issue(struct blk_io_trace *t)
431{
50adc0ba 432 unsigned long long elapsed;
2e3e8ded
JA
433 struct io_track *iot;
434
435 if (!track_ios)
436 return -1;
437 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
438 return -1;
439
e7c9f3ff 440 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
441 if (!iot) {
442 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 443 return -1;
cb2a1a62 444 }
2e3e8ded
JA
445
446 iot->dispatch_time = t->time;
50adc0ba
JA
447 elapsed = iot->dispatch_time - iot->queue_time;
448
449 if (per_process_stats) {
450 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 451 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 452
b9d40d6f
JA
453 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
454 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
455 }
456
457 return elapsed;
2e3e8ded
JA
458}
459
460/*
461 * return time between dispatch and complete
462 */
463static unsigned long long log_track_complete(struct blk_io_trace *t)
464{
465 unsigned long long elapsed;
466 struct io_track *iot;
467
468 if (!track_ios)
469 return -1;
470 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
471 return -1;
472
e7c9f3ff 473 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
474 if (!iot) {
475 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 476 return -1;
cb2a1a62 477 }
2e3e8ded
JA
478
479 iot->completion_time = t->time;
480 elapsed = iot->completion_time - iot->dispatch_time;
481
50adc0ba
JA
482 if (per_process_stats) {
483 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 484 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 485
b9d40d6f
JA
486 if (ppi && elapsed > ppi->longest_completion_wait[w])
487 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
488 }
489
2e3e8ded
JA
490 /*
491 * kill the trace, we don't need it after completion
492 */
493 rb_erase(&iot->rb_node, &rb_track_root);
494 free(iot);
495
496 return elapsed;
497}
498
499
152f6476
JA
500static struct io_stats *find_process_io_stats(__u32 pid, char *name)
501{
502 struct per_process_info *ppi = find_process_by_pid(pid);
503
504 if (!ppi) {
505 ppi = malloc(sizeof(*ppi));
506 memset(ppi, 0, sizeof(*ppi));
507 strncpy(ppi->name, name, sizeof(ppi->name));
508 ppi->pid = pid;
509 add_process_to_hash(ppi);
510 add_process_to_list(ppi);
511 }
512
513 return &ppi->io_stats;
514}
515
e7c9f3ff
NS
516
517static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 518{
e7c9f3ff
NS
519 struct per_cpu_info *cpus = pdi->cpus;
520 int ncpus = pdi->ncpus;
521 int new_count = cpu + 1;
522 int new_space, size;
a718bd37
NS
523 char *new_start;
524
e7c9f3ff
NS
525 size = new_count * sizeof(struct per_cpu_info);
526 cpus = realloc(cpus, size);
527 if (!cpus) {
528 char name[20];
529 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
530 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
531 exit(1);
532 }
533
e7c9f3ff
NS
534 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
535 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 536 memset(new_start, 0, new_space);
e7c9f3ff
NS
537
538 pdi->ncpus = new_count;
539 pdi->cpus = cpus;
540}
cb2a1a62 541
e7c9f3ff
NS
542static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
543{
cb2a1a62
JA
544 struct per_cpu_info *pci;
545
e7c9f3ff
NS
546 if (cpu >= pdi->ncpus)
547 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
548
549 pci = &pdi->cpus[cpu];
550 pci->cpu = cpu;
551 return pci;
a718bd37
NS
552}
553
e7c9f3ff
NS
554
555static int resize_devices(char *name)
a718bd37 556{
e7c9f3ff 557 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 558
e7c9f3ff
NS
559 devices = realloc(devices, size);
560 if (!devices) {
561 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
562 return 1;
563 }
564 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
565 devices[ndevices].name = name;
566 ndevices++;
567 return 0;
568}
a718bd37 569
cb2a1a62 570static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 571{
cb2a1a62 572 struct per_dev_info *pdi;
e7c9f3ff 573 int i;
c499bf38 574
e7c9f3ff
NS
575 for (i = 0; i < ndevices; i++)
576 if (devices[i].id == id)
577 return &devices[i];
cb2a1a62 578
e7c9f3ff
NS
579 if (resize_devices(NULL) != 0)
580 return NULL;
cb2a1a62
JA
581
582 pdi = &devices[ndevices - 1];
583 pdi->id = id;
584 return pdi;
a718bd37
NS
585}
586
e7c9f3ff
NS
587static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
588{
589 if (pdi->name)
590 snprintf(buffer, size, "%s", pdi->name);
591 else
592 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
593 return buffer;
594}
595
e7c9f3ff 596static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
597{
598 unsigned long long this = bit->time;
e7c9f3ff 599 unsigned long long last = pdi->last_reported_time;
cfab07eb 600
e7c9f3ff
NS
601 pdi->backwards = (this < last) ? 'B' : ' ';
602 pdi->last_reported_time = this;
cfab07eb
AB
603}
604
152f6476
JA
605static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
606 int rw)
d0ca268b
JA
607{
608 if (rw) {
152f6476
JA
609 ios->mwrites++;
610 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 611 } else {
152f6476
JA
612 ios->mreads++;
613 ios->qread_kb += t->bytes >> 10;
614 }
615}
616
617static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
618 int rw)
619{
620 __account_m(&pci->io_stats, t, rw);
621
622 if (per_process_stats) {
623 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
624
625 __account_m(ios, t, rw);
d0ca268b
JA
626 }
627}
628
152f6476
JA
629static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
630 int rw)
d0ca268b
JA
631{
632 if (rw) {
152f6476
JA
633 ios->qwrites++;
634 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 635 } else {
152f6476
JA
636 ios->qreads++;
637 ios->qread_kb += t->bytes >> 10;
638 }
639}
640
641static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
642 int rw)
643{
644 __account_q(&pci->io_stats, t, rw);
645
646 if (per_process_stats) {
647 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
648
649 __account_q(ios, t, rw);
d0ca268b
JA
650 }
651}
652
152f6476 653static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
654{
655 if (rw) {
152f6476
JA
656 ios->cwrites++;
657 ios->cwrite_kb += bytes >> 10;
d0ca268b 658 } else {
152f6476
JA
659 ios->creads++;
660 ios->cread_kb += bytes >> 10;
661 }
662}
663
664static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
665 int rw, int bytes)
666{
667 __account_c(&pci->io_stats, rw, bytes);
668
669 if (per_process_stats) {
670 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
671
672 __account_c(ios, rw, bytes);
d0ca268b
JA
673 }
674}
675
152f6476 676static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 677{
678 if (rw) {
152f6476
JA
679 ios->iwrites++;
680 ios->iwrite_kb += bytes >> 10;
afd2d7ad 681 } else {
152f6476
JA
682 ios->ireads++;
683 ios->iread_kb += bytes >> 10;
afd2d7ad 684 }
685}
686
152f6476
JA
687static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
688 int rw)
d0ca268b 689{
152f6476
JA
690 __account_i(&pci->io_stats, rw, t->bytes);
691
692 if (per_process_stats) {
693 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 694
152f6476
JA
695 __account_i(ios, rw, t->bytes);
696 }
697}
698
06639b27
JA
699static inline void __account_unplug(struct io_stats *ios, int timer)
700{
701 if (timer)
702 ios->timer_unplugs++;
703 else
704 ios->io_unplugs++;
705}
706
707static inline void account_unplug(struct blk_io_trace *t,
708 struct per_cpu_info *pci, int timer)
709{
710 __account_unplug(&pci->io_stats, timer);
711
712 if (per_process_stats) {
713 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
714
715 __account_unplug(ios, timer);
716 }
717}
718
ab197ca7
AB
719#define VALID_SPECS "BCDFGMPQRSTU"
720static char *override_format[256];
721static inline int valid_spec(int spec)
152f6476 722{
ab197ca7 723 return strchr(VALID_SPECS, spec) != NULL;
d0ca268b
JA
724}
725
ab197ca7 726static void set_all_format_specs(char *optarg)
d0ca268b 727{
ab197ca7 728 char *p;
d0ca268b 729
ab197ca7
AB
730 for (p = VALID_SPECS; *p; p++)
731 if (override_format[(int)(*p)] == NULL)
732 override_format[(int)(*p)] = strdup(optarg);
733}
d0ca268b 734
ab197ca7
AB
735static int add_format_spec(char *optarg)
736{
737 int spec = optarg[0];
d0ca268b 738
ab197ca7
AB
739 if (!valid_spec(spec)) {
740 fprintf(stderr,"Bad format specifier %c\n", spec);
741 return 1;
742 }
743 if (optarg[1] != ',') {
744 fprintf(stderr,"Bad format specifier - need ',' %s\n", optarg);
745 return 1;
746 }
747 optarg += 2;
748 if (*optarg == '\0') {
749 fprintf(stderr,"Bad format specifier - need fmt %s\n", optarg);
750 return 1;
751 }
752
753 /*
754 * Set both merges (front and back)
755 */
756 if (spec == 'M') {
757 override_format['B'] = strdup(optarg);
758 override_format['M'] = strdup(optarg);
759 } else
760 override_format[spec] = strdup(optarg);
d0ca268b 761
ab197ca7 762 return 0;
d0ca268b
JA
763}
764
ab197ca7
AB
765static void print_field(char *act, struct per_cpu_info *pci,
766 struct blk_io_trace *t, unsigned long long elapsed,
767 int pdu_len, unsigned char *pdu_buf, char field,
768 int minus, int has_w, int width)
d0ca268b 769{
ab197ca7 770 char format[64];
2e3e8ded 771
ab197ca7
AB
772 if (has_w) {
773 if (minus)
774 sprintf(format, "%%-%d", width);
775 else
776 sprintf(format, "%%%d", width);
777 } else
778 sprintf(format, "%%");
779
780 switch (field) {
781 case 'a':
782 fprintf(ofp, strcat(format, "s"), act);
783 break;
784 case 'c':
785 fprintf(ofp, strcat(format, "d"), pci->cpu);
786 break;
787 case 'C':
788 fprintf(ofp, strcat(format, "s"), t->comm);
789 break;
790 case 'd': {
791 char rwbs[4];
792 int i = 0;
793 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
794 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
795 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
796 if (w)
797 rwbs[i++] = 'W';
798 else
799 rwbs[i++] = 'R';
800 if (b)
801 rwbs[i++] = 'B';
802 if (s)
803 rwbs[i++] = 'S';
804 rwbs[i] = '\0';
805 fprintf(ofp, strcat(format, "s"), rwbs);
806 break;
807 }
808 case 'D': /* format width ignored */
809 fprintf(ofp,"%3d,%-3d", MAJOR(t->device), MINOR(t->device));
810 break;
811 case 'e':
812 fprintf(ofp, strcat(format, "d"), t->error);
813 break;
814 case 'M':
815 fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
816 break;
817 case 'm':
818 fprintf(ofp, strcat(format, "d"), MINOR(t->device));
819 break;
820 case 'n':
821 fprintf(ofp, strcat(format, "u"), t->bytes >> 9);
822 break;
823 case 'p':
824 fprintf(ofp, strcat(format, "u"), t->pid);
825 break;
826 case 'P': /* format width ignored */
827 if ((pdu_len > 0) && (pdu_buf != NULL)) {
828 int i;
829 unsigned char *p = pdu_buf;
830 for (i = 0; i < pdu_len; i++)
831 fprintf(ofp, "%02x ", *p++);
832 }
833 break;
834 case 's':
835 fprintf(ofp, strcat(format, "ld"), t->sequence);
836 break;
837 case 'S':
838 fprintf(ofp, strcat(format, "lu"), t->sector);
839 break;
840 case 't':
841 sprintf(format, "%%0%dlu", has_w ? width : 9);
842 fprintf(ofp, format, NANO_SECONDS(t->time));
843 break;
844 case 'T':
845 fprintf(ofp, strcat(format, "d"), SECONDS(t->time));
846 break;
847 case 'u':
848 if (elapsed == -1ULL) {
849 fprintf(stderr, "Expecting elapsed value\n");
850 exit(1);
851 }
852 fprintf(ofp, strcat(format, "llu"), elapsed / 1000);
853 break;
854 case 'U': {
855 __u64 *depth = (__u64 *) ((char *) t + sizeof(*t));
856 fprintf(ofp, strcat(format, "u"),
857 (unsigned int) be64_to_cpu(*depth));
858 break;
859 }
860 default:
861 fprintf(ofp,strcat(format, "c"), field);
862 break;
863 }
864}
2e3e8ded 865
ab197ca7
AB
866static char *parse_field(char *act, struct per_cpu_info *pci,
867 struct blk_io_trace *t, unsigned long long elapsed,
868 int pdu_len, unsigned char *pdu_buf,
869 char *master_format)
870{
871 int minus = 0;
872 int has_w = 0;
873 int width = 0;
874 char *p = master_format;
875
876 if (*p == '-') {
877 minus = 1;
878 p++;
2e3e8ded 879 }
ab197ca7
AB
880 if (isdigit(*p)) {
881 has_w = 1;
882 do {
883 width = (width * 10) + (*p++ - '0');
884 } while ((*p) && (isdigit(*p)));
885 }
886 if (*p) {
887 print_field(act, pci, t, elapsed, pdu_len, pdu_buf, *p++,
888 minus, has_w, width);
889 }
890 return p;
d0ca268b
JA
891}
892
ab197ca7
AB
893static char *fmt_select(int fmt_spec, struct blk_io_trace *t,
894 unsigned long long elapsed)
d0ca268b 895{
ab197ca7
AB
896 char *fmt;
897 char scratch_format[1024];
2e3e8ded 898
ab197ca7
AB
899 if (override_format[fmt_spec] != NULL)
900 return override_format[fmt_spec];
95c15013 901
ab197ca7
AB
902 switch (fmt_spec) {
903 case 'C': /* Complete */
904 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
905 strcpy(scratch_format, HEADER);
906 strcat(scratch_format, "%P");
907 } else {
908 strcpy(scratch_format, HEADER "%S + %n ");
909 if (elapsed != -1ULL)
910 strcat(scratch_format, "(%8u) ");
911 }
912 strcat(scratch_format, "[%e]\n");
913 fmt = scratch_format;
914 break;
915
916 case 'D': /* Issue */
917 if (t->action & BLK_TC_ACT(BLK_TC_PC)) {
918 strcpy(scratch_format, HEADER);
919 strcat(scratch_format, "%P");
920 } else {
921 strcpy(scratch_format, HEADER "%S + %n ");
922 if (elapsed != -1ULL)
923 strcat(scratch_format, "(%8u) ");
924 }
925 strcat(scratch_format,"[%C]\n");
926 fmt = scratch_format;
927 break;
928
929 case 'Q': /* Queue */
930 strcpy(scratch_format, HEADER "%S + %n ");
931 if (elapsed != -1ULL)
932 strcat(scratch_format, "(%8u) ");
933 strcat(scratch_format,"[%C]\n");
934 fmt = scratch_format;
935 break;
936
937 case 'B': /* Back merge */
938 case 'F': /* Front merge */
939 case 'M': /* Front or back merge */
940 fmt = HEADER "%S + %n [%C]\n";
941 break;
942
943 case 'P': /* Plug */
944 fmt = HEADER "[%C]\n";
945 break;
946
947 case 'G': /* Get request */
948 case 'S': /* Sleep request */
949 fmt = HEADER "%S + %n [%C]\n";
950 break;
951
952 case 'U': /* Unplug IO */
953 case 'T': /* Unplug timer */
954 fmt = HEADER "[%C] %U\n";
955 break;
956
957 default:
958 fprintf(stderr,"FATAL: Invalid format spec %c\n", fmt_spec);
959 exit(1);
960 /*NOTREACHED*/
95c15013 961 }
ab197ca7
AB
962
963 return fmt;
d0ca268b
JA
964}
965
ab197ca7
AB
966static void process_fmt(char *act, struct per_cpu_info *pci,
967 struct blk_io_trace *t, unsigned long long elapsed,
968 int pdu_len, unsigned char *pdu_buf)
d0ca268b 969{
ab197ca7 970 char *p = fmt_select(act[0], t, elapsed);
2e3e8ded 971
ab197ca7
AB
972 while (*p) {
973 switch (*p) {
974 case '%': /* Field specifier */
975 p++;
976 if (*p == '%')
977 fprintf(ofp, "%c", *p++);
978 else if (!*p)
979 fprintf(ofp, "%c", '%');
980 else
981 p = parse_field(act, pci, t, elapsed,
982 pdu_len, pdu_buf, p);
983 break;
984 case '\\': { /* escape */
985 switch (p[1]) {
986 case 'b': fprintf(ofp, "\b"); break;
987 case 'n': fprintf(ofp, "\n"); break;
988 case 'r': fprintf(ofp, "\r"); break;
989 case 't': fprintf(ofp, "\t"); break;
990 default:
991 fprintf(stderr,
992 "Invalid escape char in format %c\n",
993 p[1]);
994 exit(1);
995 /*NOTREACHED*/
996 }
997 p += 2;
998 break;
999 }
1000 default:
1001 fprintf(ofp, "%c", *p++);
1002 break;
1003 }
2e3e8ded 1004 }
ab197ca7
AB
1005}
1006
1007static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
1008 char *act)
1009{
1010 process_fmt(act, pci, t, log_track_complete(t), 0, NULL);
1011}
1012
1013static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
1014 char *act)
1015{
1016 process_fmt(act, pci, t, log_track_queue(t), 0, NULL);
1017}
2e3e8ded 1018
ab197ca7
AB
1019static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
1020 char *act)
1021{
1022 process_fmt(act, pci, t, log_track_issue(t), 0, NULL);
d0ca268b
JA
1023}
1024
d5396421 1025static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1026 char *act)
d0ca268b 1027{
a01516de
JA
1028 if (act[0] == 'F')
1029 log_track_frontmerge(t);
2e3e8ded 1030
ab197ca7 1031 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1032}
1033
dfe34da1 1034static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1035 char *act)
dfe34da1 1036{
ab197ca7 1037 process_fmt(act, pci, t, -1ULL, 0, NULL);
dfe34da1
JA
1038}
1039
d5396421 1040static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1041 char *act)
d0ca268b 1042{
ab197ca7 1043 process_fmt(act, pci, t, -1ULL, 0, NULL);
d0ca268b
JA
1044}
1045
ab197ca7 1046static void log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 1047 char *act)
67e14fdc 1048{
ab197ca7 1049 process_fmt(act, pci, t, -1ULL, 0, NULL);
67e14fdc
JA
1050}
1051
ab197ca7 1052static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 1053{
ab197ca7 1054 unsigned char *buf = (unsigned char *) t + sizeof(*t);
d0ca268b 1055
ab197ca7 1056 process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
d0ca268b
JA
1057}
1058
d5396421 1059static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 1060{
87b72777
JA
1061 int ret = 0;
1062
d0ca268b
JA
1063 switch (t->action & 0xffff) {
1064 case __BLK_TA_QUEUE:
3639a11e 1065 log_generic(pci, t, "Q");
d0ca268b
JA
1066 break;
1067 case __BLK_TA_GETRQ:
3639a11e 1068 log_generic(pci, t, "G");
d0ca268b
JA
1069 break;
1070 case __BLK_TA_SLEEPRQ:
3639a11e 1071 log_generic(pci, t, "S");
d0ca268b
JA
1072 break;
1073 case __BLK_TA_REQUEUE:
3639a11e 1074 log_generic(pci, t, "R");
d0ca268b
JA
1075 break;
1076 case __BLK_TA_ISSUE:
ab197ca7 1077 log_pc(pci, t, "D");
d0ca268b
JA
1078 break;
1079 case __BLK_TA_COMPLETE:
3639a11e 1080 log_pc(pci, t, "C");
d0ca268b
JA
1081 break;
1082 default:
1083 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
1084 ret = 1;
1085 break;
d0ca268b
JA
1086 }
1087
ab197ca7 1088 return 0;
d0ca268b
JA
1089}
1090
d5396421 1091static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
1092{
1093 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 1094 int act = t->action & 0xffff;
d0ca268b 1095
7997c5b0 1096 switch (act) {
d0ca268b 1097 case __BLK_TA_QUEUE:
152f6476 1098 account_q(t, pci, w);
3639a11e 1099 log_queue(pci, t, "Q");
d0ca268b
JA
1100 break;
1101 case __BLK_TA_BACKMERGE:
152f6476 1102 account_m(t, pci, w);
3639a11e 1103 log_merge(pci, t, "M");
d0ca268b
JA
1104 break;
1105 case __BLK_TA_FRONTMERGE:
152f6476 1106 account_m(t, pci, w);
3639a11e 1107 log_merge(pci, t, "F");
d0ca268b
JA
1108 break;
1109 case __BLK_TA_GETRQ:
95c15013 1110 log_track_getrq(t);
3639a11e 1111 log_generic(pci, t, "G");
d0ca268b
JA
1112 break;
1113 case __BLK_TA_SLEEPRQ:
3639a11e 1114 log_generic(pci, t, "S");
d0ca268b
JA
1115 break;
1116 case __BLK_TA_REQUEUE:
152f6476 1117 account_c(t, pci, w, -t->bytes);
3639a11e 1118 log_queue(pci, t, "R");
d0ca268b
JA
1119 break;
1120 case __BLK_TA_ISSUE:
152f6476 1121 account_i(t, pci, w);
3639a11e 1122 log_issue(pci, t, "D");
d0ca268b
JA
1123 break;
1124 case __BLK_TA_COMPLETE:
152f6476 1125 account_c(t, pci, w, t->bytes);
3639a11e 1126 log_complete(pci, t, "C");
d0ca268b 1127 break;
88b1a526 1128 case __BLK_TA_PLUG:
3639a11e 1129 log_action(pci, t, "P");
88b1a526 1130 break;
3639a11e 1131 case __BLK_TA_UNPLUG_IO:
06639b27 1132 account_unplug(t, pci, 0);
3639a11e
JA
1133 log_unplug(pci, t, "U");
1134 break;
1135 case __BLK_TA_UNPLUG_TIMER:
06639b27 1136 account_unplug(t, pci, 1);
3639a11e 1137 log_unplug(pci, t, "UT");
88b1a526 1138 break;
d0ca268b
JA
1139 default:
1140 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 1141 break;
d0ca268b 1142 }
d0ca268b
JA
1143}
1144
e7c9f3ff
NS
1145static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
1146 struct per_dev_info *pdi)
d0ca268b 1147{
87b72777
JA
1148 int ret = 0;
1149
d0ca268b 1150 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 1151 ret = dump_trace_pc(t, pci);
d0ca268b 1152 else
d5396421 1153 dump_trace_fs(t, pci);
87b72777 1154
e7c9f3ff 1155 pdi->events++;
87b72777 1156 return ret;
d0ca268b
JA
1157}
1158
152f6476 1159static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 1160{
152f6476
JA
1161 fprintf(ofp, "%s\n", msg);
1162
1163 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
1164 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 1165
152f6476
JA
1166 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
1167 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
1168 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
1169 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
1170 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 1171 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
1172 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
1173 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
1174}
1175
50adc0ba
JA
1176static void dump_wait_stats(struct per_process_info *ppi)
1177{
b9d40d6f
JA
1178 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
1179 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
1180 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
1181 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
1182 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
1183 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
1184
1185 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1186 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1187 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1188 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1189 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1190 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1191}
1192
886ecf0e
JA
1193static int ppi_name_compare(const void *p1, const void *p2)
1194{
1195 struct per_process_info *ppi1 = *((struct per_process_info **) p1);
1196 struct per_process_info *ppi2 = *((struct per_process_info **) p2);
1197 int res;
1198
1199 res = strverscmp(ppi1->name, ppi2->name);
1200 if (!res)
1201 res = ppi1->pid < ppi2->pid;
1202
1203 return res;
1204}
1205
1206static void sort_process_list(void)
1207{
1208 struct per_process_info **ppis;
1209 struct per_process_info *ppi;
1210 int i = 0;
1211
1212 ppis = malloc(ppi_list_entries * sizeof(struct per_process_info *));
1213
1214 ppi = ppi_list;
1215 while (ppi) {
1216 ppis[i] = ppi;
1217 ppi = ppi->list_next;
1218 }
1219
1220 qsort(ppis, ppi_list_entries, sizeof(*ppi), ppi_name_compare);
1221
1222 i = ppi_list_entries - 1;
1223 ppi_list = NULL;
1224 while (i >= 0) {
1225 ppi = ppis[i];
1226
1227 ppi->list_next = ppi_list;
1228 ppi_list = ppi;
1229 i--;
1230 }
1231}
1232
152f6476
JA
1233static void show_process_stats(void)
1234{
1235 struct per_process_info *ppi;
1236
886ecf0e
JA
1237 sort_process_list();
1238
152f6476
JA
1239 ppi = ppi_list;
1240 while (ppi) {
1241 dump_io_stats(&ppi->io_stats, ppi->name);
50adc0ba 1242 dump_wait_stats(ppi);
152f6476
JA
1243 ppi = ppi->list_next;
1244 }
1245
1246 fprintf(ofp, "\n");
1247}
1248
e7c9f3ff 1249static void show_device_and_cpu_stats(void)
d0ca268b 1250{
e7c9f3ff
NS
1251 struct per_dev_info *pdi;
1252 struct per_cpu_info *pci;
1253 struct io_stats total, *ios;
1254 int i, j, pci_events;
1255 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1256 char name[32];
1257
1258 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1259
1260 memset(&total, 0, sizeof(total));
1261 pci_events = 0;
1262
1263 if (i > 0)
1264 fprintf(ofp, "\n");
1265
1266 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1267 if (!pci->nelems)
1268 continue;
1269
1270 ios = &pci->io_stats;
1271 total.qreads += ios->qreads;
1272 total.qwrites += ios->qwrites;
1273 total.creads += ios->creads;
1274 total.cwrites += ios->cwrites;
1275 total.mreads += ios->mreads;
1276 total.mwrites += ios->mwrites;
1277 total.ireads += ios->ireads;
1278 total.iwrites += ios->iwrites;
1279 total.qread_kb += ios->qread_kb;
1280 total.qwrite_kb += ios->qwrite_kb;
1281 total.cread_kb += ios->cread_kb;
1282 total.cwrite_kb += ios->cwrite_kb;
1283 total.iread_kb += ios->iread_kb;
1284 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1285 total.timer_unplugs += ios->timer_unplugs;
1286 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1287
1288 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1289 j, get_dev_name(pdi, name, sizeof(name)));
1290 dump_io_stats(ios, line);
1291 pci_events++;
1292 }
5c017e4b 1293
e7c9f3ff
NS
1294 if (pci_events > 1) {
1295 fprintf(ofp, "\n");
1296 snprintf(line, sizeof(line) - 1, "Total (%s):",
1297 get_dev_name(pdi, name, sizeof(name)));
1298 dump_io_stats(&total, line);
1299 }
d0ca268b 1300
0aa8a62d
JA
1301 fprintf(ofp, "\nEvents (%s): %'Lu entries, %'lu skips\n",
1302 get_dev_name(pdi, line, sizeof(line)), pdi->events,
1303 pdi->skips);
e7c9f3ff 1304 }
d0ca268b
JA
1305}
1306
cb2a1a62 1307static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1308{
cb2a1a62 1309 unsigned long max_offset = offset;
2ff323b0
JA
1310 unsigned long off;
1311 struct blk_io_trace *bit;
1312 __u32 magic;
1313
1314 for (off = 0; off < max_offset; off++) {
1315 bit = p + off;
1316
1317 magic = be32_to_cpu(bit->magic);
1318 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1319 return bit;
1320 }
1321
1322 return NULL;
1323}
1324
cb2a1a62 1325static int sort_entries(void)
8fc0abbc 1326{
e7c9f3ff 1327 struct per_dev_info *pdi;
412819ce 1328 struct per_cpu_info *pci;
8fc0abbc
JA
1329 struct blk_io_trace *bit;
1330 struct trace *t;
cb2a1a62 1331 int nr = 0;
8fc0abbc 1332
cb2a1a62 1333 while ((t = trace_list) != NULL) {
412819ce 1334
cb2a1a62
JA
1335 trace_list = t->next;
1336 bit = t->bit;
6fe4709e 1337
8fc0abbc
JA
1338 memset(&t->rb_node, 0, sizeof(t->rb_node));
1339
cb2a1a62 1340 if (verify_trace(bit))
66fa7233 1341 break;
cb2a1a62
JA
1342 if (trace_rb_insert(t))
1343 return -1;
66fa7233 1344
cb2a1a62
JA
1345 pdi = get_dev_info(bit->device);
1346 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1347 pci->nelems++;
1348
cb2a1a62 1349 nr++;
6fe4709e 1350 }
8fc0abbc 1351
cb2a1a62 1352 return nr;
412819ce
JA
1353}
1354
f1327cc5 1355static void show_entries_rb(int piped)
8fc0abbc 1356{
e7c9f3ff 1357 struct per_dev_info *pdi;
8fc0abbc 1358 struct blk_io_trace *bit;
3aabcd89 1359 struct rb_node *n;
8fc0abbc
JA
1360 struct trace *t;
1361 int cpu;
1362
cb2a1a62 1363 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1364
1365 t = rb_entry(n, struct trace, rb_node);
1366 bit = t->bit;
1367
cb2a1a62 1368 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1369 if (!pdi) {
1370 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1371 MAJOR(bit->device), MINOR(bit->device));
1372 break;
1373 }
d5396421 1374 cpu = bit->cpu;
e7c9f3ff
NS
1375 if (cpu > pdi->ncpus) {
1376 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1377 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1378 break;
8fc0abbc
JA
1379 }
1380
cb2a1a62
JA
1381 /*
1382 * back off displaying more info if we are out of sync
1383 * on SMP systems. to prevent stalling on lost events,
0aa8a62d 1384 * only allow an event to us a few times
cb2a1a62 1385 */
a3983763
JA
1386 if (bit->sequence != (pdi->last_sequence + 1)) {
1387 if (piped && t->skipped < 5) {
0aa8a62d 1388 t->skipped++;
cb2a1a62 1389 break;
0aa8a62d
JA
1390 } else {
1391 fprintf(stderr, "skipping from %lu to %u\n", pdi->last_sequence, bit->sequence);
1392 pdi->skips++;
cb2a1a62
JA
1393 }
1394 }
1395
1396 pdi->last_sequence = bit->sequence;
1397
cfab07eb 1398 bit->time -= genesis_time;
46e6968b
NS
1399 if (bit->time < stopwatch_start)
1400 continue;
1401 if (bit->time >= stopwatch_end)
1402 break;
8fc0abbc 1403
e7c9f3ff 1404 check_time(pdi, bit);
8fc0abbc 1405
e7c9f3ff 1406 if (dump_trace(bit, &pdi->cpus[cpu], pdi))
87b72777
JA
1407 break;
1408
cb2a1a62 1409 rb_erase(&t->rb_node, &rb_sort_root);
e8741a4a 1410
f1327cc5 1411 if (piped) {
e8741a4a
JA
1412 free(bit);
1413 free(t);
1414 }
cb2a1a62 1415 }
8fc0abbc
JA
1416}
1417
1f79c4a0
JA
1418static int read_data(int fd, void *buffer, int bytes, int block)
1419{
1420 int ret, bytes_left, fl;
1421 void *p;
1422
1423 fl = fcntl(fd, F_GETFL);
1424
1425 if (!block)
1426 fcntl(fd, F_SETFL, fl | O_NONBLOCK);
1427 else
1428 fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
1429
1430 bytes_left = bytes;
1431 p = buffer;
1432 while (bytes_left > 0) {
1433 ret = read(fd, p, bytes_left);
1434 if (!ret)
1435 return 1;
1436 else if (ret < 0) {
1437 if (errno != EAGAIN)
1438 perror("read");
1439 return -1;
1440 } else {
1441 p += ret;
1442 bytes_left -= ret;
1443 }
1444 }
1445
1446 return 0;
1447}
1448
cb2a1a62
JA
1449/*
1450 * Find the traces in 'tb' and add them to the list for sorting and
1451 * displaying
1452 */
1453static int find_entries(void *tb, unsigned long size)
1454{
1455 struct blk_io_trace *bit;
1456 struct trace *t;
1457 void *start = tb;
1458
1459 while (tb - start <= size - sizeof(*bit)) {
1460 bit = find_trace(tb, size - (tb - start));
1461 if (!bit)
1462 break;
1463
1464 t = malloc(sizeof(*t));
1465 memset(t, 0, sizeof(*t));
1466 t->bit = bit;
1467
f1327cc5 1468 trace_to_cpu(bit);
cb2a1a62
JA
1469 t->next = trace_list;
1470 trace_list = t;
1471
1472 tb += sizeof(*bit) + bit->pdu_len;
1473 }
1474
1475 return 0;
1476}
1477
d5396421 1478static int do_file(void)
d0ca268b 1479{
cb2a1a62 1480 int i, j, nfiles = 0, nelems;
d0ca268b 1481
e8741a4a 1482 for (i = 0; i < ndevices; i++) {
e7c9f3ff 1483 for (j = 0;; j++, nfiles++) {
e8741a4a 1484 struct per_dev_info *pdi;
e7c9f3ff
NS
1485 struct per_cpu_info *pci;
1486 struct stat st;
1487 void *tb;
87b72777 1488
e8741a4a 1489 pdi = &devices[i];
e7c9f3ff
NS
1490 pci = get_cpu_info(pdi, j);
1491 pci->cpu = j;
d0ca268b 1492
e7c9f3ff 1493 snprintf(pci->fname, sizeof(pci->fname)-1,
428683db 1494 "%s.%d", pdi->name, j);
e7c9f3ff
NS
1495 if (stat(pci->fname, &st) < 0)
1496 break;
1497 if (!st.st_size)
1498 continue;
1499
1500 printf("Processing %s\n", pci->fname);
1501
1502 tb = malloc(st.st_size);
1503 if (!tb) {
1504 fprintf(stderr, "Out of memory, skip file %s\n",
1505 pci->fname);
1506 continue;
1507 }
1508
1509 pci->fd = open(pci->fname, O_RDONLY);
1510 if (pci->fd < 0) {
1511 perror(pci->fname);
1512 free(tb);
1513 continue;
1514 }
1515
1516 if (read_data(pci->fd, tb, st.st_size, 1)) {
1517 close(pci->fd);
1518 free(tb);
1519 continue;
1520 }
1521
cb2a1a62
JA
1522 if (find_entries(tb, st.st_size)) {
1523 close(pci->fd);
1524 free(tb);
1525 }
1526
1527 nelems = sort_entries();
1528 if (nelems == -1) {
e7c9f3ff
NS
1529 close(pci->fd);
1530 free(tb);
1531 continue;
1532 }
1533
1534 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1535 pci->fname, j, nelems);
e7c9f3ff 1536 close(pci->fd);
d0ca268b 1537 }
d5396421
JA
1538 }
1539
1540 if (!nfiles) {
1541 fprintf(stderr, "No files found\n");
1542 return 1;
1543 }
1544
e8741a4a 1545 show_entries_rb(0);
d5396421
JA
1546 return 0;
1547}
1548
cb2a1a62 1549static int read_sort_events(int fd)
d5396421 1550{
cb2a1a62 1551 int events = 0;
d5396421 1552
412819ce 1553 do {
cb2a1a62
JA
1554 struct blk_io_trace *bit;
1555 struct trace *t;
412819ce 1556 int pdu_len;
51128a28 1557 __u32 magic;
d5396421 1558
cb2a1a62 1559 bit = malloc(sizeof(*bit));
d5396421 1560
cb2a1a62 1561 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1562 break;
d5396421 1563
cb2a1a62 1564 magic = be32_to_cpu(bit->magic);
51128a28
JA
1565 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1566 fprintf(stderr, "Bad magic %x\n", magic);
1567 break;
1568 }
1569
cb2a1a62 1570 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1571 if (pdu_len) {
cb2a1a62 1572 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1573
cb2a1a62 1574 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1575 break;
d5396421 1576
cb2a1a62 1577 bit = ptr;
2ff323b0 1578 }
d5396421 1579
cb2a1a62
JA
1580 t = malloc(sizeof(*t));
1581 memset(t, 0, sizeof(*t));
1582 t->bit = bit;
f1327cc5
JA
1583
1584 trace_to_cpu(bit);
cb2a1a62
JA
1585 t->next = trace_list;
1586 trace_list = t;
1587
412819ce 1588 events++;
79f19470 1589 } while (!is_done() && events < rb_batch);
d5396421 1590
412819ce
JA
1591 return events;
1592}
d5396421 1593
412819ce
JA
1594static int do_stdin(void)
1595{
1596 int fd;
d5396421 1597
1f79c4a0 1598 fd = dup(STDIN_FILENO);
412819ce
JA
1599 do {
1600 int events;
d5396421 1601
cb2a1a62 1602 events = read_sort_events(fd);
412819ce
JA
1603 if (!events)
1604 break;
1605
cb2a1a62 1606 if (sort_entries() == -1)
2ff323b0
JA
1607 break;
1608
e8741a4a 1609 show_entries_rb(1);
d5396421
JA
1610 } while (1);
1611
1612 close(fd);
d5396421
JA
1613 return 0;
1614}
d0ca268b 1615
1f79c4a0 1616static void flush_output(void)
412819ce 1617{
152f6476 1618 fflush(ofp);
412819ce
JA
1619}
1620
1f79c4a0 1621static void handle_sigint(int sig)
412819ce
JA
1622{
1623 done = 1;
1624 flush_output();
1625}
1626
46e6968b
NS
1627/*
1628 * Extract start and duration times from a string, allowing
1629 * us to specify a time interval of interest within a trace.
1630 * Format: "duration" (start is zero) or "start:duration".
1631 */
1632static int find_stopwatch_interval(char *string)
1633{
1634 double value;
1635 char *sp;
1636
1637 value = strtod(string, &sp);
1638 if (sp == string) {
1639 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1640 return 1;
1641 }
1642 if (*sp == ':') {
1643 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1644 string = sp + 1;
1645 value = strtod(string, &sp);
1646 if (sp == string || *sp != '\0') {
1647 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1648 string);
1649 return 1;
1650 }
1651 } else if (*sp != '\0') {
1652 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1653 return 1;
1654 }
1655 stopwatch_end = stopwatch_start + DOUBLE_TO_NANO_ULL(value);
1656 return 0;
1657}
1658
1f79c4a0
JA
1659static void usage(char *prog)
1660{
46e6968b
NS
1661 fprintf(stderr, "Usage: %s "
1662 "[-i <name>] [-o <output>] [-s] [-w N[:n]] <name>...\n",
1663 prog);
1f79c4a0
JA
1664}
1665
d5396421
JA
1666int main(int argc, char *argv[])
1667{
152f6476 1668 char *ofp_buffer;
a66877e6 1669 int c, ret, mode;
1e1c60f1 1670 int per_device_and_cpu_stats = 1;
d5396421
JA
1671
1672 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1673 switch (c) {
1674 case 'i':
e7c9f3ff
NS
1675 if (!strcmp(optarg, "-") && !pipeline)
1676 pipeline = 1;
1677 else if (resize_devices(optarg) != 0)
1678 return 1;
d5396421
JA
1679 break;
1680 case 'o':
66efebf8 1681 output_name = optarg;
d5396421 1682 break;
79f19470
JA
1683 case 'b':
1684 rb_batch = atoi(optarg);
1685 if (rb_batch <= 0)
1686 rb_batch = RB_BATCH_DEFAULT;
1687 break;
152f6476
JA
1688 case 's':
1689 per_process_stats = 1;
1690 break;
7997c5b0
JA
1691 case 't':
1692 track_ios = 1;
1693 break;
1e1c60f1
NS
1694 case 'q':
1695 per_device_and_cpu_stats = 0;
1696 break;
46e6968b
NS
1697 case 'w':
1698 if (find_stopwatch_interval(optarg) != 0)
1699 return 1;
1700 break;
ab197ca7
AB
1701 case 'f':
1702 set_all_format_specs(optarg);
1703 break;
1704 case 'F':
1705 if (add_format_spec(optarg) != 0)
1706 return 1;
1707 break;
d5396421 1708 default:
1f79c4a0 1709 usage(argv[0]);
d5396421
JA
1710 return 1;
1711 }
d0ca268b
JA
1712 }
1713
e7c9f3ff
NS
1714 while (optind < argc) {
1715 if (!strcmp(argv[optind], "-") && !pipeline)
1716 pipeline = 1;
1717 else if (resize_devices(argv[optind]) != 0)
1718 return 1;
1719 optind++;
1720 }
1721
1722 if (!pipeline && !ndevices) {
1f79c4a0 1723 usage(argv[0]);
d5396421
JA
1724 return 1;
1725 }
1726
7997c5b0
JA
1727 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1728 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1729
1730 signal(SIGINT, handle_sigint);
1731 signal(SIGHUP, handle_sigint);
1732 signal(SIGTERM, handle_sigint);
d5396421 1733
d69db225
JA
1734 setlocale(LC_NUMERIC, "en_US");
1735
a66877e6 1736 if (!output_name) {
152f6476 1737 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1738 mode = _IOLBF;
1739 } else {
152f6476
JA
1740 char ofname[128];
1741
1742 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1743 ofp = fopen(ofname, "w");
a66877e6 1744 mode = _IOFBF;
152f6476
JA
1745 }
1746
1747 if (!ofp) {
1748 perror("fopen");
1749 return 1;
1750 }
1751
1752 ofp_buffer = malloc(4096);
a66877e6 1753 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1754 perror("setvbuf");
1755 return 1;
1756 }
1757
e7c9f3ff 1758 if (pipeline)
d5396421
JA
1759 ret = do_stdin();
1760 else
1761 ret = do_file();
1762
152f6476
JA
1763 if (per_process_stats)
1764 show_process_stats();
1765
1e1c60f1
NS
1766 if (per_device_and_cpu_stats)
1767 show_device_and_cpu_stats();
152f6476 1768
412819ce 1769 flush_output();
d5396421 1770 return ret;
d0ca268b 1771}