[PATCH] blkparse: actually include blkparse diff
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
d0ca268b 33
8fc0abbc
JA
34#include "blktrace.h"
35#include "rbtree.h"
d0ca268b 36
2e3e8ded
JA
/*
 * Split a nanosecond timestamp into whole seconds and the nanosecond
 * remainder, and convert a floating point second count to nanoseconds.
 */
#define SECONDS(x)		((unsigned long long)(x) / 1000000000)
#define NANO_SECONDS(x)		((unsigned long long)(x) % 1000000000)
#define DOUBLE_TO_NANO_ULL(d)	((unsigned long long)((d) * 1000000000))

/*
 * A device number keeps its minor in the low MINORBITS bits and its major
 * in the bits above them.
 */
#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)
#define MAJOR(dev)	((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))

/*
 * Smaller of two values. Both arguments may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define min(a, b)	((a) < (b) ? (a) : (b))
d5396421 47
152f6476
JA
/*
 * Event counters and KiB totals, split by read/write. The prefix names
 * the event kind: q = queued, c = completed, m = merged, i = issued
 * (printed as "Dispatches"). The *_kb members accumulate bytes >> 10.
 */
struct io_stats {
	unsigned long qreads, qwrites;
	unsigned long creads, cwrites;
	unsigned long mreads, mwrites;
	unsigned long ireads, iwrites;
	unsigned long long qread_kb, qwrite_kb;
	unsigned long long cread_kb, cwrite_kb;
	unsigned long long iread_kb, iwrite_kb;
	unsigned long io_unplugs;
	unsigned long timer_unplugs;
};
55
d5396421 56struct per_cpu_info {
d0ca268b
JA
57 int cpu;
58 int nelems;
d0ca268b
JA
59
60 int fd;
87b72777 61 char fname[128];
d0ca268b 62
152f6476
JA
63 struct io_stats io_stats;
64};
8fc0abbc 65
e7c9f3ff
NS
66struct per_dev_info {
67 dev_t id;
68 char *name;
69
70 int backwards;
71 unsigned long long events;
72 unsigned long long last_reported_time;
73 struct io_stats io_stats;
cb2a1a62 74 unsigned long last_sequence;
e7c9f3ff
NS
75
76 int ncpus;
77 struct per_cpu_info *cpus;
78};
79
152f6476
JA
80struct per_process_info {
81 char name[16];
82 __u32 pid;
83 struct io_stats io_stats;
84 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
85
86 /*
87 * individual io stats
88 */
b9d40d6f
JA
89 unsigned long long longest_allocation_wait[2];
90 unsigned long long longest_dispatch_wait[2];
91 unsigned long long longest_completion_wait[2];
d0ca268b
JA
92};
93
152f6476
JA
94#define PPI_HASH_SHIFT (8)
95static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
96static struct per_process_info *ppi_list;
97
/*
 * Short option string and the matching long options. Every long option
 * maps onto the short option character stored in .val.
 *
 * Long option names must not contain whitespace: a name with a space in
 * it can only be given by quoting the whole argument, so the two
 * multi-word options use dashes.
 */
#define S_OPTS	"i:o:b:stqw:"
static struct option l_opts[] = {
	{
		.name = "input",
		.has_arg = 1,
		.flag = NULL,
		.val = 'i'
	},
	{
		.name = "output",
		.has_arg = 1,
		.flag = NULL,
		.val = 'o'
	},
	{
		.name = "batch",
		.has_arg = 1,
		.flag = NULL,
		.val = 'b'
	},
	{
		.name = "per-program-stats",
		.has_arg = 0,
		.flag = NULL,
		.val = 's'
	},
	{
		.name = "track-ios",
		.has_arg = 0,
		.flag = NULL,
		.val = 't'
	},
	{
		.name = "quiet",
		.has_arg = 0,
		.flag = NULL,
		.val = 'q'
	},
	{
		.name = "stopwatch",
		.has_arg = 1,
		.flag = NULL,
		.val = 'w'
	},
	{
		/* terminator */
		.name = NULL,
		.has_arg = 0,
		.flag = NULL,
		.val = 0
	}
};
149
7997c5b0
JA
150/*
151 * for sorting the displayed output
152 */
8fc0abbc
JA
153struct trace {
154 struct blk_io_trace *bit;
155 struct rb_node rb_node;
cb2a1a62
JA
156 struct trace *next;
157 int skipped;
8fc0abbc
JA
158};
159
cb2a1a62
JA
160static struct rb_root rb_sort_root;
161static struct rb_root rb_track_root;
162
163static struct trace *trace_list;
164
7997c5b0
JA
165/*
166 * for tracking individual ios
167 */
168struct io_track {
169 struct rb_node rb_node;
170
e7c9f3ff 171 dev_t device;
7997c5b0
JA
172 __u64 sector;
173 __u32 pid;
95c15013 174 unsigned long long allocation_time;
7997c5b0
JA
175 unsigned long long queue_time;
176 unsigned long long dispatch_time;
177 unsigned long long completion_time;
178};
179
e7c9f3ff
NS
/*
 * table of known devices, grown one entry at a time by resize_devices()
 */
static int ndevices;
static struct per_dev_info *devices;
static char *get_dev_name(struct per_dev_info *, char *, int);

/*
 * stream that all formatted output is written to
 */
static FILE *ofp;
static char *output_name;

/*
 * lowest timestamp seen so far; displayed times are relative to it
 */
static unsigned long long genesis_time;

/*
 * Only events inside [stopwatch_start, stopwatch_end) are shown. Use the
 * standard ULLONG_MAX rather than the GNU-only ULONG_LONG_MAX, which is
 * not defined when compiling in strict ISO C mode.
 */
static unsigned long long stopwatch_start;	/* start from zero by default */
static unsigned long long stopwatch_end = ULLONG_MAX;	/* "infinity" */

static int per_process_stats;
static int track_ios;

#define RB_BATCH_DEFAULT	(1024)
static int rb_batch = RB_BATCH_DEFAULT;

static int pipeline;

/*
 * "done" flag, always read through a volatile access
 */
#define is_done()	(*(volatile int *)(&done))
static volatile int done;
201
152f6476
JA
202static inline unsigned long hash_long(unsigned long val)
203{
16ef714e
JA
204#if __WORDSIZE == 32
205 val *= 0x9e370001UL;
206#elif __WORDSIZE == 64
207 val *= 0x9e37fffffffc0001UL;
208#else
209#error unknown word size
210#endif
211
212 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
213}
214
215static inline void add_process_to_hash(struct per_process_info *ppi)
216{
217 const int hash_idx = hash_long(ppi->pid);
218
219 ppi->hash_next = ppi_hash[hash_idx];
220 ppi_hash[hash_idx] = ppi;
221}
222
223static inline void add_process_to_list(struct per_process_info *ppi)
224{
225 ppi->list_next = ppi_list;
226 ppi_list = ppi;
227}
228
229static struct per_process_info *find_process_by_pid(__u32 pid)
230{
231 const int hash_idx = hash_long(pid);
232 struct per_process_info *ppi;
233
234 ppi = ppi_hash[hash_idx];
235 while (ppi) {
236 if (ppi->pid == pid)
237 return ppi;
238
239 ppi = ppi->hash_next;
240 }
241
242 return NULL;
243}
244
7997c5b0
JA
245static inline int trace_rb_insert(struct trace *t)
246{
247 struct rb_node **p = &rb_sort_root.rb_node;
248 struct rb_node *parent = NULL;
249 struct trace *__t;
250
e7c9f3ff
NS
251 if (genesis_time == 0 || t->bit->time < genesis_time)
252 genesis_time = t->bit->time;
253
7997c5b0
JA
254 while (*p) {
255 parent = *p;
256 __t = rb_entry(parent, struct trace, rb_node);
257
e7c9f3ff
NS
258 if (t->bit->time < __t->bit->time)
259 p = &(*p)->rb_left;
260 else if (t->bit->time > __t->bit->time)
261 p = &(*p)->rb_right;
262 else if (t->bit->device < __t->bit->device)
263 p = &(*p)->rb_left;
264 else if (t->bit->device > __t->bit->device)
265 p = &(*p)->rb_right;
266 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
267 p = &(*p)->rb_left;
268 else if (t->bit->sequence > __t->bit->sequence)
269 p = &(*p)->rb_right;
e7c9f3ff
NS
270 else if (t->bit->device == __t->bit->device) {
271 fprintf(stderr,
272 "sequence alias (%d) on device %d,%d!\n",
273 t->bit->sequence,
274 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
275 return 1;
276 }
277 }
278
279 rb_link_node(&t->rb_node, parent, p);
280 rb_insert_color(&t->rb_node, &rb_sort_root);
281 return 0;
282}
283
284static inline int track_rb_insert(struct io_track *iot)
285{
286 struct rb_node **p = &rb_track_root.rb_node;
287 struct rb_node *parent = NULL;
288 struct io_track *__iot;
289
290 while (*p) {
291 parent = *p;
e7c9f3ff 292
7997c5b0
JA
293 __iot = rb_entry(parent, struct io_track, rb_node);
294
e7c9f3ff
NS
295 if (iot->device < __iot->device)
296 p = &(*p)->rb_left;
297 else if (iot->device > __iot->device)
298 p = &(*p)->rb_right;
299 else if (iot->sector < __iot->sector)
7997c5b0
JA
300 p = &(*p)->rb_left;
301 else if (iot->sector > __iot->sector)
302 p = &(*p)->rb_right;
303 else {
e7c9f3ff
NS
304 fprintf(stderr,
305 "sector alias (%llu) on device %d,%d!\n",
306 iot->sector,
307 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
308 return 1;
309 }
310 }
311
312 rb_link_node(&iot->rb_node, parent, p);
313 rb_insert_color(&iot->rb_node, &rb_track_root);
314 return 0;
315}
316
e7c9f3ff 317static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
318{
319 struct rb_node **p = &rb_track_root.rb_node;
320 struct rb_node *parent = NULL;
321 struct io_track *__iot;
322
323 while (*p) {
324 parent = *p;
325
326 __iot = rb_entry(parent, struct io_track, rb_node);
327
e7c9f3ff
NS
328 if (device < __iot->device)
329 p = &(*p)->rb_left;
330 else if (device > __iot->device)
331 p = &(*p)->rb_right;
332 else if (sector < __iot->sector)
7997c5b0
JA
333 p = &(*p)->rb_left;
334 else if (sector > __iot->sector)
335 p = &(*p)->rb_right;
336 else
337 return __iot;
338 }
339
340 return NULL;
341}
342
e7c9f3ff 343static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 344{
916b5501 345 struct io_track *iot;
7997c5b0 346
e7c9f3ff 347 iot = __find_track(device, sector);
7997c5b0
JA
348 if (!iot) {
349 iot = malloc(sizeof(*iot));
50adc0ba 350 iot->pid = pid;
e7c9f3ff 351 iot->device = device;
7997c5b0
JA
352 iot->sector = sector;
353 track_rb_insert(iot);
354 }
355
356 return iot;
357}
358
a01516de 359static void log_track_frontmerge(struct blk_io_trace *t)
2e3e8ded
JA
360{
361 struct io_track *iot;
362
363 if (!track_ios)
364 return;
2e3e8ded 365
a01516de 366 iot = __find_track(t->device, t->sector + (t->bytes >> 9));
cb2a1a62
JA
367 if (!iot) {
368 fprintf(stderr, "failed to find mergeable event\n");
369 return;
2e3e8ded 370 }
cb2a1a62
JA
371
372 rb_erase(&iot->rb_node, &rb_track_root);
a01516de 373 iot->sector -= t->bytes >> 9;
cb2a1a62 374 track_rb_insert(iot);
2e3e8ded
JA
375}
376
95c15013 377static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
378{
379 struct io_track *iot;
380
381 if (!track_ios)
382 return;
383
e7c9f3ff 384 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
385 iot->allocation_time = t->time;
386}
387
388
389/*
390 * return time between rq allocation and queue
391 */
392static unsigned long long log_track_queue(struct blk_io_trace *t)
393{
50adc0ba 394 unsigned long long elapsed;
95c15013
JA
395 struct io_track *iot;
396
397 if (!track_ios)
398 return -1;
399
e7c9f3ff 400 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 401 iot->queue_time = t->time;
50adc0ba
JA
402 elapsed = iot->queue_time - iot->allocation_time;
403
404 if (per_process_stats) {
405 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 406 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 407
b9d40d6f
JA
408 if (ppi && elapsed > ppi->longest_allocation_wait[w])
409 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
410 }
411
412 return elapsed;
2e3e8ded
JA
413}
414
415/*
416 * return time between queue and issue
417 */
418static unsigned long long log_track_issue(struct blk_io_trace *t)
419{
50adc0ba 420 unsigned long long elapsed;
2e3e8ded
JA
421 struct io_track *iot;
422
423 if (!track_ios)
424 return -1;
425 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
426 return -1;
427
e7c9f3ff 428 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
429 if (!iot) {
430 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 431 return -1;
cb2a1a62 432 }
2e3e8ded
JA
433
434 iot->dispatch_time = t->time;
50adc0ba
JA
435 elapsed = iot->dispatch_time - iot->queue_time;
436
437 if (per_process_stats) {
438 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 439 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 440
b9d40d6f
JA
441 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
442 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
443 }
444
445 return elapsed;
2e3e8ded
JA
446}
447
448/*
449 * return time between dispatch and complete
450 */
451static unsigned long long log_track_complete(struct blk_io_trace *t)
452{
453 unsigned long long elapsed;
454 struct io_track *iot;
455
456 if (!track_ios)
457 return -1;
458 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
459 return -1;
460
e7c9f3ff 461 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
462 if (!iot) {
463 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 464 return -1;
cb2a1a62 465 }
2e3e8ded
JA
466
467 iot->completion_time = t->time;
468 elapsed = iot->completion_time - iot->dispatch_time;
469
50adc0ba
JA
470 if (per_process_stats) {
471 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 472 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 473
b9d40d6f
JA
474 if (ppi && elapsed > ppi->longest_completion_wait[w])
475 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
476 }
477
2e3e8ded
JA
478 /*
479 * kill the trace, we don't need it after completion
480 */
481 rb_erase(&iot->rb_node, &rb_track_root);
482 free(iot);
483
484 return elapsed;
485}
486
487
152f6476
JA
488static struct io_stats *find_process_io_stats(__u32 pid, char *name)
489{
490 struct per_process_info *ppi = find_process_by_pid(pid);
491
492 if (!ppi) {
493 ppi = malloc(sizeof(*ppi));
494 memset(ppi, 0, sizeof(*ppi));
495 strncpy(ppi->name, name, sizeof(ppi->name));
496 ppi->pid = pid;
497 add_process_to_hash(ppi);
498 add_process_to_list(ppi);
499 }
500
501 return &ppi->io_stats;
502}
503
e7c9f3ff
NS
504
505static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 506{
e7c9f3ff
NS
507 struct per_cpu_info *cpus = pdi->cpus;
508 int ncpus = pdi->ncpus;
509 int new_count = cpu + 1;
510 int new_space, size;
a718bd37
NS
511 char *new_start;
512
e7c9f3ff
NS
513 size = new_count * sizeof(struct per_cpu_info);
514 cpus = realloc(cpus, size);
515 if (!cpus) {
516 char name[20];
517 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
518 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
519 exit(1);
520 }
521
e7c9f3ff
NS
522 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
523 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 524 memset(new_start, 0, new_space);
e7c9f3ff
NS
525
526 pdi->ncpus = new_count;
527 pdi->cpus = cpus;
528}
cb2a1a62 529
e7c9f3ff
NS
530static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
531{
cb2a1a62
JA
532 struct per_cpu_info *pci;
533
e7c9f3ff
NS
534 if (cpu >= pdi->ncpus)
535 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
536
537 pci = &pdi->cpus[cpu];
538 pci->cpu = cpu;
539 return pci;
a718bd37
NS
540}
541
e7c9f3ff
NS
542
543static int resize_devices(char *name)
a718bd37 544{
e7c9f3ff 545 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 546
e7c9f3ff
NS
547 devices = realloc(devices, size);
548 if (!devices) {
549 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
550 return 1;
551 }
552 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
553 devices[ndevices].name = name;
554 ndevices++;
555 return 0;
556}
a718bd37 557
cb2a1a62 558static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 559{
cb2a1a62 560 struct per_dev_info *pdi;
e7c9f3ff 561 int i;
c499bf38 562
e7c9f3ff
NS
563 for (i = 0; i < ndevices; i++)
564 if (devices[i].id == id)
565 return &devices[i];
cb2a1a62 566
e7c9f3ff
NS
567 if (resize_devices(NULL) != 0)
568 return NULL;
cb2a1a62
JA
569
570 pdi = &devices[ndevices - 1];
571 pdi->id = id;
572 return pdi;
a718bd37
NS
573}
574
e7c9f3ff
NS
575static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
576{
577 if (pdi->name)
578 snprintf(buffer, size, "%s", pdi->name);
579 else
580 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
581 return buffer;
582}
583
e7c9f3ff 584static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
585{
586 unsigned long long this = bit->time;
e7c9f3ff 587 unsigned long long last = pdi->last_reported_time;
cfab07eb 588
e7c9f3ff
NS
589 pdi->backwards = (this < last) ? 'B' : ' ';
590 pdi->last_reported_time = this;
cfab07eb
AB
591}
592
152f6476
JA
593static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
594 int rw)
d0ca268b
JA
595{
596 if (rw) {
152f6476
JA
597 ios->mwrites++;
598 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 599 } else {
152f6476
JA
600 ios->mreads++;
601 ios->qread_kb += t->bytes >> 10;
602 }
603}
604
605static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
606 int rw)
607{
608 __account_m(&pci->io_stats, t, rw);
609
610 if (per_process_stats) {
611 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
612
613 __account_m(ios, t, rw);
d0ca268b
JA
614 }
615}
616
152f6476
JA
617static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
618 int rw)
d0ca268b
JA
619{
620 if (rw) {
152f6476
JA
621 ios->qwrites++;
622 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 623 } else {
152f6476
JA
624 ios->qreads++;
625 ios->qread_kb += t->bytes >> 10;
626 }
627}
628
629static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
630 int rw)
631{
632 __account_q(&pci->io_stats, t, rw);
633
634 if (per_process_stats) {
635 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
636
637 __account_q(ios, t, rw);
d0ca268b
JA
638 }
639}
640
152f6476 641static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
642{
643 if (rw) {
152f6476
JA
644 ios->cwrites++;
645 ios->cwrite_kb += bytes >> 10;
d0ca268b 646 } else {
152f6476
JA
647 ios->creads++;
648 ios->cread_kb += bytes >> 10;
649 }
650}
651
652static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
653 int rw, int bytes)
654{
655 __account_c(&pci->io_stats, rw, bytes);
656
657 if (per_process_stats) {
658 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
659
660 __account_c(ios, rw, bytes);
d0ca268b
JA
661 }
662}
663
152f6476 664static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 665{
666 if (rw) {
152f6476
JA
667 ios->iwrites++;
668 ios->iwrite_kb += bytes >> 10;
afd2d7ad 669 } else {
152f6476
JA
670 ios->ireads++;
671 ios->iread_kb += bytes >> 10;
afd2d7ad 672 }
673}
674
152f6476
JA
675static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
676 int rw)
d0ca268b 677{
152f6476
JA
678 __account_i(&pci->io_stats, rw, t->bytes);
679
680 if (per_process_stats) {
681 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 682
152f6476
JA
683 __account_i(ios, rw, t->bytes);
684 }
685}
686
06639b27
JA
687static inline void __account_unplug(struct io_stats *ios, int timer)
688{
689 if (timer)
690 ios->timer_unplugs++;
691 else
692 ios->io_unplugs++;
693}
694
695static inline void account_unplug(struct blk_io_trace *t,
696 struct per_cpu_info *pci, int timer)
697{
698 __account_unplug(&pci->io_stats, timer);
699
700 if (per_process_stats) {
701 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
702
703 __account_unplug(ios, timer);
704 }
705}
706
152f6476
JA
707static void output(struct per_cpu_info *pci, char *s)
708{
709 fprintf(ofp, "%s", s);
d0ca268b
JA
710}
711
3aabcd89
JA
712static char hstring[256];
713static char tstring[256];
d0ca268b 714
d5396421 715static inline char *setup_header(struct per_cpu_info *pci,
3639a11e 716 struct blk_io_trace *t, char *act)
d0ca268b
JA
717{
718 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
719 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
720 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
721 char rwbs[4];
722 int i = 0;
723
724 if (w)
725 rwbs[i++] = 'W';
726 else
727 rwbs[i++] = 'R';
728 if (b)
729 rwbs[i++] = 'B';
730 if (s)
731 rwbs[i++] = 'S';
732
733 rwbs[i] = '\0';
734
3639a11e 735 sprintf(hstring, "%3d,%-3d %2d %8ld %5Lu.%09Lu %5u %2s %3s",
e7c9f3ff 736 MAJOR(t->device), MINOR(t->device), pci->cpu,
3639a11e 737 (unsigned long)t->sequence, SECONDS(t->time),
cfab07eb 738 NANO_SECONDS(t->time), t->pid, act, rwbs);
d0ca268b
JA
739
740 return hstring;
741}
742
d5396421 743static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 744 char *act)
d0ca268b 745{
2e3e8ded
JA
746 unsigned long long elapsed = log_track_complete(t);
747
748 if (elapsed != -1ULL) {
b9d40d6f 749 unsigned long usec = elapsed / 1000;
2e3e8ded 750
b9d40d6f 751 sprintf(tstring,"%s %Lu + %u (%8lu) [%d]\n",
2e3e8ded
JA
752 setup_header(pci, t, act),
753 (unsigned long long)t->sector, t->bytes >> 9,
754 usec, t->error);
755 } else {
756 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
757 (unsigned long long)t->sector, t->bytes >> 9, t->error);
758 }
759
d5396421 760 output(pci, tstring);
d0ca268b
JA
761}
762
d5396421 763static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 764 char *act)
d0ca268b 765{
95c15013 766 unsigned long long elapsed = log_track_queue(t);
2e3e8ded 767
95c15013 768 if (elapsed != -1ULL) {
b9d40d6f 769 unsigned long usec = elapsed / 1000;
95c15013 770
b9d40d6f 771 sprintf(tstring,"%s %Lu + %u (%8lu) [%s]\n",
95c15013
JA
772 setup_header(pci, t, act),
773 (unsigned long long)t->sector, t->bytes >> 9,
774 usec, t->comm);
775 } else {
776 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
777 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
778 }
d5396421 779 output(pci, tstring);
d0ca268b
JA
780}
781
d5396421 782static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 783 char *act)
d0ca268b 784{
2e3e8ded
JA
785 unsigned long long elapsed = log_track_issue(t);
786
787 if (elapsed != -1ULL) {
788 double usec = (double) elapsed / 1000;
789
555d3a31 790 sprintf(tstring,"%s %Lu + %u (%8.2f) [%s]\n",
2e3e8ded
JA
791 setup_header(pci, t, act),
792 (unsigned long long)t->sector, t->bytes >> 9,
793 usec, t->comm);
794 } else {
795 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
796 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
797 }
798
d5396421 799 output(pci, tstring);
d0ca268b
JA
800}
801
d5396421 802static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 803 char *act)
d0ca268b 804{
a01516de
JA
805 if (act[0] == 'F')
806 log_track_frontmerge(t);
2e3e8ded 807
984c63b7 808 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
2955af9d 809 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 810 output(pci, tstring);
d0ca268b
JA
811}
812
dfe34da1 813static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 814 char *act)
dfe34da1
JA
815{
816 sprintf(tstring,"%s [%s]\n", setup_header(pci, t, act), t->comm);
817 output(pci, tstring);
818}
819
d5396421 820static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 821 char *act)
d0ca268b 822{
2955af9d
NS
823 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
824 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 825 output(pci, tstring);
d0ca268b
JA
826}
827
67e14fdc 828static int log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 829 char *act)
67e14fdc
JA
830{
831 __u64 *depth;
832 int len;
833
06639b27 834 len = sprintf(tstring,"%s [%s] ", setup_header(pci, t, act), t->comm);
3639a11e 835 depth = (__u64 *) ((char *) t + sizeof(*t));
67e14fdc
JA
836 sprintf(tstring + len, "%u\n", (unsigned int) be64_to_cpu(*depth));
837 output(pci, tstring);
838
839 return 0;
840}
841
3639a11e 842static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 843{
87b72777
JA
844 unsigned char *buf;
845 int i;
d0ca268b 846
d5396421
JA
847 sprintf(tstring,"%s ", setup_header(pci, t, act));
848 output(pci, tstring);
d0ca268b 849
87b72777 850 buf = (unsigned char *) t + sizeof(*t);
d0ca268b
JA
851 for (i = 0; i < t->pdu_len; i++) {
852 sprintf(tstring,"%02x ", buf[i]);
d5396421 853 output(pci, tstring);
d0ca268b
JA
854 }
855
3639a11e 856 if (act[0] == 'C') {
2955af9d
NS
857 sprintf(tstring,"[%d]\n", t->error);
858 output(pci, tstring);
859 } else {
860 sprintf(tstring,"[%s]\n", t->comm);
d5396421 861 output(pci, tstring);
d0ca268b 862 }
87b72777 863 return 0;
d0ca268b
JA
864}
865
d5396421 866static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 867{
87b72777
JA
868 int ret = 0;
869
d0ca268b
JA
870 switch (t->action & 0xffff) {
871 case __BLK_TA_QUEUE:
3639a11e 872 log_generic(pci, t, "Q");
d0ca268b
JA
873 break;
874 case __BLK_TA_GETRQ:
3639a11e 875 log_generic(pci, t, "G");
d0ca268b
JA
876 break;
877 case __BLK_TA_SLEEPRQ:
3639a11e 878 log_generic(pci, t, "S");
d0ca268b
JA
879 break;
880 case __BLK_TA_REQUEUE:
3639a11e 881 log_generic(pci, t, "R");
d0ca268b
JA
882 break;
883 case __BLK_TA_ISSUE:
3639a11e 884 ret = log_pc(pci, t, "D");
d0ca268b
JA
885 break;
886 case __BLK_TA_COMPLETE:
3639a11e 887 log_pc(pci, t, "C");
d0ca268b
JA
888 break;
889 default:
890 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
891 ret = 1;
892 break;
d0ca268b
JA
893 }
894
87b72777 895 return ret;
d0ca268b
JA
896}
897
d5396421 898static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
899{
900 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 901 int act = t->action & 0xffff;
d0ca268b 902
7997c5b0 903 switch (act) {
d0ca268b 904 case __BLK_TA_QUEUE:
152f6476 905 account_q(t, pci, w);
3639a11e 906 log_queue(pci, t, "Q");
d0ca268b
JA
907 break;
908 case __BLK_TA_BACKMERGE:
152f6476 909 account_m(t, pci, w);
3639a11e 910 log_merge(pci, t, "M");
d0ca268b
JA
911 break;
912 case __BLK_TA_FRONTMERGE:
152f6476 913 account_m(t, pci, w);
3639a11e 914 log_merge(pci, t, "F");
d0ca268b
JA
915 break;
916 case __BLK_TA_GETRQ:
95c15013 917 log_track_getrq(t);
3639a11e 918 log_generic(pci, t, "G");
d0ca268b
JA
919 break;
920 case __BLK_TA_SLEEPRQ:
3639a11e 921 log_generic(pci, t, "S");
d0ca268b
JA
922 break;
923 case __BLK_TA_REQUEUE:
152f6476 924 account_c(t, pci, w, -t->bytes);
3639a11e 925 log_queue(pci, t, "R");
d0ca268b
JA
926 break;
927 case __BLK_TA_ISSUE:
152f6476 928 account_i(t, pci, w);
3639a11e 929 log_issue(pci, t, "D");
d0ca268b
JA
930 break;
931 case __BLK_TA_COMPLETE:
152f6476 932 account_c(t, pci, w, t->bytes);
3639a11e 933 log_complete(pci, t, "C");
d0ca268b 934 break;
88b1a526 935 case __BLK_TA_PLUG:
3639a11e 936 log_action(pci, t, "P");
88b1a526 937 break;
3639a11e 938 case __BLK_TA_UNPLUG_IO:
06639b27 939 account_unplug(t, pci, 0);
3639a11e
JA
940 log_unplug(pci, t, "U");
941 break;
942 case __BLK_TA_UNPLUG_TIMER:
06639b27 943 account_unplug(t, pci, 1);
3639a11e 944 log_unplug(pci, t, "UT");
88b1a526 945 break;
d0ca268b
JA
946 default:
947 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 948 break;
d0ca268b 949 }
d0ca268b
JA
950}
951
e7c9f3ff
NS
952static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
953 struct per_dev_info *pdi)
d0ca268b 954{
87b72777
JA
955 int ret = 0;
956
d0ca268b 957 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 958 ret = dump_trace_pc(t, pci);
d0ca268b 959 else
d5396421 960 dump_trace_fs(t, pci);
87b72777 961
e7c9f3ff 962 pdi->events++;
87b72777 963 return ret;
d0ca268b
JA
964}
965
152f6476 966static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 967{
152f6476
JA
968 fprintf(ofp, "%s\n", msg);
969
970 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
971 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 972
152f6476
JA
973 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
974 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
975 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
976 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
977 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 978 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
979 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
980 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
981}
982
50adc0ba
JA
983static void dump_wait_stats(struct per_process_info *ppi)
984{
b9d40d6f
JA
985 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
986 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
987 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
988 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
989 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
990 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
991
992 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
993 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
994 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
995 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
996 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
997 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
998}
999
152f6476
JA
1000static void show_process_stats(void)
1001{
1002 struct per_process_info *ppi;
1003
1004 ppi = ppi_list;
1005 while (ppi) {
1006 dump_io_stats(&ppi->io_stats, ppi->name);
50adc0ba 1007 dump_wait_stats(ppi);
152f6476
JA
1008 ppi = ppi->list_next;
1009 }
1010
1011 fprintf(ofp, "\n");
1012}
1013
e7c9f3ff 1014static void show_device_and_cpu_stats(void)
d0ca268b 1015{
e7c9f3ff
NS
1016 struct per_dev_info *pdi;
1017 struct per_cpu_info *pci;
1018 struct io_stats total, *ios;
1019 int i, j, pci_events;
1020 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1021 char name[32];
1022
1023 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1024
1025 memset(&total, 0, sizeof(total));
1026 pci_events = 0;
1027
1028 if (i > 0)
1029 fprintf(ofp, "\n");
1030
1031 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1032 if (!pci->nelems)
1033 continue;
1034
1035 ios = &pci->io_stats;
1036 total.qreads += ios->qreads;
1037 total.qwrites += ios->qwrites;
1038 total.creads += ios->creads;
1039 total.cwrites += ios->cwrites;
1040 total.mreads += ios->mreads;
1041 total.mwrites += ios->mwrites;
1042 total.ireads += ios->ireads;
1043 total.iwrites += ios->iwrites;
1044 total.qread_kb += ios->qread_kb;
1045 total.qwrite_kb += ios->qwrite_kb;
1046 total.cread_kb += ios->cread_kb;
1047 total.cwrite_kb += ios->cwrite_kb;
1048 total.iread_kb += ios->iread_kb;
1049 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1050 total.timer_unplugs += ios->timer_unplugs;
1051 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1052
1053 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1054 j, get_dev_name(pdi, name, sizeof(name)));
1055 dump_io_stats(ios, line);
1056 pci_events++;
1057 }
5c017e4b 1058
e7c9f3ff
NS
1059 if (pci_events > 1) {
1060 fprintf(ofp, "\n");
1061 snprintf(line, sizeof(line) - 1, "Total (%s):",
1062 get_dev_name(pdi, name, sizeof(name)));
1063 dump_io_stats(&total, line);
1064 }
d0ca268b 1065
e7c9f3ff
NS
1066 fprintf(ofp, "Events (%s): %'Lu\n",
1067 get_dev_name(pdi, line, sizeof(line)), pdi->events);
1068 }
d0ca268b
JA
1069}
1070
cb2a1a62 1071static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1072{
cb2a1a62 1073 unsigned long max_offset = offset;
2ff323b0
JA
1074 unsigned long off;
1075 struct blk_io_trace *bit;
1076 __u32 magic;
1077
1078 for (off = 0; off < max_offset; off++) {
1079 bit = p + off;
1080
1081 magic = be32_to_cpu(bit->magic);
1082 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1083 return bit;
1084 }
1085
1086 return NULL;
1087}
1088
cb2a1a62 1089static int sort_entries(void)
8fc0abbc 1090{
e7c9f3ff 1091 struct per_dev_info *pdi;
412819ce 1092 struct per_cpu_info *pci;
8fc0abbc
JA
1093 struct blk_io_trace *bit;
1094 struct trace *t;
cb2a1a62 1095 int nr = 0;
8fc0abbc 1096
cb2a1a62 1097 while ((t = trace_list) != NULL) {
412819ce 1098
cb2a1a62
JA
1099 trace_list = t->next;
1100 bit = t->bit;
6fe4709e 1101
8fc0abbc
JA
1102 memset(&t->rb_node, 0, sizeof(t->rb_node));
1103
6fe4709e
JA
1104 trace_to_cpu(bit);
1105
cb2a1a62 1106 if (verify_trace(bit))
66fa7233 1107 break;
cb2a1a62
JA
1108 if (trace_rb_insert(t))
1109 return -1;
66fa7233 1110
cb2a1a62
JA
1111 pdi = get_dev_info(bit->device);
1112 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1113 pci->nelems++;
1114
cb2a1a62 1115 nr++;
6fe4709e 1116 }
8fc0abbc 1117
cb2a1a62 1118 return nr;
412819ce
JA
1119}
1120
e8741a4a 1121static void show_entries_rb(int kill_entries)
8fc0abbc 1122{
e7c9f3ff 1123 struct per_dev_info *pdi;
8fc0abbc 1124 struct blk_io_trace *bit;
3aabcd89 1125 struct rb_node *n;
8fc0abbc
JA
1126 struct trace *t;
1127 int cpu;
1128
cb2a1a62 1129 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1130
1131 t = rb_entry(n, struct trace, rb_node);
1132 bit = t->bit;
1133
cb2a1a62 1134 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1135 if (!pdi) {
1136 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1137 MAJOR(bit->device), MINOR(bit->device));
1138 break;
1139 }
d5396421 1140 cpu = bit->cpu;
e7c9f3ff
NS
1141 if (cpu > pdi->ncpus) {
1142 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1143 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1144 break;
8fc0abbc
JA
1145 }
1146
cb2a1a62
JA
1147 /*
1148 * back off displaying more info if we are out of sync
1149 * on SMP systems. to prevent stalling on lost events,
1150 * only allow an event to skip us once
1151 */
1152 if (bit->sequence != (pdi->last_sequence + 1)) {
1153 if (!t->skipped) {
1154 t->skipped = 1;
1155 break;
1156 }
1157 }
1158
1159 pdi->last_sequence = bit->sequence;
1160
cfab07eb 1161 bit->time -= genesis_time;
46e6968b
NS
1162 if (bit->time < stopwatch_start)
1163 continue;
1164 if (bit->time >= stopwatch_end)
1165 break;
8fc0abbc 1166
e7c9f3ff 1167 check_time(pdi, bit);
8fc0abbc 1168
e7c9f3ff 1169 if (dump_trace(bit, &pdi->cpus[cpu], pdi))
87b72777
JA
1170 break;
1171
cb2a1a62 1172 rb_erase(&t->rb_node, &rb_sort_root);
e8741a4a
JA
1173
1174 if (kill_entries) {
1175 free(bit);
1176 free(t);
1177 }
cb2a1a62 1178 }
8fc0abbc
JA
1179}
1180
1f79c4a0
JA
/*
 * Read exactly 'bytes' bytes from 'fd' into 'buffer'.
 *
 * block: non-zero to wait for the data; zero to give up immediately when
 *        nothing at all is pending. The O_NONBLOCK flag of 'fd' is set or
 *        cleared accordingly on every call.
 *
 * A record is never read partially: if a non-blocking read has already
 * consumed part of the record when the descriptor runs dry, the descriptor
 * is switched to blocking mode and the remainder is waited for. Returning
 * at that point would throw the consumed bytes away and leave the stream
 * out of step with record boundaries.
 *
 * Returns 0 when the buffer was filled, 1 on end of file, and -1 on error
 * or when a non-blocking read found no data (only real errors are
 * reported on stderr).
 */
static int read_data(int fd, void *buffer, int bytes, int block)
{
	int ret, bytes_left, fl;
	char *p;

	fl = fcntl(fd, F_GETFL);
	if (fl < 0) {
		perror("fcntl");
		return -1;
	}

	if (!block)
		fcntl(fd, F_SETFL, fl | O_NONBLOCK);
	else
		fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);

	bytes_left = bytes;
	p = buffer;
	while (bytes_left > 0) {
		ret = read(fd, p, bytes_left);
		if (!ret)
			return 1;
		else if (ret < 0) {
			if (errno == EAGAIN && bytes_left != bytes) {
				/* mid-record: wait for the rest instead of dropping it */
				if (fcntl(fd, F_SETFL, fl & ~O_NONBLOCK) < 0) {
					perror("fcntl");
					return -1;
				}
				continue;
			}
			if (errno != EAGAIN)
				perror("read");
			return -1;
		} else {
			p += ret;
			bytes_left -= ret;
		}
	}

	return 0;
}
1211
cb2a1a62
JA
1212/*
1213 * Find the traces in 'tb' and add them to the list for sorting and
1214 * displaying
1215 */
1216static int find_entries(void *tb, unsigned long size)
1217{
1218 struct blk_io_trace *bit;
1219 struct trace *t;
1220 void *start = tb;
1221
1222 while (tb - start <= size - sizeof(*bit)) {
1223 bit = find_trace(tb, size - (tb - start));
1224 if (!bit)
1225 break;
1226
1227 t = malloc(sizeof(*t));
1228 memset(t, 0, sizeof(*t));
1229 t->bit = bit;
1230
1231 t->next = trace_list;
1232 trace_list = t;
1233
1234 tb += sizeof(*bit) + bit->pdu_len;
1235 }
1236
1237 return 0;
1238}
1239
d5396421 1240static int do_file(void)
d0ca268b 1241{
cb2a1a62 1242 int i, j, nfiles = 0, nelems;
d0ca268b 1243
e8741a4a 1244 for (i = 0; i < ndevices; i++) {
e7c9f3ff 1245 for (j = 0;; j++, nfiles++) {
e8741a4a 1246 struct per_dev_info *pdi;
e7c9f3ff
NS
1247 struct per_cpu_info *pci;
1248 struct stat st;
1249 void *tb;
87b72777 1250
e8741a4a 1251 pdi = &devices[i];
e7c9f3ff
NS
1252 pci = get_cpu_info(pdi, j);
1253 pci->cpu = j;
d0ca268b 1254
e7c9f3ff
NS
1255 snprintf(pci->fname, sizeof(pci->fname)-1,
1256 "%s_out.%d", pdi->name, j);
1257 if (stat(pci->fname, &st) < 0)
1258 break;
1259 if (!st.st_size)
1260 continue;
1261
1262 printf("Processing %s\n", pci->fname);
1263
1264 tb = malloc(st.st_size);
1265 if (!tb) {
1266 fprintf(stderr, "Out of memory, skip file %s\n",
1267 pci->fname);
1268 continue;
1269 }
1270
1271 pci->fd = open(pci->fname, O_RDONLY);
1272 if (pci->fd < 0) {
1273 perror(pci->fname);
1274 free(tb);
1275 continue;
1276 }
1277
1278 if (read_data(pci->fd, tb, st.st_size, 1)) {
1279 close(pci->fd);
1280 free(tb);
1281 continue;
1282 }
1283
cb2a1a62
JA
1284 if (find_entries(tb, st.st_size)) {
1285 close(pci->fd);
1286 free(tb);
1287 }
1288
1289 nelems = sort_entries();
1290 if (nelems == -1) {
e7c9f3ff
NS
1291 close(pci->fd);
1292 free(tb);
1293 continue;
1294 }
1295
1296 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1297 pci->fname, j, nelems);
e7c9f3ff 1298 close(pci->fd);
d0ca268b 1299 }
d5396421
JA
1300 }
1301
1302 if (!nfiles) {
1303 fprintf(stderr, "No files found\n");
1304 return 1;
1305 }
1306
e8741a4a 1307 show_entries_rb(0);
d5396421
JA
1308 return 0;
1309}
1310
cb2a1a62 1311static int read_sort_events(int fd)
d5396421 1312{
cb2a1a62 1313 int events = 0;
d5396421 1314
412819ce 1315 do {
cb2a1a62
JA
1316 struct blk_io_trace *bit;
1317 struct trace *t;
412819ce 1318 int pdu_len;
51128a28 1319 __u32 magic;
d5396421 1320
cb2a1a62 1321 bit = malloc(sizeof(*bit));
d5396421 1322
cb2a1a62 1323 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1324 break;
d5396421 1325
cb2a1a62 1326 magic = be32_to_cpu(bit->magic);
51128a28
JA
1327 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1328 fprintf(stderr, "Bad magic %x\n", magic);
1329 break;
1330 }
1331
cb2a1a62 1332 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1333 if (pdu_len) {
cb2a1a62 1334 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1335
cb2a1a62 1336 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1337 break;
d5396421 1338
cb2a1a62 1339 bit = ptr;
2ff323b0 1340 }
d5396421 1341
cb2a1a62
JA
1342 t = malloc(sizeof(*t));
1343 memset(t, 0, sizeof(*t));
1344 t->bit = bit;
1345 t->next = trace_list;
1346 trace_list = t;
1347
412819ce 1348 events++;
79f19470 1349 } while (!is_done() && events < rb_batch);
d5396421 1350
412819ce
JA
1351 return events;
1352}
d5396421 1353
412819ce
JA
/*
 * Process traces arriving on standard input.
 *
 * Works on a duplicate of the stdin descriptor and repeats three steps
 * until a batch comes back empty or sorting fails: read a batch of
 * events, move them into the sort tree, then display and free whatever
 * can be shown.
 *
 * Always returns 0.
 */
static int do_stdin(void)
{
	int fd = dup(STDIN_FILENO);

	for (;;) {
		/* an empty batch means there is nothing more to read */
		if (!read_sort_events(fd))
			break;

		if (sort_entries() == -1)
			break;

		show_entries_rb(1);
	}

	close(fd);
	return 0;
}
d0ca268b 1375
1f79c4a0 1376static void flush_output(void)
412819ce 1377{
152f6476 1378 fflush(ofp);
412819ce
JA
1379}
1380
1f79c4a0 1381static void handle_sigint(int sig)
412819ce
JA
1382{
1383 done = 1;
1384 flush_output();
1385}
1386
46e6968b
NS
1387/*
1388 * Extract start and duration times from a string, allowing
1389 * us to specify a time interval of interest within a trace.
1390 * Format: "duration" (start is zero) or "start:duration".
1391 */
1392static int find_stopwatch_interval(char *string)
1393{
1394 double value;
1395 char *sp;
1396
1397 value = strtod(string, &sp);
1398 if (sp == string) {
1399 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1400 return 1;
1401 }
1402 if (*sp == ':') {
1403 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1404 string = sp + 1;
1405 value = strtod(string, &sp);
1406 if (sp == string || *sp != '\0') {
1407 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1408 string);
1409 return 1;
1410 }
1411 } else if (*sp != '\0') {
1412 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1413 return 1;
1414 }
1415 stopwatch_end = stopwatch_start + DOUBLE_TO_NANO_ULL(value);
1416 return 0;
1417}
1418
1f79c4a0
JA
/*
 * Print a one-line summary of the command line syntax to stderr.
 *
 * prog: program name to show, normally argv[0]
 *
 * The summary lists the short options that main's option switch handles.
 */
static void usage(char *prog)
{
	fprintf(stderr, "Usage: %s "
		"[-i <name>] [-o <output>] [-b <batch>] [-s] [-t] [-q] "
		"[-w N[:n]] <name>...\n",
		prog);
}
1425
d5396421
JA
1426int main(int argc, char *argv[])
1427{
152f6476 1428 char *ofp_buffer;
a66877e6 1429 int c, ret, mode;
1e1c60f1 1430 int per_device_and_cpu_stats = 1;
d5396421
JA
1431
1432 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1433 switch (c) {
1434 case 'i':
e7c9f3ff
NS
1435 if (!strcmp(optarg, "-") && !pipeline)
1436 pipeline = 1;
1437 else if (resize_devices(optarg) != 0)
1438 return 1;
d5396421
JA
1439 break;
1440 case 'o':
66efebf8 1441 output_name = optarg;
d5396421 1442 break;
79f19470
JA
1443 case 'b':
1444 rb_batch = atoi(optarg);
1445 if (rb_batch <= 0)
1446 rb_batch = RB_BATCH_DEFAULT;
1447 break;
152f6476
JA
1448 case 's':
1449 per_process_stats = 1;
1450 break;
7997c5b0
JA
1451 case 't':
1452 track_ios = 1;
1453 break;
1e1c60f1
NS
1454 case 'q':
1455 per_device_and_cpu_stats = 0;
1456 break;
46e6968b
NS
1457 case 'w':
1458 if (find_stopwatch_interval(optarg) != 0)
1459 return 1;
1460 break;
d5396421 1461 default:
1f79c4a0 1462 usage(argv[0]);
d5396421
JA
1463 return 1;
1464 }
d0ca268b
JA
1465 }
1466
e7c9f3ff
NS
1467 while (optind < argc) {
1468 if (!strcmp(argv[optind], "-") && !pipeline)
1469 pipeline = 1;
1470 else if (resize_devices(argv[optind]) != 0)
1471 return 1;
1472 optind++;
1473 }
1474
1475 if (!pipeline && !ndevices) {
1f79c4a0 1476 usage(argv[0]);
d5396421
JA
1477 return 1;
1478 }
1479
7997c5b0
JA
1480 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1481 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1482
1483 signal(SIGINT, handle_sigint);
1484 signal(SIGHUP, handle_sigint);
1485 signal(SIGTERM, handle_sigint);
d5396421 1486
d69db225
JA
1487 setlocale(LC_NUMERIC, "en_US");
1488
a66877e6 1489 if (!output_name) {
152f6476 1490 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1491 mode = _IOLBF;
1492 } else {
152f6476
JA
1493 char ofname[128];
1494
1495 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1496 ofp = fopen(ofname, "w");
a66877e6 1497 mode = _IOFBF;
152f6476
JA
1498 }
1499
1500 if (!ofp) {
1501 perror("fopen");
1502 return 1;
1503 }
1504
1505 ofp_buffer = malloc(4096);
a66877e6 1506 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1507 perror("setvbuf");
1508 return 1;
1509 }
1510
e7c9f3ff 1511 if (pipeline)
d5396421
JA
1512 ret = do_stdin();
1513 else
1514 ret = do_file();
1515
152f6476
JA
1516 if (per_process_stats)
1517 show_process_stats();
1518
1e1c60f1
NS
1519 if (per_device_and_cpu_stats)
1520 show_device_and_cpu_stats();
152f6476 1521
412819ce 1522 flush_output();
d5396421 1523 return ret;
d0ca268b 1524}