[PATCH] blkparse: better sorting over a pipe
[blktrace.git] / blkparse.c
CommitLineData
d956a2cd
JA
1/*
2 * block queue tracing parse application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
d0ca268b
JA
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <unistd.h>
24#include <stdio.h>
25#include <fcntl.h>
26#include <stdlib.h>
8fc0abbc 27#include <string.h>
d5396421 28#include <getopt.h>
412819ce
JA
29#include <errno.h>
30#include <signal.h>
d69db225 31#include <locale.h>
46e6968b 32#include <limits.h>
d0ca268b 33
8fc0abbc
JA
34#include "blktrace.h"
35#include "rbtree.h"
d0ca268b 36
2e3e8ded
JA
/*
 * split a nanosecond count into whole seconds and the nanosecond
 * remainder, and convert a floating point second count to nanoseconds
 */
#define SECONDS(x)		((unsigned long long)(x) / 1000000000)
#define NANO_SECONDS(x)		((unsigned long long)(x) % 1000000000)
#define DOUBLE_TO_NANO_ULL(d)	((unsigned long long)((d) * 1000000000))

/*
 * device numbers carry the minor in the low MINORBITS bits and the
 * major in the bits above
 */
#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)
#define MAJOR(dev)	((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))

/*
 * note: evaluates the selected argument twice
 */
#define min(a, b)	((a) < (b) ? (a) : (b))
d5396421 47
152f6476
JA
/*
 * event counters and KiB totals. the prefix names the event kind:
 * q = queued, c = completed, m = merged, i = issued (dispatched)
 */
struct io_stats {
	/* number of events, split by direction */
	unsigned long qreads, qwrites;
	unsigned long creads, cwrites;
	unsigned long mreads, mwrites;
	unsigned long ireads, iwrites;

	/* amount of data in KiB, split by direction */
	unsigned long long qread_kb, qwrite_kb;
	unsigned long long cread_kb, cwrite_kb;
	unsigned long long iread_kb, iwrite_kb;

	/* unplug events, by trigger */
	unsigned long io_unplugs, timer_unplugs;
};
55
d5396421 56struct per_cpu_info {
d0ca268b
JA
57 int cpu;
58 int nelems;
d0ca268b
JA
59
60 int fd;
87b72777 61 char fname[128];
d0ca268b 62
152f6476
JA
63 struct io_stats io_stats;
64};
8fc0abbc 65
e7c9f3ff
NS
66struct per_dev_info {
67 dev_t id;
68 char *name;
69
70 int backwards;
71 unsigned long long events;
72 unsigned long long last_reported_time;
73 struct io_stats io_stats;
cb2a1a62 74 unsigned long last_sequence;
e7c9f3ff
NS
75
76 int ncpus;
77 struct per_cpu_info *cpus;
78};
79
152f6476
JA
80struct per_process_info {
81 char name[16];
82 __u32 pid;
83 struct io_stats io_stats;
84 struct per_process_info *hash_next, *list_next;
50adc0ba
JA
85
86 /*
87 * individual io stats
88 */
b9d40d6f
JA
89 unsigned long long longest_allocation_wait[2];
90 unsigned long long longest_dispatch_wait[2];
91 unsigned long long longest_completion_wait[2];
d0ca268b
JA
92};
93
152f6476
JA
/*
 * pid -> per_process_info lookup: a table of (1 << PPI_HASH_SHIFT)
 * chained buckets, plus a list of every entry for reporting
 */
#define PPI_HASH_SHIFT	(8)
static struct per_process_info *ppi_hash[1 << PPI_HASH_SHIFT];
static struct per_process_info *ppi_list;
97
/*
 * command line options. S_OPTS is the short option string, l_opts the
 * matching long options; fields are name, has_arg, flag, val.
 *
 * NOTE(review): "per program stats" and "track ios" contain spaces, so
 * the full long names need shell quoting.
 */
#define S_OPTS	"i:o:b:stqw:"
static struct option l_opts[] = {
	{ "input",		required_argument,	NULL,	'i' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "batch",		required_argument,	NULL,	'b' },
	{ "per program stats",	no_argument,		NULL,	's' },
	{ "track ios",		no_argument,		NULL,	't' },
	{ "quiet",		no_argument,		NULL,	'q' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ NULL,			no_argument,		NULL,	0 }
};
149
7997c5b0
JA
150/*
151 * for sorting the displayed output
152 */
8fc0abbc
JA
153struct trace {
154 struct blk_io_trace *bit;
155 struct rb_node rb_node;
cb2a1a62
JA
156 struct trace *next;
157 int skipped;
8fc0abbc
JA
158};
159
cb2a1a62
JA
160static struct rb_root rb_sort_root;
161static struct rb_root rb_track_root;
162
163static struct trace *trace_list;
164
7997c5b0
JA
165/*
166 * for tracking individual ios
167 */
168struct io_track {
169 struct rb_node rb_node;
170
e7c9f3ff 171 dev_t device;
7997c5b0
JA
172 __u64 sector;
173 __u32 pid;
95c15013 174 unsigned long long allocation_time;
7997c5b0
JA
175 unsigned long long queue_time;
176 unsigned long long dispatch_time;
177 unsigned long long completion_time;
178};
179
e7c9f3ff
NS
/*
 * table of devices seen so far, grown one entry at a time
 */
static int ndevices;
static struct per_dev_info *devices;
static char *get_dev_name(struct per_dev_info *, char *, int);

/*
 * stream all formatted output is written to
 */
static FILE *ofp;
static char *output_name;

/*
 * genesis_time is the smallest time stamp seen; it is subtracted from
 * every displayed trace. the stopwatch window limits what is shown.
 * ULLONG_MAX is the ISO C name, ULONG_LONG_MAX exists only as a GNU
 * extension.
 */
static unsigned long long genesis_time;
static unsigned long long stopwatch_start;	/* start from zero by default */
static unsigned long long stopwatch_end = ULLONG_MAX;	/* "infinity" */

static int per_process_stats;
static int track_ios;

#define RB_BATCH_DEFAULT	(1024)
static int rb_batch = RB_BATCH_DEFAULT;

static int pipeline;

/*
 * 'done' is always read through a volatile access
 */
#define is_done()	(*(volatile int *)(&done))
static volatile int done;
201
152f6476
JA
202static inline unsigned long hash_long(unsigned long val)
203{
16ef714e
JA
204#if __WORDSIZE == 32
205 val *= 0x9e370001UL;
206#elif __WORDSIZE == 64
207 val *= 0x9e37fffffffc0001UL;
208#else
209#error unknown word size
210#endif
211
212 return val >> (__WORDSIZE - PPI_HASH_SHIFT);
152f6476
JA
213}
214
215static inline void add_process_to_hash(struct per_process_info *ppi)
216{
217 const int hash_idx = hash_long(ppi->pid);
218
219 ppi->hash_next = ppi_hash[hash_idx];
220 ppi_hash[hash_idx] = ppi;
221}
222
223static inline void add_process_to_list(struct per_process_info *ppi)
224{
225 ppi->list_next = ppi_list;
226 ppi_list = ppi;
227}
228
229static struct per_process_info *find_process_by_pid(__u32 pid)
230{
231 const int hash_idx = hash_long(pid);
232 struct per_process_info *ppi;
233
234 ppi = ppi_hash[hash_idx];
235 while (ppi) {
236 if (ppi->pid == pid)
237 return ppi;
238
239 ppi = ppi->hash_next;
240 }
241
242 return NULL;
243}
244
7997c5b0
JA
245static inline int trace_rb_insert(struct trace *t)
246{
247 struct rb_node **p = &rb_sort_root.rb_node;
248 struct rb_node *parent = NULL;
249 struct trace *__t;
250
e7c9f3ff
NS
251 if (genesis_time == 0 || t->bit->time < genesis_time)
252 genesis_time = t->bit->time;
253
7997c5b0
JA
254 while (*p) {
255 parent = *p;
256 __t = rb_entry(parent, struct trace, rb_node);
257
e7c9f3ff
NS
258 if (t->bit->time < __t->bit->time)
259 p = &(*p)->rb_left;
260 else if (t->bit->time > __t->bit->time)
261 p = &(*p)->rb_right;
262 else if (t->bit->device < __t->bit->device)
263 p = &(*p)->rb_left;
264 else if (t->bit->device > __t->bit->device)
265 p = &(*p)->rb_right;
266 else if (t->bit->sequence < __t->bit->sequence)
7997c5b0
JA
267 p = &(*p)->rb_left;
268 else if (t->bit->sequence > __t->bit->sequence)
269 p = &(*p)->rb_right;
e7c9f3ff
NS
270 else if (t->bit->device == __t->bit->device) {
271 fprintf(stderr,
272 "sequence alias (%d) on device %d,%d!\n",
273 t->bit->sequence,
274 MAJOR(t->bit->device), MINOR(t->bit->device));
7997c5b0
JA
275 return 1;
276 }
277 }
278
279 rb_link_node(&t->rb_node, parent, p);
280 rb_insert_color(&t->rb_node, &rb_sort_root);
281 return 0;
282}
283
284static inline int track_rb_insert(struct io_track *iot)
285{
286 struct rb_node **p = &rb_track_root.rb_node;
287 struct rb_node *parent = NULL;
288 struct io_track *__iot;
289
290 while (*p) {
291 parent = *p;
e7c9f3ff 292
7997c5b0
JA
293 __iot = rb_entry(parent, struct io_track, rb_node);
294
e7c9f3ff
NS
295 if (iot->device < __iot->device)
296 p = &(*p)->rb_left;
297 else if (iot->device > __iot->device)
298 p = &(*p)->rb_right;
299 else if (iot->sector < __iot->sector)
7997c5b0
JA
300 p = &(*p)->rb_left;
301 else if (iot->sector > __iot->sector)
302 p = &(*p)->rb_right;
303 else {
e7c9f3ff
NS
304 fprintf(stderr,
305 "sector alias (%llu) on device %d,%d!\n",
306 iot->sector,
307 MAJOR(iot->device), MINOR(iot->device));
7997c5b0
JA
308 return 1;
309 }
310 }
311
312 rb_link_node(&iot->rb_node, parent, p);
313 rb_insert_color(&iot->rb_node, &rb_track_root);
314 return 0;
315}
316
e7c9f3ff 317static struct io_track *__find_track(dev_t device, __u64 sector)
7997c5b0
JA
318{
319 struct rb_node **p = &rb_track_root.rb_node;
320 struct rb_node *parent = NULL;
321 struct io_track *__iot;
322
323 while (*p) {
324 parent = *p;
325
326 __iot = rb_entry(parent, struct io_track, rb_node);
327
e7c9f3ff
NS
328 if (device < __iot->device)
329 p = &(*p)->rb_left;
330 else if (device > __iot->device)
331 p = &(*p)->rb_right;
332 else if (sector < __iot->sector)
7997c5b0
JA
333 p = &(*p)->rb_left;
334 else if (sector > __iot->sector)
335 p = &(*p)->rb_right;
336 else
337 return __iot;
338 }
339
340 return NULL;
341}
342
e7c9f3ff 343static struct io_track *find_track(__u32 pid, dev_t device, __u64 sector)
7997c5b0 344{
916b5501 345 struct io_track *iot;
7997c5b0 346
e7c9f3ff 347 iot = __find_track(device, sector);
7997c5b0
JA
348 if (!iot) {
349 iot = malloc(sizeof(*iot));
50adc0ba 350 iot->pid = pid;
e7c9f3ff 351 iot->device = device;
7997c5b0
JA
352 iot->sector = sector;
353 track_rb_insert(iot);
354 }
355
356 return iot;
357}
358
2e3e8ded
JA
359static void log_track_merge(struct blk_io_trace *t)
360{
361 struct io_track *iot;
362
363 if (!track_ios)
364 return;
365 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
366 return;
367
592166ca
JA
368 /*
369 * this can happen if we lose events, so don't print an error
370 */
e7c9f3ff 371 iot = __find_track(t->device, t->sector - (t->bytes >> 10));
cb2a1a62
JA
372 if (!iot) {
373 fprintf(stderr, "failed to find mergeable event\n");
374 return;
2e3e8ded 375 }
cb2a1a62
JA
376
377 rb_erase(&iot->rb_node, &rb_track_root);
378 iot->sector -= t->bytes >> 10;
379 track_rb_insert(iot);
2e3e8ded
JA
380}
381
95c15013 382static void log_track_getrq(struct blk_io_trace *t)
2e3e8ded
JA
383{
384 struct io_track *iot;
385
386 if (!track_ios)
387 return;
388
e7c9f3ff 389 iot = find_track(t->pid, t->device, t->sector);
95c15013
JA
390 iot->allocation_time = t->time;
391}
392
393
394/*
395 * return time between rq allocation and queue
396 */
397static unsigned long long log_track_queue(struct blk_io_trace *t)
398{
50adc0ba 399 unsigned long long elapsed;
95c15013
JA
400 struct io_track *iot;
401
402 if (!track_ios)
403 return -1;
404
e7c9f3ff 405 iot = find_track(t->pid, t->device, t->sector);
2e3e8ded 406 iot->queue_time = t->time;
50adc0ba
JA
407 elapsed = iot->queue_time - iot->allocation_time;
408
409 if (per_process_stats) {
410 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 411 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 412
b9d40d6f
JA
413 if (ppi && elapsed > ppi->longest_allocation_wait[w])
414 ppi->longest_allocation_wait[w] = elapsed;
50adc0ba
JA
415 }
416
417 return elapsed;
2e3e8ded
JA
418}
419
420/*
421 * return time between queue and issue
422 */
423static unsigned long long log_track_issue(struct blk_io_trace *t)
424{
50adc0ba 425 unsigned long long elapsed;
2e3e8ded
JA
426 struct io_track *iot;
427
428 if (!track_ios)
429 return -1;
430 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
431 return -1;
432
592166ca
JA
433 /*
434 * this can happen if we lose events, so don't print an error
435 */
e7c9f3ff 436 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
437 if (!iot) {
438 fprintf(stderr, "failed to find issue event\n");
2e3e8ded 439 return -1;
cb2a1a62 440 }
2e3e8ded
JA
441
442 iot->dispatch_time = t->time;
50adc0ba
JA
443 elapsed = iot->dispatch_time - iot->queue_time;
444
445 if (per_process_stats) {
446 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 447 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 448
b9d40d6f
JA
449 if (ppi && elapsed > ppi->longest_dispatch_wait[w])
450 ppi->longest_dispatch_wait[w] = elapsed;
50adc0ba
JA
451 }
452
453 return elapsed;
2e3e8ded
JA
454}
455
456/*
457 * return time between dispatch and complete
458 */
459static unsigned long long log_track_complete(struct blk_io_trace *t)
460{
461 unsigned long long elapsed;
462 struct io_track *iot;
463
464 if (!track_ios)
465 return -1;
466 if ((t->action & BLK_TC_ACT(BLK_TC_FS)) == 0)
467 return -1;
468
e7c9f3ff 469 iot = __find_track(t->device, t->sector);
cb2a1a62
JA
470 if (!iot) {
471 fprintf(stderr, "failed to find complete event\n");
2e3e8ded 472 return -1;
cb2a1a62 473 }
2e3e8ded
JA
474
475 iot->completion_time = t->time;
476 elapsed = iot->completion_time - iot->dispatch_time;
477
50adc0ba
JA
478 if (per_process_stats) {
479 struct per_process_info *ppi = find_process_by_pid(iot->pid);
b9d40d6f 480 int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
50adc0ba 481
b9d40d6f
JA
482 if (ppi && elapsed > ppi->longest_completion_wait[w])
483 ppi->longest_completion_wait[w] = elapsed;
50adc0ba
JA
484 }
485
2e3e8ded
JA
486 /*
487 * kill the trace, we don't need it after completion
488 */
489 rb_erase(&iot->rb_node, &rb_track_root);
490 free(iot);
491
492 return elapsed;
493}
494
495
152f6476
JA
496static struct io_stats *find_process_io_stats(__u32 pid, char *name)
497{
498 struct per_process_info *ppi = find_process_by_pid(pid);
499
500 if (!ppi) {
501 ppi = malloc(sizeof(*ppi));
502 memset(ppi, 0, sizeof(*ppi));
503 strncpy(ppi->name, name, sizeof(ppi->name));
504 ppi->pid = pid;
505 add_process_to_hash(ppi);
506 add_process_to_list(ppi);
507 }
508
509 return &ppi->io_stats;
510}
511
e7c9f3ff
NS
512
513static void resize_cpu_info(struct per_dev_info *pdi, int cpu)
a718bd37 514{
e7c9f3ff
NS
515 struct per_cpu_info *cpus = pdi->cpus;
516 int ncpus = pdi->ncpus;
517 int new_count = cpu + 1;
518 int new_space, size;
a718bd37
NS
519 char *new_start;
520
e7c9f3ff
NS
521 size = new_count * sizeof(struct per_cpu_info);
522 cpus = realloc(cpus, size);
523 if (!cpus) {
524 char name[20];
525 fprintf(stderr, "Out of memory, CPU info for device %s (%d)\n",
526 get_dev_name(pdi, name, sizeof(name)), size);
a718bd37
NS
527 exit(1);
528 }
529
e7c9f3ff
NS
530 new_start = (char *)cpus + (ncpus * sizeof(struct per_cpu_info));
531 new_space = (new_count - ncpus) * sizeof(struct per_cpu_info);
a718bd37 532 memset(new_start, 0, new_space);
e7c9f3ff
NS
533
534 pdi->ncpus = new_count;
535 pdi->cpus = cpus;
536}
cb2a1a62 537
e7c9f3ff
NS
538static struct per_cpu_info *get_cpu_info(struct per_dev_info *pdi, int cpu)
539{
cb2a1a62
JA
540 struct per_cpu_info *pci;
541
e7c9f3ff
NS
542 if (cpu >= pdi->ncpus)
543 resize_cpu_info(pdi, cpu);
cb2a1a62
JA
544
545 pci = &pdi->cpus[cpu];
546 pci->cpu = cpu;
547 return pci;
a718bd37
NS
548}
549
e7c9f3ff
NS
550
551static int resize_devices(char *name)
a718bd37 552{
e7c9f3ff 553 int size = (ndevices + 1) * sizeof(struct per_dev_info);
c499bf38 554
e7c9f3ff
NS
555 devices = realloc(devices, size);
556 if (!devices) {
557 fprintf(stderr, "Out of memory, device %s (%d)\n", name, size);
558 return 1;
559 }
560 memset(&devices[ndevices], 0, sizeof(struct per_dev_info));
561 devices[ndevices].name = name;
562 ndevices++;
563 return 0;
564}
a718bd37 565
cb2a1a62 566static struct per_dev_info *get_dev_info(dev_t id)
e7c9f3ff 567{
cb2a1a62 568 struct per_dev_info *pdi;
e7c9f3ff 569 int i;
c499bf38 570
e7c9f3ff
NS
571 for (i = 0; i < ndevices; i++)
572 if (devices[i].id == id)
573 return &devices[i];
cb2a1a62 574
e7c9f3ff
NS
575 if (resize_devices(NULL) != 0)
576 return NULL;
cb2a1a62
JA
577
578 pdi = &devices[ndevices - 1];
579 pdi->id = id;
580 return pdi;
a718bd37
NS
581}
582
e7c9f3ff
NS
583static char *get_dev_name(struct per_dev_info *pdi, char *buffer, int size)
584{
585 if (pdi->name)
586 snprintf(buffer, size, "%s", pdi->name);
587 else
588 snprintf(buffer, size, "%d,%d", MAJOR(pdi->id), MINOR(pdi->id));
589 return buffer;
590}
591
e7c9f3ff 592static void check_time(struct per_dev_info *pdi, struct blk_io_trace *bit)
cfab07eb
AB
593{
594 unsigned long long this = bit->time;
e7c9f3ff 595 unsigned long long last = pdi->last_reported_time;
cfab07eb 596
e7c9f3ff
NS
597 pdi->backwards = (this < last) ? 'B' : ' ';
598 pdi->last_reported_time = this;
cfab07eb
AB
599}
600
152f6476
JA
601static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t,
602 int rw)
d0ca268b
JA
603{
604 if (rw) {
152f6476
JA
605 ios->mwrites++;
606 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 607 } else {
152f6476
JA
608 ios->mreads++;
609 ios->qread_kb += t->bytes >> 10;
610 }
611}
612
613static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci,
614 int rw)
615{
616 __account_m(&pci->io_stats, t, rw);
617
618 if (per_process_stats) {
619 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
620
621 __account_m(ios, t, rw);
d0ca268b
JA
622 }
623}
624
152f6476
JA
625static inline void __account_q(struct io_stats *ios, struct blk_io_trace *t,
626 int rw)
d0ca268b
JA
627{
628 if (rw) {
152f6476
JA
629 ios->qwrites++;
630 ios->qwrite_kb += t->bytes >> 10;
d0ca268b 631 } else {
152f6476
JA
632 ios->qreads++;
633 ios->qread_kb += t->bytes >> 10;
634 }
635}
636
637static inline void account_q(struct blk_io_trace *t, struct per_cpu_info *pci,
638 int rw)
639{
640 __account_q(&pci->io_stats, t, rw);
641
642 if (per_process_stats) {
643 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
644
645 __account_q(ios, t, rw);
d0ca268b
JA
646 }
647}
648
152f6476 649static inline void __account_c(struct io_stats *ios, int rw, unsigned int bytes)
d0ca268b
JA
650{
651 if (rw) {
152f6476
JA
652 ios->cwrites++;
653 ios->cwrite_kb += bytes >> 10;
d0ca268b 654 } else {
152f6476
JA
655 ios->creads++;
656 ios->cread_kb += bytes >> 10;
657 }
658}
659
660static inline void account_c(struct blk_io_trace *t, struct per_cpu_info *pci,
661 int rw, int bytes)
662{
663 __account_c(&pci->io_stats, rw, bytes);
664
665 if (per_process_stats) {
666 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
667
668 __account_c(ios, rw, bytes);
d0ca268b
JA
669 }
670}
671
152f6476 672static inline void __account_i(struct io_stats *ios, int rw, unsigned int bytes)
afd2d7ad 673{
674 if (rw) {
152f6476
JA
675 ios->iwrites++;
676 ios->iwrite_kb += bytes >> 10;
afd2d7ad 677 } else {
152f6476
JA
678 ios->ireads++;
679 ios->iread_kb += bytes >> 10;
afd2d7ad 680 }
681}
682
152f6476
JA
683static inline void account_i(struct blk_io_trace *t, struct per_cpu_info *pci,
684 int rw)
d0ca268b 685{
152f6476
JA
686 __account_i(&pci->io_stats, rw, t->bytes);
687
688 if (per_process_stats) {
689 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
d5396421 690
152f6476
JA
691 __account_i(ios, rw, t->bytes);
692 }
693}
694
06639b27
JA
695static inline void __account_unplug(struct io_stats *ios, int timer)
696{
697 if (timer)
698 ios->timer_unplugs++;
699 else
700 ios->io_unplugs++;
701}
702
703static inline void account_unplug(struct blk_io_trace *t,
704 struct per_cpu_info *pci, int timer)
705{
706 __account_unplug(&pci->io_stats, timer);
707
708 if (per_process_stats) {
709 struct io_stats *ios = find_process_io_stats(t->pid, t->comm);
710
711 __account_unplug(ios, timer);
712 }
713}
714
152f6476
JA
715static void output(struct per_cpu_info *pci, char *s)
716{
717 fprintf(ofp, "%s", s);
d0ca268b
JA
718}
719
3aabcd89
JA
720static char hstring[256];
721static char tstring[256];
d0ca268b 722
d5396421 723static inline char *setup_header(struct per_cpu_info *pci,
3639a11e 724 struct blk_io_trace *t, char *act)
d0ca268b
JA
725{
726 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
727 int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER);
728 int s = t->action & BLK_TC_ACT(BLK_TC_SYNC);
729 char rwbs[4];
730 int i = 0;
731
732 if (w)
733 rwbs[i++] = 'W';
734 else
735 rwbs[i++] = 'R';
736 if (b)
737 rwbs[i++] = 'B';
738 if (s)
739 rwbs[i++] = 'S';
740
741 rwbs[i] = '\0';
742
3639a11e 743 sprintf(hstring, "%3d,%-3d %2d %8ld %5Lu.%09Lu %5u %2s %3s",
e7c9f3ff 744 MAJOR(t->device), MINOR(t->device), pci->cpu,
3639a11e 745 (unsigned long)t->sequence, SECONDS(t->time),
cfab07eb 746 NANO_SECONDS(t->time), t->pid, act, rwbs);
d0ca268b
JA
747
748 return hstring;
749}
750
d5396421 751static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 752 char *act)
d0ca268b 753{
2e3e8ded
JA
754 unsigned long long elapsed = log_track_complete(t);
755
756 if (elapsed != -1ULL) {
b9d40d6f 757 unsigned long usec = elapsed / 1000;
2e3e8ded 758
b9d40d6f 759 sprintf(tstring,"%s %Lu + %u (%8lu) [%d]\n",
2e3e8ded
JA
760 setup_header(pci, t, act),
761 (unsigned long long)t->sector, t->bytes >> 9,
762 usec, t->error);
763 } else {
764 sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act),
765 (unsigned long long)t->sector, t->bytes >> 9, t->error);
766 }
767
d5396421 768 output(pci, tstring);
d0ca268b
JA
769}
770
d5396421 771static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 772 char *act)
d0ca268b 773{
95c15013 774 unsigned long long elapsed = log_track_queue(t);
2e3e8ded 775
95c15013 776 if (elapsed != -1ULL) {
b9d40d6f 777 unsigned long usec = elapsed / 1000;
95c15013 778
b9d40d6f 779 sprintf(tstring,"%s %Lu + %u (%8lu) [%s]\n",
95c15013
JA
780 setup_header(pci, t, act),
781 (unsigned long long)t->sector, t->bytes >> 9,
782 usec, t->comm);
783 } else {
784 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
785 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
786 }
d5396421 787 output(pci, tstring);
d0ca268b
JA
788}
789
d5396421 790static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 791 char *act)
d0ca268b 792{
2e3e8ded
JA
793 unsigned long long elapsed = log_track_issue(t);
794
795 if (elapsed != -1ULL) {
796 double usec = (double) elapsed / 1000;
797
555d3a31 798 sprintf(tstring,"%s %Lu + %u (%8.2f) [%s]\n",
2e3e8ded
JA
799 setup_header(pci, t, act),
800 (unsigned long long)t->sector, t->bytes >> 9,
801 usec, t->comm);
802 } else {
803 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
804 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
805 }
806
d5396421 807 output(pci, tstring);
d0ca268b
JA
808}
809
d5396421 810static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 811 char *act)
d0ca268b 812{
2e3e8ded
JA
813 log_track_merge(t);
814
984c63b7 815 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
2955af9d 816 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 817 output(pci, tstring);
d0ca268b
JA
818}
819
dfe34da1 820static void log_action(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 821 char *act)
dfe34da1
JA
822{
823 sprintf(tstring,"%s [%s]\n", setup_header(pci, t, act), t->comm);
824 output(pci, tstring);
825}
826
d5396421 827static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 828 char *act)
d0ca268b 829{
2955af9d
NS
830 sprintf(tstring,"%s %Lu + %u [%s]\n", setup_header(pci, t, act),
831 (unsigned long long)t->sector, t->bytes >> 9, t->comm);
d5396421 832 output(pci, tstring);
d0ca268b
JA
833}
834
67e14fdc 835static int log_unplug(struct per_cpu_info *pci, struct blk_io_trace *t,
3639a11e 836 char *act)
67e14fdc
JA
837{
838 __u64 *depth;
839 int len;
840
06639b27 841 len = sprintf(tstring,"%s [%s] ", setup_header(pci, t, act), t->comm);
3639a11e 842 depth = (__u64 *) ((char *) t + sizeof(*t));
67e14fdc
JA
843 sprintf(tstring + len, "%u\n", (unsigned int) be64_to_cpu(*depth));
844 output(pci, tstring);
845
846 return 0;
847}
848
3639a11e 849static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act)
d0ca268b 850{
87b72777
JA
851 unsigned char *buf;
852 int i;
d0ca268b 853
d5396421
JA
854 sprintf(tstring,"%s ", setup_header(pci, t, act));
855 output(pci, tstring);
d0ca268b 856
87b72777 857 buf = (unsigned char *) t + sizeof(*t);
d0ca268b
JA
858 for (i = 0; i < t->pdu_len; i++) {
859 sprintf(tstring,"%02x ", buf[i]);
d5396421 860 output(pci, tstring);
d0ca268b
JA
861 }
862
3639a11e 863 if (act[0] == 'C') {
2955af9d
NS
864 sprintf(tstring,"[%d]\n", t->error);
865 output(pci, tstring);
866 } else {
867 sprintf(tstring,"[%s]\n", t->comm);
d5396421 868 output(pci, tstring);
d0ca268b 869 }
87b72777 870 return 0;
d0ca268b
JA
871}
872
d5396421 873static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b 874{
87b72777
JA
875 int ret = 0;
876
d0ca268b
JA
877 switch (t->action & 0xffff) {
878 case __BLK_TA_QUEUE:
3639a11e 879 log_generic(pci, t, "Q");
d0ca268b
JA
880 break;
881 case __BLK_TA_GETRQ:
3639a11e 882 log_generic(pci, t, "G");
d0ca268b
JA
883 break;
884 case __BLK_TA_SLEEPRQ:
3639a11e 885 log_generic(pci, t, "S");
d0ca268b
JA
886 break;
887 case __BLK_TA_REQUEUE:
3639a11e 888 log_generic(pci, t, "R");
d0ca268b
JA
889 break;
890 case __BLK_TA_ISSUE:
3639a11e 891 ret = log_pc(pci, t, "D");
d0ca268b
JA
892 break;
893 case __BLK_TA_COMPLETE:
3639a11e 894 log_pc(pci, t, "C");
d0ca268b
JA
895 break;
896 default:
897 fprintf(stderr, "Bad pc action %x\n", t->action);
87b72777
JA
898 ret = 1;
899 break;
d0ca268b
JA
900 }
901
87b72777 902 return ret;
d0ca268b
JA
903}
904
d5396421 905static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci)
d0ca268b
JA
906{
907 int w = t->action & BLK_TC_ACT(BLK_TC_WRITE);
7997c5b0 908 int act = t->action & 0xffff;
d0ca268b 909
7997c5b0 910 switch (act) {
d0ca268b 911 case __BLK_TA_QUEUE:
152f6476 912 account_q(t, pci, w);
3639a11e 913 log_queue(pci, t, "Q");
d0ca268b
JA
914 break;
915 case __BLK_TA_BACKMERGE:
152f6476 916 account_m(t, pci, w);
3639a11e 917 log_merge(pci, t, "M");
d0ca268b
JA
918 break;
919 case __BLK_TA_FRONTMERGE:
152f6476 920 account_m(t, pci, w);
3639a11e 921 log_merge(pci, t, "F");
d0ca268b
JA
922 break;
923 case __BLK_TA_GETRQ:
95c15013 924 log_track_getrq(t);
3639a11e 925 log_generic(pci, t, "G");
d0ca268b
JA
926 break;
927 case __BLK_TA_SLEEPRQ:
3639a11e 928 log_generic(pci, t, "S");
d0ca268b
JA
929 break;
930 case __BLK_TA_REQUEUE:
152f6476 931 account_c(t, pci, w, -t->bytes);
3639a11e 932 log_queue(pci, t, "R");
d0ca268b
JA
933 break;
934 case __BLK_TA_ISSUE:
152f6476 935 account_i(t, pci, w);
3639a11e 936 log_issue(pci, t, "D");
d0ca268b
JA
937 break;
938 case __BLK_TA_COMPLETE:
152f6476 939 account_c(t, pci, w, t->bytes);
3639a11e 940 log_complete(pci, t, "C");
d0ca268b 941 break;
88b1a526 942 case __BLK_TA_PLUG:
3639a11e 943 log_action(pci, t, "P");
88b1a526 944 break;
3639a11e 945 case __BLK_TA_UNPLUG_IO:
06639b27 946 account_unplug(t, pci, 0);
3639a11e
JA
947 log_unplug(pci, t, "U");
948 break;
949 case __BLK_TA_UNPLUG_TIMER:
06639b27 950 account_unplug(t, pci, 1);
3639a11e 951 log_unplug(pci, t, "UT");
88b1a526 952 break;
d0ca268b
JA
953 default:
954 fprintf(stderr, "Bad fs action %x\n", t->action);
1f79c4a0 955 break;
d0ca268b 956 }
d0ca268b
JA
957}
958
e7c9f3ff
NS
959static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
960 struct per_dev_info *pdi)
d0ca268b 961{
87b72777
JA
962 int ret = 0;
963
d0ca268b 964 if (t->action & BLK_TC_ACT(BLK_TC_PC))
d5396421 965 ret = dump_trace_pc(t, pci);
d0ca268b 966 else
d5396421 967 dump_trace_fs(t, pci);
87b72777 968
e7c9f3ff 969 pdi->events++;
87b72777 970 return ret;
d0ca268b
JA
971}
972
152f6476 973static void dump_io_stats(struct io_stats *ios, char *msg)
5c017e4b 974{
152f6476
JA
975 fprintf(ofp, "%s\n", msg);
976
977 fprintf(ofp, " Reads Queued: %'8lu, %'8LuKiB\t", ios->qreads, ios->qread_kb);
978 fprintf(ofp, " Writes Queued: %'8lu, %'8LuKiB\n", ios->qwrites,ios->qwrite_kb);
0a6b8fc4 979
152f6476
JA
980 fprintf(ofp, " Read Dispatches: %'8lu, %'8LuKiB\t", ios->ireads, ios->iread_kb);
981 fprintf(ofp, " Write Dispatches: %'8lu, %'8LuKiB\n", ios->iwrites,ios->iwrite_kb);
982 fprintf(ofp, " Reads Completed: %'8lu, %'8LuKiB\t", ios->creads, ios->cread_kb);
983 fprintf(ofp, " Writes Completed: %'8lu, %'8LuKiB\n", ios->cwrites,ios->cwrite_kb);
984 fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
152f6476 985 fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
06639b27
JA
986 fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
987 fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
5c017e4b
JA
988}
989
50adc0ba
JA
990static void dump_wait_stats(struct per_process_info *ppi)
991{
b9d40d6f
JA
992 unsigned long rawait = ppi->longest_allocation_wait[0] / 1000;
993 unsigned long rdwait = ppi->longest_dispatch_wait[0] / 1000;
994 unsigned long rcwait = ppi->longest_completion_wait[0] / 1000;
995 unsigned long wawait = ppi->longest_allocation_wait[1] / 1000;
996 unsigned long wdwait = ppi->longest_dispatch_wait[1] / 1000;
997 unsigned long wcwait = ppi->longest_completion_wait[1] / 1000;
998
999 fprintf(ofp, " Allocation wait: %'8lu%8c\t", rawait, ' ');
1000 fprintf(ofp, " Allocation wait: %'8lu\n", wawait);
1001 fprintf(ofp, " Dispatch wait: %'8lu%8c\t", rdwait, ' ');
1002 fprintf(ofp, " Dispatch wait: %'8lu\n", wdwait);
1003 fprintf(ofp, " Completion wait: %'8lu%8c\t", rcwait, ' ');
1004 fprintf(ofp, " Completion wait: %'8lu\n", wcwait);
50adc0ba
JA
1005}
1006
152f6476
JA
1007static void show_process_stats(void)
1008{
1009 struct per_process_info *ppi;
1010
1011 ppi = ppi_list;
1012 while (ppi) {
1013 dump_io_stats(&ppi->io_stats, ppi->name);
50adc0ba 1014 dump_wait_stats(ppi);
152f6476
JA
1015 ppi = ppi->list_next;
1016 }
1017
1018 fprintf(ofp, "\n");
1019}
1020
e7c9f3ff 1021static void show_device_and_cpu_stats(void)
d0ca268b 1022{
e7c9f3ff
NS
1023 struct per_dev_info *pdi;
1024 struct per_cpu_info *pci;
1025 struct io_stats total, *ios;
1026 int i, j, pci_events;
1027 char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
1028 char name[32];
1029
1030 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1031
1032 memset(&total, 0, sizeof(total));
1033 pci_events = 0;
1034
1035 if (i > 0)
1036 fprintf(ofp, "\n");
1037
1038 for (pci = pdi->cpus, j = 0; j < pdi->ncpus; j++, pci++) {
1039 if (!pci->nelems)
1040 continue;
1041
1042 ios = &pci->io_stats;
1043 total.qreads += ios->qreads;
1044 total.qwrites += ios->qwrites;
1045 total.creads += ios->creads;
1046 total.cwrites += ios->cwrites;
1047 total.mreads += ios->mreads;
1048 total.mwrites += ios->mwrites;
1049 total.ireads += ios->ireads;
1050 total.iwrites += ios->iwrites;
1051 total.qread_kb += ios->qread_kb;
1052 total.qwrite_kb += ios->qwrite_kb;
1053 total.cread_kb += ios->cread_kb;
1054 total.cwrite_kb += ios->cwrite_kb;
1055 total.iread_kb += ios->iread_kb;
1056 total.iwrite_kb += ios->iwrite_kb;
06639b27
JA
1057 total.timer_unplugs += ios->timer_unplugs;
1058 total.io_unplugs += ios->io_unplugs;
e7c9f3ff
NS
1059
1060 snprintf(line, sizeof(line) - 1, "CPU%d (%s):",
1061 j, get_dev_name(pdi, name, sizeof(name)));
1062 dump_io_stats(ios, line);
1063 pci_events++;
1064 }
5c017e4b 1065
e7c9f3ff
NS
1066 if (pci_events > 1) {
1067 fprintf(ofp, "\n");
1068 snprintf(line, sizeof(line) - 1, "Total (%s):",
1069 get_dev_name(pdi, name, sizeof(name)));
1070 dump_io_stats(&total, line);
1071 }
d0ca268b 1072
e7c9f3ff
NS
1073 fprintf(ofp, "Events (%s): %'Lu\n",
1074 get_dev_name(pdi, line, sizeof(line)), pdi->events);
1075 }
d0ca268b
JA
1076}
1077
cb2a1a62 1078static struct blk_io_trace *find_trace(void *p, unsigned long offset)
2ff323b0 1079{
cb2a1a62 1080 unsigned long max_offset = offset;
2ff323b0
JA
1081 unsigned long off;
1082 struct blk_io_trace *bit;
1083 __u32 magic;
1084
1085 for (off = 0; off < max_offset; off++) {
1086 bit = p + off;
1087
1088 magic = be32_to_cpu(bit->magic);
1089 if ((magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1090 return bit;
1091 }
1092
1093 return NULL;
1094}
1095
cb2a1a62 1096static int sort_entries(void)
8fc0abbc 1097{
e7c9f3ff 1098 struct per_dev_info *pdi;
412819ce 1099 struct per_cpu_info *pci;
8fc0abbc
JA
1100 struct blk_io_trace *bit;
1101 struct trace *t;
cb2a1a62 1102 int nr = 0;
8fc0abbc 1103
cb2a1a62 1104 while ((t = trace_list) != NULL) {
412819ce 1105
cb2a1a62
JA
1106 trace_list = t->next;
1107 bit = t->bit;
6fe4709e 1108
8fc0abbc
JA
1109 memset(&t->rb_node, 0, sizeof(t->rb_node));
1110
6fe4709e
JA
1111 trace_to_cpu(bit);
1112
cb2a1a62 1113 if (verify_trace(bit))
66fa7233 1114 break;
cb2a1a62
JA
1115 if (trace_rb_insert(t))
1116 return -1;
66fa7233 1117
cb2a1a62
JA
1118 pdi = get_dev_info(bit->device);
1119 pci = get_cpu_info(pdi, bit->cpu);
412819ce
JA
1120 pci->nelems++;
1121
cb2a1a62 1122 nr++;
6fe4709e 1123 }
8fc0abbc 1124
cb2a1a62 1125 return nr;
412819ce
JA
1126}
1127
d5396421 1128static void show_entries_rb(void)
8fc0abbc 1129{
e7c9f3ff 1130 struct per_dev_info *pdi;
8fc0abbc 1131 struct blk_io_trace *bit;
3aabcd89 1132 struct rb_node *n;
8fc0abbc
JA
1133 struct trace *t;
1134 int cpu;
1135
cb2a1a62 1136 while ((n = rb_first(&rb_sort_root)) != NULL) {
8fc0abbc
JA
1137
1138 t = rb_entry(n, struct trace, rb_node);
1139 bit = t->bit;
1140
cb2a1a62 1141 pdi = get_dev_info(bit->device);
e7c9f3ff
NS
1142 if (!pdi) {
1143 fprintf(stderr, "Unknown device ID? (%d,%d)\n",
1144 MAJOR(bit->device), MINOR(bit->device));
1145 break;
1146 }
d5396421 1147 cpu = bit->cpu;
e7c9f3ff
NS
1148 if (cpu > pdi->ncpus) {
1149 fprintf(stderr, "Unknown CPU ID? (%d, device %d,%d)\n",
1150 cpu, MAJOR(bit->device), MINOR(bit->device));
87b72777 1151 break;
8fc0abbc
JA
1152 }
1153
cb2a1a62
JA
1154 /*
1155 * back off displaying more info if we are out of sync
1156 * on SMP systems. to prevent stalling on lost events,
1157 * only allow an event to skip us once
1158 */
1159 if (bit->sequence != (pdi->last_sequence + 1)) {
1160 if (!t->skipped) {
1161 t->skipped = 1;
1162 break;
1163 }
1164 }
1165
1166 pdi->last_sequence = bit->sequence;
1167
cfab07eb 1168 bit->time -= genesis_time;
46e6968b
NS
1169 if (bit->time < stopwatch_start)
1170 continue;
1171 if (bit->time >= stopwatch_end)
1172 break;
8fc0abbc 1173
e7c9f3ff 1174 check_time(pdi, bit);
8fc0abbc 1175
e7c9f3ff 1176 if (dump_trace(bit, &pdi->cpus[cpu], pdi))
87b72777
JA
1177 break;
1178
cb2a1a62
JA
1179 rb_erase(&t->rb_node, &rb_sort_root);
1180 free(bit);
1181 free(t);
1182 }
8fc0abbc
JA
1183}
1184
1f79c4a0
JA
/*
 * Read exactly 'bytes' bytes from 'fd' into 'buffer'.
 *
 * If 'block' is zero the descriptor is switched to non-blocking mode, so
 * the call gives up at once when no data is pending. Otherwise it is
 * switched to blocking mode. Once part of a record has been consumed the
 * rest is always waited for, so the caller never loses its place in the
 * stream.
 *
 * Returns 0 when the full amount was read, 1 on end of file before that,
 * and -1 on error or when nothing was available in non-blocking mode.
 */
static int read_data(int fd, void *buffer, int bytes, int block)
{
	int ret, bytes_left, fl;
	char *p;

	fl = fcntl(fd, F_GETFL);
	if (fl < 0) {
		perror("fcntl");
		return -1;
	}

	if (!block)
		fcntl(fd, F_SETFL, fl | O_NONBLOCK);
	else
		fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);

	bytes_left = bytes;
	p = buffer;
	while (bytes_left > 0) {
		ret = read(fd, p, bytes_left);
		if (!ret)
			return 1;
		else if (ret < 0) {
			if (errno == EAGAIN && bytes_left != bytes) {
				/*
				 * Part of the record is already consumed.
				 * Returning now would drop those bytes and
				 * desynchronise the stream, so wait for the
				 * remainder in blocking mode.
				 */
				fcntl(fd, F_SETFL, fl & ~O_NONBLOCK);
				continue;
			}
			if (errno != EAGAIN)
				perror("read");
			return -1;
		} else {
			p += ret;
			bytes_left -= ret;
		}
	}

	return 0;
}
1215
cb2a1a62
JA
1216/*
1217 * Find the traces in 'tb' and add them to the list for sorting and
1218 * displaying
1219 */
1220static int find_entries(void *tb, unsigned long size)
1221{
1222 struct blk_io_trace *bit;
1223 struct trace *t;
1224 void *start = tb;
1225
1226 while (tb - start <= size - sizeof(*bit)) {
1227 bit = find_trace(tb, size - (tb - start));
1228 if (!bit)
1229 break;
1230
1231 t = malloc(sizeof(*t));
1232 memset(t, 0, sizeof(*t));
1233 t->bit = bit;
1234
1235 t->next = trace_list;
1236 trace_list = t;
1237
1238 tb += sizeof(*bit) + bit->pdu_len;
1239 }
1240
1241 return 0;
1242}
1243
d5396421 1244static int do_file(void)
d0ca268b 1245{
e7c9f3ff 1246 struct per_dev_info *pdi;
cb2a1a62 1247 int i, j, nfiles = 0, nelems;
d0ca268b 1248
e7c9f3ff
NS
1249 for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
1250 for (j = 0;; j++, nfiles++) {
1251 struct per_cpu_info *pci;
1252 struct stat st;
1253 void *tb;
87b72777 1254
e7c9f3ff
NS
1255 pci = get_cpu_info(pdi, j);
1256 pci->cpu = j;
d0ca268b 1257
e7c9f3ff
NS
1258 snprintf(pci->fname, sizeof(pci->fname)-1,
1259 "%s_out.%d", pdi->name, j);
1260 if (stat(pci->fname, &st) < 0)
1261 break;
1262 if (!st.st_size)
1263 continue;
1264
1265 printf("Processing %s\n", pci->fname);
1266
1267 tb = malloc(st.st_size);
1268 if (!tb) {
1269 fprintf(stderr, "Out of memory, skip file %s\n",
1270 pci->fname);
1271 continue;
1272 }
1273
1274 pci->fd = open(pci->fname, O_RDONLY);
1275 if (pci->fd < 0) {
1276 perror(pci->fname);
1277 free(tb);
1278 continue;
1279 }
1280
1281 if (read_data(pci->fd, tb, st.st_size, 1)) {
1282 close(pci->fd);
1283 free(tb);
1284 continue;
1285 }
1286
cb2a1a62
JA
1287 if (find_entries(tb, st.st_size)) {
1288 close(pci->fd);
1289 free(tb);
1290 }
1291
1292 nelems = sort_entries();
1293 if (nelems == -1) {
e7c9f3ff
NS
1294 close(pci->fd);
1295 free(tb);
1296 continue;
1297 }
1298
1299 printf("Completed %s (CPU%d %d, entries)\n",
cb2a1a62 1300 pci->fname, j, nelems);
e7c9f3ff 1301 close(pci->fd);
d0ca268b 1302 }
d5396421
JA
1303 }
1304
1305 if (!nfiles) {
1306 fprintf(stderr, "No files found\n");
1307 return 1;
1308 }
1309
1310 show_entries_rb();
d5396421
JA
1311 return 0;
1312}
1313
cb2a1a62 1314static int read_sort_events(int fd)
d5396421 1315{
cb2a1a62 1316 int events = 0;
d5396421 1317
412819ce 1318 do {
cb2a1a62
JA
1319 struct blk_io_trace *bit;
1320 struct trace *t;
412819ce 1321 int pdu_len;
51128a28 1322 __u32 magic;
d5396421 1323
cb2a1a62 1324 bit = malloc(sizeof(*bit));
d5396421 1325
cb2a1a62 1326 if (read_data(fd, bit, sizeof(*bit), !events))
c80c18a7 1327 break;
d5396421 1328
cb2a1a62 1329 magic = be32_to_cpu(bit->magic);
51128a28
JA
1330 if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1331 fprintf(stderr, "Bad magic %x\n", magic);
1332 break;
1333 }
1334
cb2a1a62 1335 pdu_len = be16_to_cpu(bit->pdu_len);
2ff323b0 1336 if (pdu_len) {
cb2a1a62 1337 void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
d5396421 1338
cb2a1a62 1339 if (read_data(fd, ptr + sizeof(*bit), pdu_len, 1))
2ff323b0 1340 break;
d5396421 1341
cb2a1a62 1342 bit = ptr;
2ff323b0 1343 }
d5396421 1344
cb2a1a62
JA
1345 t = malloc(sizeof(*t));
1346 memset(t, 0, sizeof(*t));
1347 t->bit = bit;
1348 t->next = trace_list;
1349 trace_list = t;
1350
412819ce 1351 events++;
79f19470 1352 } while (!is_done() && events < rb_batch);
d5396421 1353
412819ce
JA
1354 return events;
1355}
d5396421 1356
412819ce
JA
/*
 * Pipe mode: repeatedly read a batch of traces from a duplicate of
 * standard input, sort it and display it, until a batch comes back empty
 * or sorting fails.
 *
 * Returns 0 normally, 1 if standard input could not be duplicated.
 */
static int do_stdin(void)
{
	int fd;

	fd = dup(STDIN_FILENO);
	if (fd < 0) {
		/* report the real cause instead of failing later in read() */
		perror("dup");
		return 1;
	}

	do {
		int events;

		events = read_sort_events(fd);
		if (!events)
			break;

		if (sort_entries() == -1)
			break;

		show_entries_rb();
	} while (1);

	close(fd);
	return 0;
}
d0ca268b 1378
1f79c4a0 1379static void flush_output(void)
412819ce 1380{
152f6476 1381 fflush(ofp);
412819ce
JA
1382}
1383
1f79c4a0 1384static void handle_sigint(int sig)
412819ce
JA
1385{
1386 done = 1;
1387 flush_output();
1388}
1389
46e6968b
NS
1390/*
1391 * Extract start and duration times from a string, allowing
1392 * us to specify a time interval of interest within a trace.
1393 * Format: "duration" (start is zero) or "start:duration".
1394 */
1395static int find_stopwatch_interval(char *string)
1396{
1397 double value;
1398 char *sp;
1399
1400 value = strtod(string, &sp);
1401 if (sp == string) {
1402 fprintf(stderr,"Invalid stopwatch timer: %s\n", string);
1403 return 1;
1404 }
1405 if (*sp == ':') {
1406 stopwatch_start = DOUBLE_TO_NANO_ULL(value);
1407 string = sp + 1;
1408 value = strtod(string, &sp);
1409 if (sp == string || *sp != '\0') {
1410 fprintf(stderr,"Invalid stopwatch duration time: %s\n",
1411 string);
1412 return 1;
1413 }
1414 } else if (*sp != '\0') {
1415 fprintf(stderr,"Invalid stopwatch start timer: %s\n", string);
1416 return 1;
1417 }
1418 stopwatch_end = stopwatch_start + DOUBLE_TO_NANO_ULL(value);
1419 return 0;
1420}
1421
1f79c4a0
JA
/*
 * Print the command line synopsis for 'prog' on stderr.
 */
static void usage(char *prog)
{
	fprintf(stderr,
		"Usage: %s [-i <name>] [-o <output>] [-s] "
		"[-w N[:n]] <name>...\n",
		prog);
}
1428
d5396421
JA
1429int main(int argc, char *argv[])
1430{
152f6476 1431 char *ofp_buffer;
a66877e6 1432 int c, ret, mode;
1e1c60f1 1433 int per_device_and_cpu_stats = 1;
d5396421
JA
1434
1435 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1436 switch (c) {
1437 case 'i':
e7c9f3ff
NS
1438 if (!strcmp(optarg, "-") && !pipeline)
1439 pipeline = 1;
1440 else if (resize_devices(optarg) != 0)
1441 return 1;
d5396421
JA
1442 break;
1443 case 'o':
66efebf8 1444 output_name = optarg;
d5396421 1445 break;
79f19470
JA
1446 case 'b':
1447 rb_batch = atoi(optarg);
1448 if (rb_batch <= 0)
1449 rb_batch = RB_BATCH_DEFAULT;
1450 break;
152f6476
JA
1451 case 's':
1452 per_process_stats = 1;
1453 break;
7997c5b0
JA
1454 case 't':
1455 track_ios = 1;
1456 break;
1e1c60f1
NS
1457 case 'q':
1458 per_device_and_cpu_stats = 0;
1459 break;
46e6968b
NS
1460 case 'w':
1461 if (find_stopwatch_interval(optarg) != 0)
1462 return 1;
1463 break;
d5396421 1464 default:
1f79c4a0 1465 usage(argv[0]);
d5396421
JA
1466 return 1;
1467 }
d0ca268b
JA
1468 }
1469
e7c9f3ff
NS
1470 while (optind < argc) {
1471 if (!strcmp(argv[optind], "-") && !pipeline)
1472 pipeline = 1;
1473 else if (resize_devices(argv[optind]) != 0)
1474 return 1;
1475 optind++;
1476 }
1477
1478 if (!pipeline && !ndevices) {
1f79c4a0 1479 usage(argv[0]);
d5396421
JA
1480 return 1;
1481 }
1482
7997c5b0
JA
1483 memset(&rb_sort_root, 0, sizeof(rb_sort_root));
1484 memset(&rb_track_root, 0, sizeof(rb_track_root));
412819ce
JA
1485
1486 signal(SIGINT, handle_sigint);
1487 signal(SIGHUP, handle_sigint);
1488 signal(SIGTERM, handle_sigint);
d5396421 1489
d69db225
JA
1490 setlocale(LC_NUMERIC, "en_US");
1491
a66877e6 1492 if (!output_name) {
152f6476 1493 ofp = fdopen(STDOUT_FILENO, "w");
a66877e6
JA
1494 mode = _IOLBF;
1495 } else {
152f6476
JA
1496 char ofname[128];
1497
1498 snprintf(ofname, sizeof(ofname) - 1, "%s.log", output_name);
1499 ofp = fopen(ofname, "w");
a66877e6 1500 mode = _IOFBF;
152f6476
JA
1501 }
1502
1503 if (!ofp) {
1504 perror("fopen");
1505 return 1;
1506 }
1507
1508 ofp_buffer = malloc(4096);
a66877e6 1509 if (setvbuf(ofp, ofp_buffer, mode, 4096)) {
152f6476
JA
1510 perror("setvbuf");
1511 return 1;
1512 }
1513
e7c9f3ff 1514 if (pipeline)
d5396421
JA
1515 ret = do_stdin();
1516 else
1517 ret = do_file();
1518
152f6476
JA
1519 if (per_process_stats)
1520 show_process_stats();
1521
1e1c60f1
NS
1522 if (per_device_and_cpu_stats)
1523 show_device_and_cpu_stats();
152f6476 1524
412819ce 1525 flush_output();
d5396421 1526 return ret;
d0ca268b 1527}