/* iowatcher/blkparse.c, from blktrace.git */
/*
 * Copyright (C) 2012 Fusion-io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <inttypes.h>
#include <string.h>
#include <asm/types.h>
#include <errno.h>
#include <sys/mman.h>
#include <time.h>
#include <dirent.h>

#include "plot.h"
#include "blkparse.h"
#include "list.h"
#include "tracers.h"
#include "../blktrace_api.h"

#define IO_HASH_TABLE_BITS 11
#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
static u64 ios_in_flight = 0;

#define PROCESS_HASH_TABLE_BITS 7
#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];

extern int plot_io_action;
extern int io_per_process;

#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
#define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)

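/*
 * A pending_io is created when an IO is queued or dispatched and looked up
 * again when it completes, keyed by (sector, device) in io_hash_table.  The
 * timestamps it carries feed the latency and queue-depth graphs.  A pid_map
 * ties a pid to the command name reported by blktrace and to the plot slot
 * used for per-process IO plotting.
 */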
struct pending_io {
	/* sector offset of this IO */
	u64 sector;

	/* dev_t for this IO */
	u32 device;

	/* time this IO was dispatched */
	u64 dispatch_time;
	/* time this IO was finished */
	u64 completion_time;
	struct list_head hash_list;
	/* process which queued this IO */
	u32 pid;
};

struct pid_map {
	struct list_head hash_list;
	u32 pid;
	int index;
	char name[0];
};

u64 get_record_time(struct trace *trace)
{
	return trace->io->time;
}

void init_io_hash_table(void)
{
	int i;
	struct list_head *head;

	for (i = 0; i < IO_HASH_TABLE_SIZE; i++) {
		head = io_hash_table + i;
		INIT_LIST_HEAD(head);
	}
}

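/*
 * The shift/add sequence below is the kernel's old hash_64(): an open-coded
 * multiply by GOLDEN_RATIO_PRIME_64, with the top IO_HASH_TABLE_BITS bits of
 * the product used as the bucket index.
 */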
/* taken from the kernel hash.h */
static inline u64 hash_sector(u64 val)
{
	u64 hash = val;

	/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
	u64 n = hash;
	n <<= 18;
	hash -= n;
	n <<= 33;
	hash -= n;
	n <<= 3;
	hash += n;
	n <<= 3;
	hash -= n;
	n <<= 4;
	hash += n;
	n <<= 2;
	hash += n;

	/* High bits are more random, so use them. */
	return hash >> (64 - IO_HASH_TABLE_BITS);
}

static int io_hash_table_insert(struct pending_io *ins_pio)
{
	u64 sector = ins_pio->sector;
	u32 dev = ins_pio->device;
	int slot = hash_sector(sector);
	struct list_head *head;
	struct pending_io *pio;

	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
		if (pio->sector == sector && pio->device == dev)
			return -EEXIST;
	}
	list_add_tail(&ins_pio->hash_list, head);
	return 0;
}

static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
{
	int slot = hash_sector(sector);
	struct list_head *head;
	struct pending_io *pio;

	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
		if (pio->sector == sector && pio->device == dev)
			return pio;
	}
	return NULL;
}

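/*
 * hash_queued_io(), hash_dispatched_io() and hash_completed_io() are the
 * three points where a trace event is matched against a pending_io: Q events
 * create the entry, D events stamp dispatch_time (creating the entry first
 * if no Q event was seen), and C events simply look it up.
 */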
static struct pending_io *hash_queued_io(struct blk_io_trace *io)
{
	struct pending_io *pio;
	int ret;

	pio = calloc(1, sizeof(*pio));
	pio->sector = io->sector;
	pio->device = io->device;
	pio->pid = io->pid;

	ret = io_hash_table_insert(pio);
	if (ret < 0) {
		/* crud, the IO is there already */
		free(pio);
		return NULL;
	}
	return pio;
}

static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
{
	struct pending_io *pio;

	pio = io_hash_table_search(io->sector, io->device);
	if (!pio) {
		pio = hash_queued_io(io);
		if (!pio)
			return NULL;
	}
	pio->dispatch_time = io->time;
	return pio;
}

static struct pending_io *hash_completed_io(struct blk_io_trace *io)
{
	struct pending_io *pio;

	pio = io_hash_table_search(io->sector, io->device);

	if (!pio)
		return NULL;
	return pio;
}

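/*
 * The process hash maps a pid to the pid_map entry holding its command name
 * (picked up from BLK_TN_PROCESS notify records) and the per-process plot
 * index handed out by get_pid_map() when io_per_process is enabled.
 */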
void init_process_hash_table(void)
{
	int i;
	struct list_head *head;

	for (i = 0; i < PROCESS_HASH_TABLE_SIZE; i++) {
		head = process_hash_table + i;
		INIT_LIST_HEAD(head);
	}
}

static u32 hash_pid(u32 pid)
{
	u32 hash = pid;

	hash ^= pid >> 3;
	hash ^= pid >> 3;
	hash ^= pid >> 4;
	hash ^= pid >> 6;
	return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
}

static struct pid_map *process_hash_search(u32 pid)
{
	int slot = hash_pid(pid);
	struct list_head *head;
	struct pid_map *pm;

	head = process_hash_table + slot;
	list_for_each_entry(pm, head, hash_list) {
		if (pm->pid == pid)
			return pm;
	}
	return NULL;
}

static struct pid_map *process_hash_insert(u32 pid, char *name)
{
	int slot = hash_pid(pid);
	struct pid_map *pm;
	int old_index = 0;
	char buf[16];

	pm = process_hash_search(pid);
	if (pm) {
		/* Entry exists and name shouldn't be changed? */
		if (!name || !strcmp(name, pm->name))
			return pm;
		list_del(&pm->hash_list);
		old_index = pm->index;
		free(pm);
	}
	if (!name) {
		sprintf(buf, "[%u]", pid);
		name = buf;
	}
	pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
	pm->pid = pid;
	pm->index = old_index;
	strcpy(pm->name, name);
	list_add_tail(&pm->hash_list, process_hash_table + slot);

	return pm;
}

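/*
 * Notify records carry metadata rather than IO.  BLK_TN_PROCESS names the
 * process behind a pid; BLK_TN_TIMESTAMP pins the relative trace start time
 * to an absolute wall-clock value (two u32s: seconds and nanoseconds).
 */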
static void handle_notify(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	void *payload = (char *)io + sizeof(*io);
	u32 two32[2];

	if (io->action == BLK_TN_PROCESS) {
		if (io_per_process)
			process_hash_insert(io->pid, payload);
		return;
	}

	if (io->action != BLK_TN_TIMESTAMP)
		return;

	if (io->pdu_len != sizeof(two32))
		return;

	memcpy(two32, payload, sizeof(two32));
	trace->start_timestamp = io->time;
	trace->abs_start_time.tv_sec = two32[0];
	trace->abs_start_time.tv_nsec = two32[1];
	if (trace->abs_start_time.tv_nsec < 0) {
		trace->abs_start_time.tv_sec--;
		trace->abs_start_time.tv_nsec += 1000000000;
	}
}

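/*
 * The dump is an mmap'd stream of variable-length records: each blk_io_trace
 * header is followed by pdu_len bytes of payload.  next_record() steps over
 * both and returns 1 once the end of the file has been reached.
 */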
int next_record(struct trace *trace)
{
	int skip = trace->io->pdu_len;
	u64 offset;

	trace->cur += sizeof(*trace->io) + skip;
	offset = trace->cur - trace->start;
	if (offset >= trace->len)
		return 1;

	trace->io = (struct blk_io_trace *)trace->cur;
	return 0;
}

void first_record(struct trace *trace)
{
	trace->cur = trace->start;
	trace->io = (struct blk_io_trace *)trace->cur;
}

static int is_io_event(struct blk_io_trace *test)
{
	char *message;
	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
		return 1;
	if (test->action == BLK_TN_MESSAGE) {
		int len = test->pdu_len;
		if (len < 3)
			return 0;
		message = (char *)(test + 1);
		if (strncmp(message, "fio ", 4) == 0) {
			return 1;
		}
	}
	return 0;
}

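/*
 * find_last_time() first walks backwards from the end of the file, looking
 * for a record whose header plus payload ends exactly at the file size (at
 * most 8192 byte positions are tried); if that fails it falls back to a full
 * forward scan and returns the time of the last IO event seen.
 */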
u64 find_last_time(struct trace *trace)
{
	char *p = trace->start + trace->len;
	struct blk_io_trace *test;
	int search_len = 0;
	u64 found = 0;

	if (trace->len < sizeof(*trace->io))
		return 0;
	p -= sizeof(*trace->io);
	while (p >= trace->start) {
		test = (struct blk_io_trace *)p;
		if (CHECK_MAGIC(test) && is_io_event(test)) {
			u64 offset = p - trace->start;
			if (offset + sizeof(*test) + test->pdu_len == trace->len) {
				return test->time;
			}
		}
		p--;
		search_len++;
		if (search_len > 8192) {
			break;
		}
	}

	/* searching backwards didn't work out, we'll have to scan the file */
	first_record(trace);
	while (1) {
		if (is_io_event(trace->io))
			found = trace->io->time;
		if (next_record(trace))
			break;
	}
	first_record(trace);
	return found;
}

static int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offset_ret,
				  u64 *num_banks_ret)
{
	char *s;
	char *next;
	char *message;
	struct blk_io_trace *test = trace->io;
	int len = test->pdu_len;
	u64 bank;
	u64 offset;
	u64 num_banks;

	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
		return -1;
	if (test->action != BLK_TN_MESSAGE)
		return -1;

	/* the message is fio rw bank offset num_banks */
	if (len < 3)
		return -1;
	message = (char *)(test + 1);
	if (strncmp(message, "fio r ", 6) != 0)
		return -1;

	message = strndup(message, len);
	s = strchr(message, ' ');
	if (!s)
		goto out;
	s++;
	s = strchr(s, ' ');
	if (!s)
		goto out;

	bank = strtoll(s, &next, 10);
	if (s == next)
		goto out;
	s = next;

	offset = strtoll(s, &next, 10);
	if (s == next)
		goto out;
	s = next;

	num_banks = strtoll(s, &next, 10);
	if (s == next)
		goto out;

	*bank_ret = bank;
	*offset_ret = offset;
	*num_banks_ret = num_banks;

	/* the strndup'd copy is no longer needed once the values are parsed */
	free(message);
	return 0;
out:
	free(message);
	return -1;
}

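/*
 * A trace can span several devices.  lookup_dev()/map_devices() give each
 * device a contiguous window in one flat byte-offset space (di->map is the
 * window start, di->min/di->max the byte range actually touched), and
 * map_io() translates a single event into that flat space so every device
 * can share a single offset axis.
 */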
static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
{
	u32 dev = io->device;
	int i;
	struct dev_info *di = NULL;

	for (i = 0; i < trace->num_devices; i++) {
		if (trace->devices[i].device == dev) {
			di = trace->devices + i;
			goto found;
		}
	}
	i = trace->num_devices++;
	if (i >= MAX_DEVICES_PER_TRACE) {
		fprintf(stderr, "Trace contains too many devices (%d)\n", i);
		exit(1);
	}
	di = trace->devices + i;
	di->device = dev;
found:
	return di;
}

static void map_devices(struct trace *trace)
{
	struct dev_info *di;
	u64 found;
	u64 map_start = 0;
	int i;

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			di = lookup_dev(trace, trace->io);
			found = trace->io->sector << 9;
			if (found < di->min)
				di->min = found;

			found += trace->io->bytes;
			if (di->max < found)
				di->max = found;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	for (i = 0; i < trace->num_devices; i++) {
		di = trace->devices + i;
		di->map = map_start;
		map_start += di->max - di->min;
	}
}

static u64 map_io(struct trace *trace, struct blk_io_trace *io)
{
	struct dev_info *di = lookup_dev(trace, io);
	u64 val = trace->io->sector << 9;
	return di->map + val - di->min;
}

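/*
 * find_extreme_offsets() makes one pass over the trace to find the smallest
 * and largest mapped byte offsets touched by any IO, plus the highest bank
 * and bank offset reported in fio messages, so the plots can be scaled
 * before any data is added.
 */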
void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
			  u64 *max_offset_ret)
{
	u64 found = 0;
	u64 max = 0, min = ~(u64)0;
	u64 max_bank = 0;
	u64 max_bank_offset = 0;
	u64 num_banks = 0;

	map_devices(trace);

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			found = map_io(trace, trace->io);
			if (found < min)
				min = found;

			found += trace->io->bytes;
			if (max < found)
				max = found;
		} else {
			u64 bank;
			u64 offset;
			if (!parse_fio_bank_message(trace, &bank,
						    &offset, &num_banks)) {
				if (bank > max_bank)
					max_bank = bank;
				if (offset > max_bank_offset)
					max_bank_offset = offset;
			}
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	*min_ret = min;
	*max_ret = max;
	*max_bank_ret = max_bank;
	*max_offset_ret = max_bank_offset;
}

static void check_io_types(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;

	if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
		switch (action) {
		case __BLK_TA_COMPLETE:
			trace->found_completion = 1;
			break;
		case __BLK_TA_ISSUE:
			trace->found_issue = 1;
			break;
		case __BLK_TA_QUEUE:
			trace->found_queue = 1;
			break;
		}
	}
}

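/*
 * filter_outliers() buckets queued IO offsets into a small histogram across
 * the full range and then trims the y-axis zoom to the lowest and highest
 * buckets still holding at least 5% of the busiest bucket's hits, so a few
 * stray IOs far from the bulk of the activity don't flatten the graph.  It
 * also records which of Q/D/C events are present via check_io_types().
 */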
int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
		    u64 *yzoom_min, u64 *yzoom_max)
{
	int hits[11];
	u64 max_per_bucket[11];
	u64 min_per_bucket[11];
	u64 bytes_per_bucket = (max_offset - min_offset + 1) / 10;
	int slot;
	int fat_count = 0;

	memset(hits, 0, sizeof(int) * 11);
	memset(max_per_bucket, 0, sizeof(u64) * 11);
	memset(min_per_bucket, 0xff, sizeof(u64) * 11);
	first_record(trace);
	while (1) {
		check_io_types(trace);
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
		    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
			u64 off = map_io(trace, trace->io) - min_offset;

			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off < min_per_bucket[slot])
				min_per_bucket[slot] = off;

			off += trace->io->bytes;
			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off > max_per_bucket[slot])
				max_per_bucket[slot] = off;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	for (slot = 0; slot < 11; slot++) {
		if (hits[slot] > fat_count) {
			fat_count = hits[slot];
		}
	}

	*yzoom_max = max_offset;
	for (slot = 10; slot >= 0; slot--) {
		double d = hits[slot];

		if (d >= (double)fat_count * .05) {
			*yzoom_max = max_per_bucket[slot] + min_offset;
			break;
		}
	}

	*yzoom_min = min_offset;
	for (slot = 0; slot < 10; slot++) {
		double d = hits[slot];

		if (d >= (double)fat_count * .05) {
			*yzoom_min = min_per_bucket[slot] + min_offset;
			break;
		}
	}
	return 0;
}

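/*
 * Everything below locates (or generates) the binary dump that iowatcher
 * actually parses.  Per-CPU *.blktrace.N files are combined by running
 * "blkparse -O -d <dumpfile> -i <trace> ..." (see dump_traces()), and
 * find_trace_file() works out whether the caller handed us a dump file, a
 * directory full of traces, or a blktrace base name.
 */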
static char footer[] = ".blktrace.0";
static int footer_len = sizeof(footer) - 1;

static int match_trace(char *name, int *len)
{
	int match_len;
	int footer_start;

	match_len = strlen(name);
	if (match_len <= footer_len)
		return 0;

	footer_start = match_len - footer_len;
	if (strcmp(name + footer_start, footer) != 0)
		return 0;

	if (len)
		*len = match_len;
	return 1;
}

struct tracelist {
	struct tracelist *next;
	char *name;
};

static struct tracelist *traces_list(char *dir_name, int *len)
{
	int count = 0;
	struct tracelist *traces = NULL;
	int dlen = strlen(dir_name);
	DIR *dir = opendir(dir_name);
	if (!dir)
		return NULL;

	while (1) {
		int n = 0;
		struct tracelist *tl;
		struct dirent *d = readdir(dir);
		if (!d)
			break;

		if (!match_trace(d->d_name, &n))
			continue;

		n += dlen + 1; /* dir + '/' + file */
		/* Allocate space for tracelist + filename */
		tl = calloc(1, sizeof(struct tracelist) + (sizeof(char) * (n + 1)));
		if (!tl) {
			closedir(dir);
			return NULL;
		}
		tl->next = traces;
		tl->name = (char *)(tl + 1);
		/* "dir/name" is n chars long; n + 1 bytes were allocated for it */
		snprintf(tl->name, n + 1, "%s/%s", dir_name, d->d_name);
		traces = tl;
		count++;
	}

	closedir(dir);

	if (len)
		*len = count;

	return traces;
}

static void traces_free(struct tracelist *traces)
{
	while (traces) {
		struct tracelist *tl = traces;
		traces = traces->next;
		free(tl);
	}
}

static int dump_traces(struct tracelist *traces, int count, char *dumpfile)
{
	struct tracelist *tl;
	char **argv = NULL;
	int argc = 0;
	int i;
	int err = 0;

	argc = count * 2; /* {"-i", trace } */
	argc += 4; /* See below */
	argv = calloc(argc + 1, sizeof(char *));
	if (!argv)
		return -errno;

	i = 0;
	argv[i++] = "blkparse";
	argv[i++] = "-O";
	argv[i++] = "-d";
	argv[i++] = dumpfile;
	for (tl = traces; tl != NULL; tl = tl->next) {
		argv[i++] = "-i";
		argv[i++] = tl->name;
	}

	err = run_program(argc, argv, 1, NULL, NULL);
	if (err)
		fprintf(stderr, "%s exited with %d, expected 0\n", argv[0], err);
	free(argv);
	return err;
}

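/*
 * Resolve whatever name the user passed into a dump file we can mmap: an
 * existing regular file is used as-is, a directory gets all of its
 * *.blktrace.0 traces combined into "<dir>.dump", and anything else is
 * treated as a blktrace base name whose "<name>.dump" is generated on
 * demand.
 */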
static char *find_trace_file(char *filename)
{
	int ret;
	struct stat st;
	char *dot;
	int found_dir = 0;
	char *dumpfile;
	int len = strlen(filename);

	/* look for an exact match of whatever they pass in.
	 * If it is a file, assume it is the dump file.
	 * If a directory, remember that it existed so we
	 * can combine traces in that directory later
	 */
	ret = stat(filename, &st);
	if (ret == 0) {
		if (S_ISREG(st.st_mode))
			return strdup(filename);

		if (S_ISDIR(st.st_mode))
			found_dir = 1;
	}

	if (found_dir) {
		int i;
		/* Eat up trailing '/'s */
		for (i = len - 1; filename[i] == '/'; i--)
			filename[i] = '\0';
	}

	/*
	 * try tacking .dump onto the end and see if that already
	 * has been generated
	 */
	ret = asprintf(&dumpfile, "%s.dump", filename);
	if (ret == -1) {
		perror("Error building dump file name");
		return NULL;
	}
	ret = stat(dumpfile, &st);
	if (ret == 0)
		return dumpfile;

	/*
	 * try to generate the .dump from all the traces in
	 * a single dir.
	 */
	if (found_dir) {
		int count;
		struct tracelist *traces = traces_list(filename, &count);
		if (traces) {
			ret = dump_traces(traces, count, dumpfile);
			traces_free(traces);
			if (ret == 0)
				return dumpfile;
		}
	}
	free(dumpfile);

	/*
	 * try to generate the .dump from all the blktrace
	 * files for a named trace
	 */
	dot = strrchr(filename, '.');
	if (!dot || strcmp(".dump", dot) != 0) {
		struct tracelist trace = {0, NULL};
		if (dot && dot != filename)
			len = dot - filename;

		/* everything up to the final '.' is the trace base name */
		ret = asprintf(&trace.name, "%.*s.blktrace.0", len, filename);
		if (ret == -1)
			return NULL;
		ret = asprintf(&dumpfile, "%.*s.dump", len, filename);
		if (ret == -1) {
			free(trace.name);
			return NULL;
		}

		ret = dump_traces(&trace, 1, dumpfile);
		if (ret == 0) {
			free(trace.name);
			return dumpfile;
		}
		free(trace.name);
		free(dumpfile);
	}
	return NULL;
}
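
/*
 * open_trace() resolves the name via find_trace_file() and maps the whole
 * dump read-only; all later parsing just walks that mapping via trace->cur.
 */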
struct trace *open_trace(char *filename)
{
	int fd;
	char *p;
	struct stat st;
	int ret;
	struct trace *trace;
	char *found_filename;

	trace = calloc(1, sizeof(*trace));
	if (!trace) {
		fprintf(stderr, "unable to allocate memory for trace\n");
		return NULL;
	}

	found_filename = find_trace_file(filename);
	if (!found_filename) {
		fprintf(stderr, "Unable to find trace file %s\n", filename);
		goto fail;
	}
	filename = found_filename;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "Unable to open trace file %s err %s\n", filename, strerror(errno));
		goto fail;
	}
	ret = fstat(fd, &st);
	if (ret < 0) {
		fprintf(stderr, "stat failed on %s err %s\n", filename, strerror(errno));
		goto fail_fd;
	}
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		fprintf(stderr, "Unable to mmap trace file %s, err %s\n", filename, strerror(errno));
		goto fail_fd;
	}
	trace->fd = fd;
	trace->len = st.st_size;
	trace->start = p;
	trace->cur = p;
	trace->io = (struct blk_io_trace *)p;
	return trace;

fail_fd:
	close(fd);
fail:
	free(trace);
	return NULL;
}
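
/*
 * Which event type drives each graph depends on what the trace contains:
 * throughput and iops prefer completion events, then issue, then queue,
 * while the IO scatter plot (io_event() below) prefers queue events unless
 * plot_io_action overrides the choice.  The found_* flags are filled in by
 * check_io_types().
 */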
static inline int tput_event(struct trace *trace)
{
	if (trace->found_completion)
		return __BLK_TA_COMPLETE;
	if (trace->found_issue)
		return __BLK_TA_ISSUE;
	if (trace->found_queue)
		return __BLK_TA_QUEUE;

	return __BLK_TA_COMPLETE;
}

int action_char_to_num(char action)
{
	switch (action) {
	case 'Q':
		return __BLK_TA_QUEUE;
	case 'D':
		return __BLK_TA_ISSUE;
	case 'C':
		return __BLK_TA_COMPLETE;
	}
	return -1;
}

static inline int io_event(struct trace *trace)
{
	if (plot_io_action)
		return plot_io_action;
	if (trace->found_queue)
		return __BLK_TA_QUEUE;
	if (trace->found_issue)
		return __BLK_TA_ISSUE;
	if (trace->found_completion)
		return __BLK_TA_COMPLETE;

	return __BLK_TA_COMPLETE;
}

void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
	      struct graph_line_data *reads_gld)
{
	struct blk_io_trace *io = trace->io;
	struct graph_line_data *gld;
	int action = io->action & BLK_TA_MASK;
	int seconds;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != tput_event(trace))
		return;

	if (BLK_DATADIR(io->action) & BLK_TC_READ)
		gld = reads_gld;
	else
		gld = writes_gld;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += io->bytes;

	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}

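/*
 * Per-process plotting: each process that issues IO gets its own slot in
 * tf->gdd_reads/tf->gdd_writes, grown GDD_PTR_ALLOC_STEP pointers at a time.
 * When io_per_process is off, everything lands in slot 0.
 */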
#define GDD_PTR_ALLOC_STEP 16

static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
{
	struct pid_map *pm;

	if (!io_per_process) {
		if (!tf->io_plots)
			tf->io_plots = 1;
		return NULL;
	}

	pm = process_hash_insert(pid, NULL);
	/* New entry? */
	if (!pm->index) {
		if (tf->io_plots == tf->io_plots_allocated) {
			tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
			tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_reads)
				abort();
			tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_writes)
				abort();
			memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
			memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
		}
		pm->index = tf->io_plots++;

		return pm;
	}
	return pm;
}

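/*
 * add_io() plots one dot per IO: the mapped byte offset against time, in the
 * read or write plot belonging to the issuing process (or the shared plot).
 */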
void add_io(struct trace *trace, struct trace_file *tf)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	u64 offset;
	int index;
	char *label;
	struct pid_map *pm;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != io_event(trace))
		return;

	offset = map_io(trace, io);

	pm = get_pid_map(tf, io->pid);
	if (!pm) {
		index = 0;
		label = "";
	} else {
		index = pm->index;
		label = pm->name;
	}
	if (BLK_DATADIR(io->action) & BLK_TC_READ) {
		if (!tf->gdd_reads[index])
			tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
	} else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
		if (!tf->gdd_writes[index])
			tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
	}
}

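/*
 * add_pending_io() tracks how many IOs are in flight and records the
 * per-second average as the queue-depth graph; which event types drive the
 * accounting depends on whether D and C events exist in the trace.
 */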
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
	unsigned int seconds;
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	double avg;
	struct pending_io *pio;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action == __BLK_TA_QUEUE) {
		if (io->sector == 0)
			return;
		/*
		 * If D (issue) events are available, use them for I/O
		 * accounting.  Nothing needs to be done for Q.
		 */
		if (trace->found_issue)
			return;
		/*
		 * If there are no D or C events, then all that can be
		 * done is to account the Q event (and make sure not to
		 * add the I/O to the hash, because it will never be
		 * removed).
		 */
		if (!trace->found_completion)
			goto account_io;
		/*
		 * When there are no ISSUE events, count depth and
		 * latency from queue events.
		 */
		pio = hash_queued_io(trace->io);
		if (pio) {
			pio->dispatch_time = io->time;
			goto account_io;
		}
		return;
	}
	if (action == __BLK_TA_REQUEUE) {
		if (ios_in_flight > 0)
			ios_in_flight--;
		return;
	}
	if (action != __BLK_TA_ISSUE)
		return;

	pio = hash_dispatched_io(trace->io);
	if (!pio)
		return;

	if (!trace->found_completion) {
		list_del(&pio->hash_list);
		free(pio);
	}

account_io:
	ios_in_flight++;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += ios_in_flight;
	gld->data[seconds].count++;

	avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
	if (gld->max < (u64)avg) {
		gld->max = avg;
	}
}

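/*
 * add_completed_io() charges the time between dispatch and completion of the
 * matching pending_io to the latency graph and drops the IO from the
 * in-flight count.
 */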
void add_completed_io(struct trace *trace,
		      struct graph_line_data *latency_gld)
{
	struct blk_io_trace *io = trace->io;
	int seconds;
	int action = io->action & BLK_TA_MASK;
	struct pending_io *pio;
	double avg;
	u64 latency;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != __BLK_TA_COMPLETE)
		return;

	seconds = SECONDS(io->time);

	pio = hash_completed_io(trace->io);
	if (!pio)
		return;

	if (ios_in_flight > 0)
		ios_in_flight--;
	if (io->time >= pio->dispatch_time) {
		latency = io->time - pio->dispatch_time;
		latency_gld->data[seconds].sum += latency;
		latency_gld->data[seconds].count++;
	}

	list_del(&pio->hash_list);
	free(pio);

	avg = (double)latency_gld->data[seconds].sum /
		latency_gld->data[seconds].count;
	if (latency_gld->max < (u64)avg) {
		latency_gld->max = avg;
	}
}

void add_iop(struct trace *trace, struct graph_line_data *gld)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	int seconds;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	/* iops and tput use the same events */
	if (action != tput_event(trace))
		return;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += 1;
	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}

void check_record(struct trace *trace)
{
	handle_notify(trace);
}
1118}