iowatcher: Make seconds unsigned
[blktrace.git] / iowatcher / blkparse.c
CommitLineData
9e066e23
CM
1/*
2 * Copyright (C) 2012 Fusion-io
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
660b0411 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
9e066e23
CM
16 *
17 * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
18 */
19#include <sys/types.h>
20#include <sys/stat.h>
21#include <fcntl.h>
22#include <unistd.h>
23#include <stdlib.h>
24#include <stdio.h>
25#include <math.h>
26#include <inttypes.h>
27#include <string.h>
28#include <asm/types.h>
29#include <errno.h>
30#include <sys/mman.h>
31#include <time.h>
32#include <math.h>
2203e914 33#include <dirent.h>
9e066e23
CM
34
35#include "plot.h"
36#include "blkparse.h"
37#include "list.h"
38#include "tracers.h"
39
40#define IO_HASH_TABLE_BITS 11
41#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
42static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
43static u64 ios_in_flight = 0;
44
0a43b43f
JK
45#define PROCESS_HASH_TABLE_BITS 7
46#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
47static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];
48
f2e40ddd 49extern int plot_io_action;
0a43b43f 50extern int io_per_process;
9e066e23 51
2203e914
CM
52static const int line_len = 1024;
53static char line[1024];
54
9e066e23
CM
55/*
56 * Trace categories
57 */
58enum {
59 BLK_TC_READ = 1 << 0, /* reads */
60 BLK_TC_WRITE = 1 << 1, /* writes */
61 BLK_TC_FLUSH = 1 << 2, /* flush */
62 BLK_TC_SYNC = 1 << 3, /* sync */
63 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
64 BLK_TC_REQUEUE = 1 << 5, /* requeueing */
65 BLK_TC_ISSUE = 1 << 6, /* issue */
66 BLK_TC_COMPLETE = 1 << 7, /* completions */
67 BLK_TC_FS = 1 << 8, /* fs requests */
68 BLK_TC_PC = 1 << 9, /* pc requests */
69 BLK_TC_NOTIFY = 1 << 10, /* special message */
70 BLK_TC_AHEAD = 1 << 11, /* readahead */
71 BLK_TC_META = 1 << 12, /* metadata */
72 BLK_TC_DISCARD = 1 << 13, /* discard requests */
73 BLK_TC_DRV_DATA = 1 << 14, /* binary driver data */
74 BLK_TC_FUA = 1 << 15, /* fua requests */
75
76 BLK_TC_END = 1 << 15, /* we've run out of bits! */
77};
78
79#define BLK_TC_SHIFT (16)
80#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
81#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
82
83/*
84 * Basic trace actions
85 */
86enum {
87 __BLK_TA_QUEUE = 1, /* queued */
88 __BLK_TA_BACKMERGE, /* back merged to existing rq */
89 __BLK_TA_FRONTMERGE, /* front merge to existing rq */
90 __BLK_TA_GETRQ, /* allocated new request */
91 __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */
92 __BLK_TA_REQUEUE, /* request requeued */
93 __BLK_TA_ISSUE, /* sent to driver */
94 __BLK_TA_COMPLETE, /* completed by driver */
95 __BLK_TA_PLUG, /* queue was plugged */
96 __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */
97 __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
98 __BLK_TA_INSERT, /* insert request */
99 __BLK_TA_SPLIT, /* bio was split */
100 __BLK_TA_BOUNCE, /* bio was bounced */
101 __BLK_TA_REMAP, /* bio was remapped */
102 __BLK_TA_ABORT, /* request aborted */
103 __BLK_TA_DRV_DATA, /* binary driver data */
104};
105
1582ecc9
JK
106#define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)
107
9e066e23
CM
108/*
109 * Notify events.
110 */
111enum blktrace_notify {
112 __BLK_TN_PROCESS = 0, /* establish pid/name mapping */
113 __BLK_TN_TIMESTAMP, /* include system clock */
114 __BLK_TN_MESSAGE, /* Character string message */
115};
116
117/*
118 * Trace actions in full. Additionally, read or write is masked
119 */
120#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
121#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
122#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
123#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
124#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
125#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
126#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
127#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
128#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
129#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
130#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
131#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
132#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
133#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
134#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
135#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
136#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
137
138#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
139#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
140#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
141
142#define BLK_IO_TRACE_MAGIC 0x65617400
143#define BLK_IO_TRACE_VERSION 0x07
144/*
145 * The trace itself
146 */
147struct blk_io_trace {
148 __u32 magic; /* MAGIC << 8 | version */
149 __u32 sequence; /* event number */
150 __u64 time; /* in nanoseconds */
151 __u64 sector; /* disk offset */
152 __u32 bytes; /* transfer length */
153 __u32 action; /* what happened */
154 __u32 pid; /* who did it */
155 __u32 device; /* device identifier (dev_t) */
156 __u32 cpu; /* on what cpu did it happen */
157 __u16 error; /* completion error */
158 __u16 pdu_len; /* length of data after this trace */
159};
160
161struct pending_io {
162 /* sector offset of this IO */
163 u64 sector;
164
2203e914
CM
165 /* dev_t for this IO */
166 u32 device;
167
9e066e23
CM
168 /* time this IO was dispatched */
169 u64 dispatch_time;
170 /* time this IO was finished */
171 u64 completion_time;
172 struct list_head hash_list;
0a43b43f
JK
173 /* process which queued this IO */
174 u32 pid;
175};
176
177struct pid_map {
178 struct list_head hash_list;
179 u32 pid;
180 int index;
181 char name[0];
9e066e23
CM
182};
183
6a079b02
JK
/* Return the timestamp (ns) of the record the trace cursor currently points at. */
u64 get_record_time(struct trace *trace)
{
	return trace->io->time;
}
188
9e066e23
CM
189void init_io_hash_table(void)
190{
191 int i;
192 struct list_head *head;
193
194 for (i = 0; i < IO_HASH_TABLE_SIZE; i++) {
195 head = io_hash_table + i;
196 INIT_LIST_HEAD(head);
197 }
198}
199
200/* taken from the kernel hash.h */
201static inline u64 hash_sector(u64 val)
202{
203 u64 hash = val;
204
205 /* Sigh, gcc can't optimise this alone like it does for 32 bits. */
206 u64 n = hash;
207 n <<= 18;
208 hash -= n;
209 n <<= 33;
210 hash -= n;
211 n <<= 3;
212 hash += n;
213 n <<= 3;
214 hash -= n;
215 n <<= 4;
216 hash += n;
217 n <<= 2;
218 hash += n;
219
220 /* High bits are more random, so use them. */
221 return hash >> (64 - IO_HASH_TABLE_BITS);
222}
223
0a43b43f 224static int io_hash_table_insert(struct pending_io *ins_pio)
9e066e23
CM
225{
226 u64 sector = ins_pio->sector;
2203e914 227 u32 dev = ins_pio->device;
9e066e23
CM
228 int slot = hash_sector(sector);
229 struct list_head *head;
230 struct pending_io *pio;
231
232 head = io_hash_table + slot;
233 list_for_each_entry(pio, head, hash_list) {
2203e914 234 if (pio->sector == sector && pio->device == dev)
9e066e23
CM
235 return -EEXIST;
236 }
237 list_add_tail(&ins_pio->hash_list, head);
238 return 0;
239}
240
2203e914 241static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
9e066e23
CM
242{
243 int slot = hash_sector(sector);
244 struct list_head *head;
245 struct pending_io *pio;
246
247 head = io_hash_table + slot;
248 list_for_each_entry(pio, head, hash_list) {
2203e914 249 if (pio->sector == sector && pio->device == dev)
9e066e23
CM
250 return pio;
251 }
252 return NULL;
253}
254
79d61530 255static struct pending_io *hash_queued_io(struct blk_io_trace *io)
9e066e23
CM
256{
257 struct pending_io *pio;
258 int ret;
259
260 pio = calloc(1, sizeof(*pio));
261 pio->sector = io->sector;
2203e914 262 pio->device = io->device;
0a43b43f 263 pio->pid = io->pid;
9e066e23 264
0a43b43f
JK
265 ret = io_hash_table_insert(pio);
266 if (ret < 0) {
267 /* crud, the IO is there already */
9e066e23 268 free(pio);
79d61530 269 return NULL;
9e066e23 270 }
79d61530 271 return pio;
0a43b43f
JK
272}
273
854a1f24 274static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
0a43b43f
JK
275{
276 struct pending_io *pio;
277
2203e914 278 pio = io_hash_table_search(io->sector, io->device);
79d61530
JK
279 if (!pio) {
280 pio = hash_queued_io(io);
281 if (!pio)
282 return NULL;
283 }
0a43b43f 284 pio->dispatch_time = io->time;
854a1f24 285 return pio;
9e066e23
CM
286}
287
288static struct pending_io *hash_completed_io(struct blk_io_trace *io)
289{
290 struct pending_io *pio;
291
2203e914 292 pio = io_hash_table_search(io->sector, io->device);
9e066e23
CM
293
294 if (!pio)
295 return NULL;
296 return pio;
297}
298
0a43b43f
JK
299void init_process_hash_table(void)
300{
301 int i;
302 struct list_head *head;
303
304 for (i = 0; i < PROCESS_HASH_TABLE_SIZE; i++) {
305 head = process_hash_table + i;
306 INIT_LIST_HEAD(head);
307 }
308}
309
310static u32 hash_pid(u32 pid)
311{
312 u32 hash = pid;
313
314 hash ^= pid >> 3;
315 hash ^= pid >> 3;
316 hash ^= pid >> 4;
317 hash ^= pid >> 6;
318 return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
319}
320
321static struct pid_map *process_hash_search(u32 pid)
322{
323 int slot = hash_pid(pid);
324 struct list_head *head;
325 struct pid_map *pm;
326
327 head = process_hash_table + slot;
328 list_for_each_entry(pm, head, hash_list) {
329 if (pm->pid == pid)
330 return pm;
331 }
332 return NULL;
333}
334
335static struct pid_map *process_hash_insert(u32 pid, char *name)
336{
337 int slot = hash_pid(pid);
338 struct pid_map *pm;
339 int old_index = 0;
340 char buf[16];
341
342 pm = process_hash_search(pid);
343 if (pm) {
344 /* Entry exists and name shouldn't be changed? */
345 if (!name || !strcmp(name, pm->name))
346 return pm;
347 list_del(&pm->hash_list);
348 old_index = pm->index;
349 free(pm);
350 }
351 if (!name) {
352 sprintf(buf, "[%u]", pid);
353 name = buf;
354 }
355 pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
356 pm->pid = pid;
357 pm->index = old_index;
358 strcpy(pm->name, name);
359 list_add_tail(&pm->hash_list, process_hash_table + slot);
360
361 return pm;
362}
363
9e066e23
CM
/*
 * Process a notify record: BLK_TN_PROCESS establishes the pid -> name
 * mapping (only when per-process plotting is on); BLK_TN_TIMESTAMP
 * records the wall-clock time corresponding to the trace start.
 */
static void handle_notify(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	/* payload (process name or timestamp) follows the fixed header */
	void *payload = (char *)io + sizeof(*io);
	u32 two32[2];

	if (io->action == BLK_TN_PROCESS) {
		if (io_per_process)
			process_hash_insert(io->pid, payload);
		return;
	}

	if (io->action != BLK_TN_TIMESTAMP)
		return;

	/* timestamp payload is exactly two u32s: seconds, nanoseconds */
	if (io->pdu_len != sizeof(two32))
		return;

	memcpy(two32, payload, sizeof(two32));
	trace->start_timestamp = io->time;
	trace->abs_start_time.tv_sec = two32[0];
	trace->abs_start_time.tv_nsec = two32[1];
	/* normalize: tv_nsec is signed, the raw u32 may have wrapped
	 * negative on assignment — presumably on 32-bit longs; verify */
	if (trace->abs_start_time.tv_nsec < 0) {
		trace->abs_start_time.tv_sec--;
		trace->abs_start_time.tv_nsec += 1000000000;
	}
}
391
392int next_record(struct trace *trace)
393{
394 int skip = trace->io->pdu_len;
395 u64 offset;
396
397 trace->cur += sizeof(*trace->io) + skip;
398 offset = trace->cur - trace->start;
399 if (offset >= trace->len)
400 return 1;
401
402 trace->io = (struct blk_io_trace *)trace->cur;
403 return 0;
404}
405
/* Rewind the trace cursor to the first record in the mapped file. */
void first_record(struct trace *trace)
{
	trace->cur = trace->start;
	trace->io = (struct blk_io_trace *)trace->cur;
}
411
bfb0e441
CM
412int is_io_event(struct blk_io_trace *test)
413{
414 char *message;
415 if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
416 return 1;
417 if (test->action == BLK_TN_MESSAGE) {
418 int len = test->pdu_len;
419 if (len < 3)
420 return 0;
421 message = (char *)(test + 1);
422 if (strncmp(message, "fio ", 4) == 0) {
423 return 1;
424 }
425 }
426 return 0;
427}
428
9e066e23
CM
/*
 * Return the timestamp of the last IO event in the trace.  First try a
 * cheap backward scan from the end of the file for a record whose
 * header+payload lands exactly on the file end; fall back to a full
 * forward scan if that fails.  Returns 0 for an empty/too-short trace.
 */
u64 find_last_time(struct trace *trace)
{
	char *p = trace->start + trace->len;
	struct blk_io_trace *test;
	int search_len = 0;
	u64 found = 0;

	if (trace->len < sizeof(*trace->io))
		return 0;
	p -= sizeof(*trace->io);
	/* slide backwards one byte at a time looking for a valid header */
	while (p >= trace->start) {
		test = (struct blk_io_trace *)p;
		if (CHECK_MAGIC(test) && is_io_event(test)) {
			u64 offset = p - trace->start;
			/* only trust it if the record ends exactly at EOF */
			if (offset + sizeof(*test) + test->pdu_len == trace->len) {
				return test->time;
			}
		}
		p--;
		search_len++;
		/* give up after 8K of sliding; payloads can be large */
		if (search_len > 8192) {
			break;
		}
	}

	/* searching backwards didn't work out, we'll have to scan the file */
	first_record(trace);
	while (1) {
		if (is_io_event(trace->io))
			found = trace->io->time;
		if (next_record(trace))
			break;
	}
	first_record(trace);
	return found;
}
465
bfb0e441
CM
466int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offset_ret,
467 u64 *num_banks_ret)
468{
469 char *s;
470 char *next;
471 char *message;
472 struct blk_io_trace *test = trace->io;
473 int len = test->pdu_len;
474 u64 bank;
475 u64 offset;
476 u64 num_banks;
477
478 if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
479 return -1;
480 if (test->action != BLK_TN_MESSAGE)
481 return -1;
482
483 /* the message is fio rw bank offset num_banks */
484 if (len < 3)
485 return -1;
486 message = (char *)(test + 1);
487 if (strncmp(message, "fio r ", 6) != 0)
488 return -1;
489
490 message = strndup(message, len);
491 s = strchr(message, ' ');
492 if (!s)
493 goto out;
494 s++;
495 s = strchr(s, ' ');
496 if (!s)
497 goto out;
498
499 bank = strtoll(s, &next, 10);
500 if (s == next)
501 goto out;
502 s = next;
503
504 offset = strtoll(s, &next, 10);
505 if (s == next)
506 goto out;
507 s = next;
508
509 num_banks = strtoll(s, &next, 10);
510 if (s == next)
511 goto out;
512
513 *bank_ret = bank;
514 *offset_ret = offset;
515 *num_banks_ret = num_banks;
516
517 return 0;
518out:
519 free(message);
520 return -1;
521}
522
2203e914
CM
523static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
524{
525 u32 dev = io->device;
526 int i;
527 struct dev_info *di = NULL;
528
529 for (i = 0; i < trace->num_devices; i++) {
530 if (trace->devices[i].device == dev) {
531 di = trace->devices + i;
532 goto found;
533 }
534 }
535 i = trace->num_devices++;
536 if (i >= MAX_DEVICES_PER_TRACE) {
537 fprintf(stderr, "Trace contains too many devices (%d)\n", i);
538 exit(1);
539 }
540 di = trace->devices + i;
541 di->device = dev;
542found:
543 return di;
544}
545
/*
 * Scan the whole trace to find each device's [min, max) byte range,
 * then assign every device a contiguous, non-overlapping base (di->map)
 * so multi-device traces can be plotted on one linear offset axis.
 * Leaves the trace cursor rewound to the first record.
 */
static void map_devices(struct trace *trace)
{
	struct dev_info *di;
	u64 found;
	u64 map_start = 0;
	int i;

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			di = lookup_dev(trace, trace->io);
			/* sector -> byte offset */
			found = trace->io->sector << 9;
			if (found < di->min)
				di->min = found;

			found += trace->io->bytes;
			if (di->max < found)
				di->max = found;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	/* stack each device's range after the previous one */
	for (i = 0; i < trace->num_devices; i++) {
		di = trace->devices + i;
		di->map = map_start;
		map_start += di->max - di->min;
	}
}
575
576u64 map_io(struct trace *trace, struct blk_io_trace *io)
577{
578 struct dev_info *di = lookup_dev(trace, io);
579 u64 val = trace->io->sector << 9;
580 return di->map + val - di->min;
581}
582
9b9fa04b
JK
/*
 * Scan the trace for the smallest and largest mapped byte offsets
 * touched by any IO, plus the largest bank/offset seen in embedded
 * "fio r" messages.  Also builds the device offset map as a side
 * effect.  Leaves the trace cursor rewound.
 */
void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
			  u64 *max_offset_ret)
{
	u64 found = 0;
	u64 max = 0, min = ~(u64)0;
	u64 max_bank = 0;
	u64 max_bank_offset = 0;
	u64 num_banks = 0;

	map_devices(trace);

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			found = map_io(trace, trace->io);
			if (found < min)
				min = found;

			/* max is exclusive: end of the IO, not its start */
			found += trace->io->bytes;
			if (max < found)
				max = found;
		} else {
			u64 bank;
			u64 offset;
			if (!parse_fio_bank_message(trace, &bank,
			    &offset, &num_banks)) {
				if (bank > max_bank)
					max_bank = bank;
				if (offset > max_bank_offset)
					max_bank_offset = offset;
			}
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	*min_ret = min;
	*max_ret = max;
	*max_bank_ret = max_bank;
	*max_offset_ret = max_bank_offset;
}
624
854a1f24
CM
625static void check_io_types(struct trace *trace)
626{
627 struct blk_io_trace *io = trace->io;
628 int action = io->action & BLK_TA_MASK;
629
630 if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
631 switch (action) {
632 case __BLK_TA_COMPLETE:
633 trace->found_completion = 1;
634 break;
635 case __BLK_TA_ISSUE:
636 trace->found_issue = 1;
637 break;
638 case __BLK_TA_QUEUE:
639 trace->found_queue = 1;
640 break;
641 };
642 }
643}
644
645
9b9fa04b 646int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
9e066e23
CM
647 u64 *yzoom_min, u64 *yzoom_max)
648{
649 int hits[11];
650 u64 max_per_bucket[11];
9b9fa04b
JK
651 u64 min_per_bucket[11];
652 u64 bytes_per_bucket = (max_offset - min_offset + 1) / 10;
9e066e23
CM
653 int slot;
654 int fat_count = 0;
655
656 memset(hits, 0, sizeof(int) * 11);
657 memset(max_per_bucket, 0, sizeof(u64) * 11);
9b9fa04b 658 memset(min_per_bucket, 0xff, sizeof(u64) * 11);
9e066e23
CM
659 first_record(trace);
660 while (1) {
854a1f24 661 check_io_types(trace);
41fdf407
JK
662 if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
663 (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
2203e914 664 u64 off = map_io(trace, trace->io) - min_offset;
9b9fa04b
JK
665
666 slot = (int)(off / bytes_per_bucket);
667 hits[slot]++;
668 if (off < min_per_bucket[slot])
669 min_per_bucket[slot] = off;
670
854a1f24 671 off += trace->io->bytes;
9b9fa04b 672 slot = (int)(off / bytes_per_bucket);
9e066e23 673 hits[slot]++;
9b9fa04b
JK
674 if (off > max_per_bucket[slot])
675 max_per_bucket[slot] = off;
9e066e23
CM
676 }
677 if (next_record(trace))
678 break;
679 }
680 first_record(trace);
681 for (slot = 0; slot < 11; slot++) {
682 if (hits[slot] > fat_count) {
683 fat_count = hits[slot];
684 }
685 }
686
687 *yzoom_max = max_offset;
688 for (slot = 10; slot >= 0; slot--) {
689 double d = hits[slot];
690
691 if (d >= (double)fat_count * .05) {
9b9fa04b 692 *yzoom_max = max_per_bucket[slot] + min_offset;
9e066e23
CM
693 break;
694 }
695 }
696
9b9fa04b 697 *yzoom_min = min_offset;
9e066e23
CM
698 for (slot = 0; slot < 10; slot++) {
699 double d = hits[slot];
700
701 if (d >= (double)fat_count * .05) {
9b9fa04b 702 *yzoom_min = min_per_bucket[slot] + min_offset;
9e066e23
CM
703 break;
704 }
705 }
706 return 0;
707}
708
2203e914
CM
709static char footer[] = ".blktrace.0";
710static int footer_len = sizeof(footer);
711
712static void match_trace(char *name, char **traces)
713{
714 int match_len;
715 char *match;
716 int footer_start;
717
718 match_len = strlen(name);
719 if (match_len <= footer_len)
720 return;
721
722 footer_start = match_len - footer_len;
723 if (strcmp(name + footer_start + 1, footer) != 0)
724 return;
725
726 match = strdup(name);
727 if (!match)
728 goto enomem;
729
730 match[footer_start + 1] = '\0';
731 snprintf(line, line_len, "%s -i '%s'", *traces ? *traces : "", match);
732 free(match);
733
734 match = strdup(line);
735 if (!match)
736 goto enomem;
737
738 free(*traces);
739 *traces = match;
740 return;
741
742enomem:
743 perror("memory allocation failed");
744 exit(1);
745 return;
746}
747
748static char *combine_blktrace_devs(char *dir_name)
749{
750 DIR *dir;
751 char *traces = NULL;
752 struct dirent *d;
753 int len;
754 int ret;
755
756 dir = opendir(dir_name);
757 if (!dir)
758 return NULL;
759
760 while (1) {
761 d = readdir(dir);
762 if (!d)
763 break;
764
765 len = strlen(d->d_name);
766 if (len > footer_len)
767 match_trace(d->d_name, &traces);
768 }
769
770 closedir(dir);
771
772 if (!traces)
773 return NULL;
774
775 snprintf(line, line_len, "blkparse -O %s -D %s -d '%s.%s'",
776 traces, dir_name, dir_name, "dump");
777
778 ret = system(line);
779 if (ret) {
780 fprintf(stderr, "blkparse failure %s\n", line);
781 exit(1);
782 }
783 snprintf(line, line_len, "%s.%s", dir_name, "dump");
784 return strdup(line);
785}
786
9e066e23
CM
/*
 * Resolve the user-supplied name into a parseable dump file, trying in
 * order: the name itself as a regular file; "<name>.dump"; combining a
 * directory of per-device traces; and finally running blktrace_to_dump
 * on "<stem>.blktrace.0".  Returns a malloc'd path or NULL.
 */
static char *find_trace_file(char *filename)
{
	int ret;
	struct stat st;
	char *dot;
	char *try;
	int found_dir = 0;

	/* look for an exact match of whatever they pass in.
	 * If it is a file, assume it is the dump file.
	 * If a directory, remember that it existed so we
	 * can combine traces in that directory later
	 */
	ret = stat(filename, &st);
	if (ret == 0) {
		if (S_ISREG(st.st_mode))
			return strdup(filename);

		if (S_ISDIR(st.st_mode))
			found_dir = 1;
	}

	/*
	 * try tacking .dump onto the end and see if that already
	 * has been generated
	 */
	snprintf(line, line_len, "%s.%s", filename, "dump");
	ret = stat(line, &st);
	if (ret == 0)
		return strdup(line);

	/*
	 * try to generate the .dump from all the traces in
	 * a single dir.
	 */
	if (found_dir) {
		try = combine_blktrace_devs(filename);
		if (try)
			return try;
	}

	/*
	 * try to generate the .dump from all the blktrace
	 * files for a named trace
	 */
	try = strdup(filename);
	dot = strrchr(try, '.');
	if (!dot || strcmp(".dump", dot) != 0) {
		/* strip a trailing extension, but never a leading dot */
		if (dot && dot != try)
			*dot = '\0';
		snprintf(line, line_len, "%s%s", try, ".blktrace.0");
		ret = stat(line, &st);
		if (ret == 0) {
			/* raw blktrace files exist; convert them */
			blktrace_to_dump(try);
			snprintf(line, line_len, "%s.%s", try, "dump");
			ret = stat(line, &st);
			if (ret == 0) {
				free(try);
				return strdup(line);
			}
		}
	}
	free(try);
	return NULL;
}
852struct trace *open_trace(char *filename)
853{
854 int fd;
855 char *p;
856 struct stat st;
857 int ret;
858 struct trace *trace;
859 char *found_filename;
860
861 trace = calloc(1, sizeof(*trace));
862 if (!trace) {
863 fprintf(stderr, "unable to allocate memory for trace\n");
864 return NULL;
865 }
866
867 found_filename = find_trace_file(filename);
868 if (!found_filename) {
869 fprintf(stderr, "Unable to find trace file %s\n", filename);
870 goto fail;
871 }
9e066e23
CM
872 filename = found_filename;
873
874 fd = open(filename, O_RDONLY);
875 if (fd < 0) {
876 fprintf(stderr, "Unable to open trace file %s err %s\n", filename, strerror(errno));
877 goto fail;
878 }
879 ret = fstat(fd, &st);
880 if (ret < 0) {
881 fprintf(stderr, "stat failed on %s err %s\n", filename, strerror(errno));
882 goto fail_fd;
883 }
884 p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
885 if (p == MAP_FAILED) {
886 fprintf(stderr, "Unable to mmap trace file %s, err %s\n", filename, strerror(errno));
887 goto fail_fd;
888 }
889 trace->fd = fd;
890 trace->len = st.st_size;
891 trace->start = p;
892 trace->cur = p;
893 trace->io = (struct blk_io_trace *)p;
894 return trace;
895
896fail_fd:
897 close(fd);
898fail:
899 free(trace);
900 return NULL;
901}
902static inline int tput_event(struct trace *trace)
903{
904 if (trace->found_completion)
905 return __BLK_TA_COMPLETE;
906 if (trace->found_issue)
907 return __BLK_TA_ISSUE;
908 if (trace->found_queue)
909 return __BLK_TA_QUEUE;
910
911 return __BLK_TA_COMPLETE;
912}
913
f2e40ddd
JK
914int action_char_to_num(char action)
915{
916 switch (action) {
917 case 'Q':
918 return __BLK_TA_QUEUE;
919 case 'D':
920 return __BLK_TA_ISSUE;
921 case 'C':
922 return __BLK_TA_COMPLETE;
923 }
924 return -1;
925}
926
9e066e23
CM
927static inline int io_event(struct trace *trace)
928{
f2e40ddd
JK
929 if (plot_io_action)
930 return plot_io_action;
9e066e23
CM
931 if (trace->found_queue)
932 return __BLK_TA_QUEUE;
933 if (trace->found_issue)
934 return __BLK_TA_ISSUE;
935 if (trace->found_completion)
936 return __BLK_TA_COMPLETE;
937
938 return __BLK_TA_COMPLETE;
939}
940
2203e914
CM
941void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
942 struct graph_line_data *reads_gld)
9e066e23
CM
943{
944 struct blk_io_trace *io = trace->io;
2203e914 945 struct graph_line_data *gld;
1582ecc9 946 int action = io->action & BLK_TA_MASK;
9e066e23
CM
947 int seconds;
948
949 if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
950 return;
951
952 if (action != tput_event(trace))
953 return;
954
2203e914
CM
955 if (BLK_DATADIR(io->action) & BLK_TC_READ)
956 gld = reads_gld;
957 else
958 gld = writes_gld;
959
9e066e23 960 seconds = SECONDS(io->time);
9e066e23 961 gld->data[seconds].sum += io->bytes;
2203e914 962
9e066e23
CM
963 gld->data[seconds].count = 1;
964 if (gld->data[seconds].sum > gld->max)
965 gld->max = gld->data[seconds].sum;
966}
967
0a43b43f
JK
968#define GDD_PTR_ALLOC_STEP 16
969
/*
 * Return the pid_map (and thus plot index) for a pid, growing the
 * per-file gdd_reads/gdd_writes plot arrays in GDD_PTR_ALLOC_STEP
 * chunks as new processes appear.  Returns NULL when per-process
 * plotting is disabled (everything shares plot index 0).
 */
static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
{
	struct pid_map *pm;

	if (!io_per_process) {
		/* single shared plot; make sure at least one slot exists */
		if (!tf->io_plots)
			tf->io_plots = 1;
		return NULL;
	}

	pm = process_hash_insert(pid, NULL);
	/* New entry? (index 0 means it hasn't been assigned a plot yet) */
	if (!pm->index) {
		if (tf->io_plots == tf->io_plots_allocated) {
			/* grow both plot arrays; abort on OOM by policy */
			tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
			tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_reads)
				abort();
			tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_writes)
				abort();
			/* zero only the newly added tail of each array */
			memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
			memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
		}
		pm->index = tf->io_plots++;

		return pm;
	}
	return pm;
}
1002
/*
 * Add one record to the IO scatter plot: map its sector to the linear
 * offset axis, pick the per-process (or shared) plot slot, lazily
 * allocate that slot's dot data, and set the bits it covers.
 */
void add_io(struct trace *trace, struct trace_file *tf)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	u64 offset;
	int index;
	char *label;
	struct pid_map *pm;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	/* only plot the one event type chosen for this trace */
	if (action != io_event(trace))
		return;

	offset = map_io(trace, io);

	pm = get_pid_map(tf, io->pid);
	if (!pm) {
		/* per-process plotting off: everything shares slot 0 */
		index = 0;
		label = "";
	} else {
		index = pm->index;
		label = pm->name;
	}
	if (BLK_DATADIR(io->action) & BLK_TC_READ) {
		if (!tf->gdd_reads[index])
			tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
	} else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
		if (!tf->gdd_writes[index])
			tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
	}
}
1038
/*
 * Track the number of IOs in flight per second.  Queue events register
 * the IO in the hash (when issues/completions exist to pair with);
 * issue events bump the in-flight count and fold it into the
 * per-second average for the graph.
 */
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
	unsigned int seconds;
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	double avg;
	struct pending_io *pio;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action == __BLK_TA_QUEUE) {
		/* only hash queue events that can be matched later */
		if (trace->found_issue || trace->found_completion)
			hash_queued_io(trace->io);
		return;
	}
	if (action != __BLK_TA_ISSUE)
		return;

	pio = hash_dispatched_io(trace->io);
	if (!pio)
		return;

	if (!trace->found_completion) {
		/* no completions in this trace will ever reap it;
		 * drop the entry now to avoid an ever-growing table */
		list_del(&pio->hash_list);
		free(pio);
	}

	ios_in_flight++;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += ios_in_flight;
	gld->data[seconds].count++;

	/* graph max is the largest per-second average in-flight count */
	avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
	if (gld->max < (u64)avg) {
		gld->max = avg;
	}
}
1078
1079void add_completed_io(struct trace *trace,
1080 struct graph_line_data *latency_gld)
1081{
1082 struct blk_io_trace *io = trace->io;
1083 int seconds;
1582ecc9 1084 int action = io->action & BLK_TA_MASK;
9e066e23
CM
1085 struct pending_io *pio;
1086 double avg;
1087 u64 latency;
1088
1089 if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
1090 return;
1091
1092 if (action != __BLK_TA_COMPLETE)
1093 return;
1094
1095 seconds = SECONDS(io->time);
1096
1097 pio = hash_completed_io(trace->io);
1098 if (!pio)
1099 return;
1100
1101 if (ios_in_flight > 0)
1102 ios_in_flight--;
1103 if (io->time >= pio->dispatch_time) {
1104 latency = io->time - pio->dispatch_time;
1105 latency_gld->data[seconds].sum += latency;
1106 latency_gld->data[seconds].count++;
1107 }
1108
1109 list_del(&pio->hash_list);
1110 free(pio);
1111
1112 avg = (double)latency_gld->data[seconds].sum /
1113 latency_gld->data[seconds].count;
1114 if (latency_gld->max < (u64)avg) {
1115 latency_gld->max = avg;
1116 }
1117}
1118
1119void add_iop(struct trace *trace, struct graph_line_data *gld)
1120{
1121 struct blk_io_trace *io = trace->io;
1582ecc9 1122 int action = io->action & BLK_TA_MASK;
9e066e23
CM
1123 int seconds;
1124
1125 if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
1126 return;
1127
1128 /* iops and tput use the same events */
1129 if (action != tput_event(trace))
1130 return;
1131
1132 seconds = SECONDS(io->time);
9e066e23
CM
1133 gld->data[seconds].sum += 1;
1134 gld->data[seconds].count = 1;
1135 if (gld->data[seconds].sum > gld->max)
1136 gld->max = gld->data[seconds].sum;
1137}
1138
/*
 * Per-record hook run while scanning a trace; currently only handles
 * notify records (process names and the wall-clock timestamp).
 */
void check_record(struct trace *trace)
{
	handle_notify(trace);
}