iowatcher: Add bounds checking in find_step
[blktrace.git] / iowatcher / blkparse.c
CommitLineData
9e066e23
CM
1/*
2 * Copyright (C) 2012 Fusion-io
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
660b0411 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
9e066e23
CM
16 *
17 * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
18 */
19#include <sys/types.h>
20#include <sys/stat.h>
21#include <fcntl.h>
22#include <unistd.h>
23#include <stdlib.h>
24#include <stdio.h>
25#include <math.h>
26#include <inttypes.h>
27#include <string.h>
28#include <asm/types.h>
29#include <errno.h>
30#include <sys/mman.h>
31#include <time.h>
32#include <math.h>
2203e914 33#include <dirent.h>
9e066e23
CM
34
35#include "plot.h"
36#include "blkparse.h"
37#include "list.h"
38#include "tracers.h"
39
40#define IO_HASH_TABLE_BITS 11
41#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
42static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
43static u64 ios_in_flight = 0;
44
0a43b43f
JK
45#define PROCESS_HASH_TABLE_BITS 7
46#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
47static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];
48
f2e40ddd 49extern int plot_io_action;
0a43b43f 50extern int io_per_process;
9e066e23 51
2203e914
CM
52static const int line_len = 1024;
53static char line[1024];
54
9e066e23
CM
55/*
56 * Trace categories
57 */
58enum {
59 BLK_TC_READ = 1 << 0, /* reads */
60 BLK_TC_WRITE = 1 << 1, /* writes */
61 BLK_TC_FLUSH = 1 << 2, /* flush */
62 BLK_TC_SYNC = 1 << 3, /* sync */
63 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
64 BLK_TC_REQUEUE = 1 << 5, /* requeueing */
65 BLK_TC_ISSUE = 1 << 6, /* issue */
66 BLK_TC_COMPLETE = 1 << 7, /* completions */
67 BLK_TC_FS = 1 << 8, /* fs requests */
68 BLK_TC_PC = 1 << 9, /* pc requests */
69 BLK_TC_NOTIFY = 1 << 10, /* special message */
70 BLK_TC_AHEAD = 1 << 11, /* readahead */
71 BLK_TC_META = 1 << 12, /* metadata */
72 BLK_TC_DISCARD = 1 << 13, /* discard requests */
73 BLK_TC_DRV_DATA = 1 << 14, /* binary driver data */
74 BLK_TC_FUA = 1 << 15, /* fua requests */
75
76 BLK_TC_END = 1 << 15, /* we've run out of bits! */
77};
78
79#define BLK_TC_SHIFT (16)
80#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
81#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
82
83/*
84 * Basic trace actions
85 */
86enum {
87 __BLK_TA_QUEUE = 1, /* queued */
88 __BLK_TA_BACKMERGE, /* back merged to existing rq */
89 __BLK_TA_FRONTMERGE, /* front merge to existing rq */
90 __BLK_TA_GETRQ, /* allocated new request */
91 __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */
92 __BLK_TA_REQUEUE, /* request requeued */
93 __BLK_TA_ISSUE, /* sent to driver */
94 __BLK_TA_COMPLETE, /* completed by driver */
95 __BLK_TA_PLUG, /* queue was plugged */
96 __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */
97 __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
98 __BLK_TA_INSERT, /* insert request */
99 __BLK_TA_SPLIT, /* bio was split */
100 __BLK_TA_BOUNCE, /* bio was bounced */
101 __BLK_TA_REMAP, /* bio was remapped */
102 __BLK_TA_ABORT, /* request aborted */
103 __BLK_TA_DRV_DATA, /* binary driver data */
104};
105
1582ecc9
JK
106#define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)
107
9e066e23
CM
108/*
109 * Notify events.
110 */
111enum blktrace_notify {
112 __BLK_TN_PROCESS = 0, /* establish pid/name mapping */
113 __BLK_TN_TIMESTAMP, /* include system clock */
114 __BLK_TN_MESSAGE, /* Character string message */
115};
116
117/*
118 * Trace actions in full. Additionally, read or write is masked
119 */
120#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
121#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
122#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
123#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
124#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
125#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
126#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
127#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
128#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
129#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
130#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
131#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
132#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
133#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
134#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
135#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
136#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
137
138#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
139#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
140#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
141
142#define BLK_IO_TRACE_MAGIC 0x65617400
143#define BLK_IO_TRACE_VERSION 0x07
144/*
145 * The trace itself
146 */
147struct blk_io_trace {
148 __u32 magic; /* MAGIC << 8 | version */
149 __u32 sequence; /* event number */
150 __u64 time; /* in nanoseconds */
151 __u64 sector; /* disk offset */
152 __u32 bytes; /* transfer length */
153 __u32 action; /* what happened */
154 __u32 pid; /* who did it */
155 __u32 device; /* device identifier (dev_t) */
156 __u32 cpu; /* on what cpu did it happen */
157 __u16 error; /* completion error */
158 __u16 pdu_len; /* length of data after this trace */
159};
160
161struct pending_io {
162 /* sector offset of this IO */
163 u64 sector;
164
2203e914
CM
165 /* dev_t for this IO */
166 u32 device;
167
9e066e23
CM
168 /* time this IO was dispatched */
169 u64 dispatch_time;
170 /* time this IO was finished */
171 u64 completion_time;
172 struct list_head hash_list;
0a43b43f
JK
173 /* process which queued this IO */
174 u32 pid;
175};
176
177struct pid_map {
178 struct list_head hash_list;
179 u32 pid;
180 int index;
181 char name[0];
9e066e23
CM
182};
183
184#define MINORBITS 20
185#define MINORMASK ((1 << MINORBITS) - 1)
186#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
187#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
188#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
189#define CHECK_MAGIC(t) (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
190
191void init_io_hash_table(void)
192{
193 int i;
194 struct list_head *head;
195
196 for (i = 0; i < IO_HASH_TABLE_SIZE; i++) {
197 head = io_hash_table + i;
198 INIT_LIST_HEAD(head);
199 }
200}
201
/*
 * Taken from the kernel hash.h: hash a 64-bit sector number into
 * [0, IO_HASH_TABLE_SIZE).  The shift/add/subtract chain below is an
 * expanded multiplication by the kernel's 64-bit golden-ratio prime,
 * written out by hand because gcc won't strength-reduce the multiply.
 */
static inline u64 hash_sector(u64 val)
{
	u64 hash = val;

	/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
	u64 n = hash;
	n <<= 18;
	hash -= n;
	n <<= 33;
	hash -= n;
	n <<= 3;
	hash += n;
	n <<= 3;
	hash -= n;
	n <<= 4;
	hash += n;
	n <<= 2;
	hash += n;

	/* High bits are more random, so use them. */
	return hash >> (64 - IO_HASH_TABLE_BITS);
}
225
0a43b43f 226static int io_hash_table_insert(struct pending_io *ins_pio)
9e066e23
CM
227{
228 u64 sector = ins_pio->sector;
2203e914 229 u32 dev = ins_pio->device;
9e066e23
CM
230 int slot = hash_sector(sector);
231 struct list_head *head;
232 struct pending_io *pio;
233
234 head = io_hash_table + slot;
235 list_for_each_entry(pio, head, hash_list) {
2203e914 236 if (pio->sector == sector && pio->device == dev)
9e066e23
CM
237 return -EEXIST;
238 }
239 list_add_tail(&ins_pio->hash_list, head);
240 return 0;
241}
242
2203e914 243static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
9e066e23
CM
244{
245 int slot = hash_sector(sector);
246 struct list_head *head;
247 struct pending_io *pio;
248
249 head = io_hash_table + slot;
250 list_for_each_entry(pio, head, hash_list) {
2203e914 251 if (pio->sector == sector && pio->device == dev)
9e066e23
CM
252 return pio;
253 }
254 return NULL;
255}
256
79d61530 257static struct pending_io *hash_queued_io(struct blk_io_trace *io)
9e066e23
CM
258{
259 struct pending_io *pio;
260 int ret;
261
262 pio = calloc(1, sizeof(*pio));
263 pio->sector = io->sector;
2203e914 264 pio->device = io->device;
0a43b43f 265 pio->pid = io->pid;
9e066e23 266
0a43b43f
JK
267 ret = io_hash_table_insert(pio);
268 if (ret < 0) {
269 /* crud, the IO is there already */
9e066e23 270 free(pio);
79d61530 271 return NULL;
9e066e23 272 }
79d61530 273 return pio;
0a43b43f
JK
274}
275
854a1f24 276static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
0a43b43f
JK
277{
278 struct pending_io *pio;
279
2203e914 280 pio = io_hash_table_search(io->sector, io->device);
79d61530
JK
281 if (!pio) {
282 pio = hash_queued_io(io);
283 if (!pio)
284 return NULL;
285 }
0a43b43f 286 pio->dispatch_time = io->time;
854a1f24 287 return pio;
9e066e23
CM
288}
289
290static struct pending_io *hash_completed_io(struct blk_io_trace *io)
291{
292 struct pending_io *pio;
293
2203e914 294 pio = io_hash_table_search(io->sector, io->device);
9e066e23
CM
295
296 if (!pio)
297 return NULL;
298 return pio;
299}
300
0a43b43f
JK
301void init_process_hash_table(void)
302{
303 int i;
304 struct list_head *head;
305
306 for (i = 0; i < PROCESS_HASH_TABLE_SIZE; i++) {
307 head = process_hash_table + i;
308 INIT_LIST_HEAD(head);
309 }
310}
311
312static u32 hash_pid(u32 pid)
313{
314 u32 hash = pid;
315
316 hash ^= pid >> 3;
317 hash ^= pid >> 3;
318 hash ^= pid >> 4;
319 hash ^= pid >> 6;
320 return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
321}
322
323static struct pid_map *process_hash_search(u32 pid)
324{
325 int slot = hash_pid(pid);
326 struct list_head *head;
327 struct pid_map *pm;
328
329 head = process_hash_table + slot;
330 list_for_each_entry(pm, head, hash_list) {
331 if (pm->pid == pid)
332 return pm;
333 }
334 return NULL;
335}
336
337static struct pid_map *process_hash_insert(u32 pid, char *name)
338{
339 int slot = hash_pid(pid);
340 struct pid_map *pm;
341 int old_index = 0;
342 char buf[16];
343
344 pm = process_hash_search(pid);
345 if (pm) {
346 /* Entry exists and name shouldn't be changed? */
347 if (!name || !strcmp(name, pm->name))
348 return pm;
349 list_del(&pm->hash_list);
350 old_index = pm->index;
351 free(pm);
352 }
353 if (!name) {
354 sprintf(buf, "[%u]", pid);
355 name = buf;
356 }
357 pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
358 pm->pid = pid;
359 pm->index = old_index;
360 strcpy(pm->name, name);
361 list_add_tail(&pm->hash_list, process_hash_table + slot);
362
363 return pm;
364}
365
9e066e23
CM
/*
 * Process a notify record: BLK_TN_PROCESS establishes the pid/name
 * mapping (kept only when per-process plotting is enabled), and
 * BLK_TN_TIMESTAMP anchors the trace's relative clock to wall time.
 * Every other notify type is ignored.
 */
static void handle_notify(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	void *payload = (char *)io + sizeof(*io);
	u32 two32[2];

	if (io->action == BLK_TN_PROCESS) {
		if (io_per_process)
			process_hash_insert(io->pid, payload);
		return;
	}

	if (io->action != BLK_TN_TIMESTAMP)
		return;

	/* timestamp payload is exactly two 32-bit words: sec, nsec */
	if (io->pdu_len != sizeof(two32))
		return;

	memcpy(two32, payload, sizeof(two32));
	trace->start_timestamp = io->time;
	trace->abs_start_time.tv_sec = two32[0];
	trace->abs_start_time.tv_nsec = two32[1];
	/* normalize a negative nanosecond field into the previous second */
	if (trace->abs_start_time.tv_nsec < 0) {
		trace->abs_start_time.tv_sec--;
		trace->abs_start_time.tv_nsec += 1000000000;
	}
}
393
394int next_record(struct trace *trace)
395{
396 int skip = trace->io->pdu_len;
397 u64 offset;
398
399 trace->cur += sizeof(*trace->io) + skip;
400 offset = trace->cur - trace->start;
401 if (offset >= trace->len)
402 return 1;
403
404 trace->io = (struct blk_io_trace *)trace->cur;
405 return 0;
406}
407
408void first_record(struct trace *trace)
409{
410 trace->cur = trace->start;
411 trace->io = (struct blk_io_trace *)trace->cur;
412}
413
bfb0e441
CM
414int is_io_event(struct blk_io_trace *test)
415{
416 char *message;
417 if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
418 return 1;
419 if (test->action == BLK_TN_MESSAGE) {
420 int len = test->pdu_len;
421 if (len < 3)
422 return 0;
423 message = (char *)(test + 1);
424 if (strncmp(message, "fio ", 4) == 0) {
425 return 1;
426 }
427 }
428 return 0;
429}
430
9e066e23
CM
/*
 * Return the timestamp of the last IO event in the trace.  First scan
 * backwards from the end of the mmapped file, looking for a record
 * whose magic checks out and whose length lands exactly on EOF; if
 * nothing turns up within 8K bytes, fall back to a full forward scan.
 * Leaves the trace rewound to the first record.
 */
u64 find_last_time(struct trace *trace)
{
	char *p = trace->start + trace->len;
	struct blk_io_trace *test;
	int search_len = 0;
	u64 found = 0;

	if (trace->len < sizeof(*trace->io))
		return 0;
	p -= sizeof(*trace->io);
	while (p >= trace->start) {
		test = (struct blk_io_trace *)p;
		if (CHECK_MAGIC(test) && is_io_event(test)) {
			u64 offset = p - trace->start;
			/* a genuine final record ends exactly at EOF */
			if (offset + sizeof(*test) + test->pdu_len == trace->len) {
				return test->time;
			}
		}
		p--;
		search_len++;
		/* give up on the backwards scan after 8K of sliding */
		if (search_len > 8192) {
			break;
		}
	}

	/* searching backwards didn't work out, we'll have to scan the file */
	first_record(trace);
	while (1) {
		if (is_io_event(trace->io))
			found = trace->io->time;
		if (next_record(trace))
			break;
	}
	first_record(trace);
	return found;
}
467
bfb0e441
CM
468int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offset_ret,
469 u64 *num_banks_ret)
470{
471 char *s;
472 char *next;
473 char *message;
474 struct blk_io_trace *test = trace->io;
475 int len = test->pdu_len;
476 u64 bank;
477 u64 offset;
478 u64 num_banks;
479
480 if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
481 return -1;
482 if (test->action != BLK_TN_MESSAGE)
483 return -1;
484
485 /* the message is fio rw bank offset num_banks */
486 if (len < 3)
487 return -1;
488 message = (char *)(test + 1);
489 if (strncmp(message, "fio r ", 6) != 0)
490 return -1;
491
492 message = strndup(message, len);
493 s = strchr(message, ' ');
494 if (!s)
495 goto out;
496 s++;
497 s = strchr(s, ' ');
498 if (!s)
499 goto out;
500
501 bank = strtoll(s, &next, 10);
502 if (s == next)
503 goto out;
504 s = next;
505
506 offset = strtoll(s, &next, 10);
507 if (s == next)
508 goto out;
509 s = next;
510
511 num_banks = strtoll(s, &next, 10);
512 if (s == next)
513 goto out;
514
515 *bank_ret = bank;
516 *offset_ret = offset;
517 *num_banks_ret = num_banks;
518
519 return 0;
520out:
521 free(message);
522 return -1;
523}
524
2203e914
CM
525static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
526{
527 u32 dev = io->device;
528 int i;
529 struct dev_info *di = NULL;
530
531 for (i = 0; i < trace->num_devices; i++) {
532 if (trace->devices[i].device == dev) {
533 di = trace->devices + i;
534 goto found;
535 }
536 }
537 i = trace->num_devices++;
538 if (i >= MAX_DEVICES_PER_TRACE) {
539 fprintf(stderr, "Trace contains too many devices (%d)\n", i);
540 exit(1);
541 }
542 di = trace->devices + i;
543 di->device = dev;
544found:
545 return di;
546}
547
/*
 * Scan the whole trace once to find, per device, the lowest and
 * highest byte offsets touched, then assign each device a contiguous
 * window in one flattened offset space (di->map is the window start).
 * Leaves the trace rewound to the first record.
 *
 * NOTE(review): di->min is only ever lowered here and never seeded
 * high, so it relies on the trace struct being zero-initialized and
 * effectively stays 0 — confirm whether that is intended.
 */
static void map_devices(struct trace *trace)
{
	struct dev_info *di;
	u64 found;
	u64 map_start = 0;
	int i;

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			di = lookup_dev(trace, trace->io);
			/* sector -> byte offset */
			found = trace->io->sector << 9;
			if (found < di->min)
				di->min = found;

			/* the end of the IO bounds the device's max */
			found += trace->io->bytes;
			if (di->max < found)
				di->max = found;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	for (i = 0; i < trace->num_devices; i++) {
		di = trace->devices + i;
		di->map = map_start;
		map_start += di->max - di->min;
	}
}
577
578u64 map_io(struct trace *trace, struct blk_io_trace *io)
579{
580 struct dev_info *di = lookup_dev(trace, io);
581 u64 val = trace->io->sector << 9;
582 return di->map + val - di->min;
583}
584
9b9fa04b
JK
/*
 * One pass over the trace to find the smallest and largest mapped
 * byte offsets touched by real IO, plus the highest bank number and
 * bank offset reported by any fio bank message.  Also builds the
 * per-device offset map.  Leaves the trace rewound.
 */
void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
			  u64 *max_offset_ret)
{
	u64 found = 0;
	u64 max = 0, min = ~(u64)0;
	u64 max_bank = 0;
	u64 max_bank_offset = 0;
	u64 num_banks = 0;

	map_devices(trace);

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			found = map_io(trace, trace->io);
			if (found < min)
				min = found;

			/* the IO extends 'bytes' past its start offset */
			found += trace->io->bytes;
			if (max < found)
				max = found;
		} else {
			u64 bank;
			u64 offset;
			if (!parse_fio_bank_message(trace, &bank,
			    &offset, &num_banks)) {
				if (bank > max_bank)
					max_bank = bank;
				if (offset > max_bank_offset)
					max_bank_offset = offset;
			}
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	*min_ret = min;
	*max_ret = max;
	*max_bank_ret = max_bank;
	*max_offset_ret = max_bank_offset;
}
626
854a1f24
CM
627static void check_io_types(struct trace *trace)
628{
629 struct blk_io_trace *io = trace->io;
630 int action = io->action & BLK_TA_MASK;
631
632 if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
633 switch (action) {
634 case __BLK_TA_COMPLETE:
635 trace->found_completion = 1;
636 break;
637 case __BLK_TA_ISSUE:
638 trace->found_issue = 1;
639 break;
640 case __BLK_TA_QUEUE:
641 trace->found_queue = 1;
642 break;
643 };
644 }
645}
646
647
9b9fa04b 648int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
9e066e23
CM
649 u64 *yzoom_min, u64 *yzoom_max)
650{
651 int hits[11];
652 u64 max_per_bucket[11];
9b9fa04b
JK
653 u64 min_per_bucket[11];
654 u64 bytes_per_bucket = (max_offset - min_offset + 1) / 10;
9e066e23
CM
655 int slot;
656 int fat_count = 0;
657
658 memset(hits, 0, sizeof(int) * 11);
659 memset(max_per_bucket, 0, sizeof(u64) * 11);
9b9fa04b 660 memset(min_per_bucket, 0xff, sizeof(u64) * 11);
9e066e23
CM
661 first_record(trace);
662 while (1) {
854a1f24 663 check_io_types(trace);
41fdf407
JK
664 if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
665 (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
2203e914 666 u64 off = map_io(trace, trace->io) - min_offset;
9b9fa04b
JK
667
668 slot = (int)(off / bytes_per_bucket);
669 hits[slot]++;
670 if (off < min_per_bucket[slot])
671 min_per_bucket[slot] = off;
672
854a1f24 673 off += trace->io->bytes;
9b9fa04b 674 slot = (int)(off / bytes_per_bucket);
9e066e23 675 hits[slot]++;
9b9fa04b
JK
676 if (off > max_per_bucket[slot])
677 max_per_bucket[slot] = off;
9e066e23
CM
678 }
679 if (next_record(trace))
680 break;
681 }
682 first_record(trace);
683 for (slot = 0; slot < 11; slot++) {
684 if (hits[slot] > fat_count) {
685 fat_count = hits[slot];
686 }
687 }
688
689 *yzoom_max = max_offset;
690 for (slot = 10; slot >= 0; slot--) {
691 double d = hits[slot];
692
693 if (d >= (double)fat_count * .05) {
9b9fa04b 694 *yzoom_max = max_per_bucket[slot] + min_offset;
9e066e23
CM
695 break;
696 }
697 }
698
9b9fa04b 699 *yzoom_min = min_offset;
9e066e23
CM
700 for (slot = 0; slot < 10; slot++) {
701 double d = hits[slot];
702
703 if (d >= (double)fat_count * .05) {
9b9fa04b 704 *yzoom_min = min_per_bucket[slot] + min_offset;
9e066e23
CM
705 break;
706 }
707 }
708 return 0;
709}
710
2203e914
CM
711static char footer[] = ".blktrace.0";
712static int footer_len = sizeof(footer);
713
/*
 * If name ends in ".blktrace.0", strip that suffix and append
 * " -i '<base>'" to the accumulated blkparse option string in
 * *traces (growing it via strdup).  Exits on allocation failure.
 *
 * Note: footer_len is sizeof(footer) and therefore counts the
 * trailing NUL; the "+ 1" adjustments below compensate for that
 * off-by-one, so the comparisons and the truncation line up.
 */
static void match_trace(char *name, char **traces)
{
	int match_len;
	char *match;
	int footer_start;

	match_len = strlen(name);
	if (match_len <= footer_len)
		return;

	footer_start = match_len - footer_len;
	if (strcmp(name + footer_start + 1, footer) != 0)
		return;

	match = strdup(name);
	if (!match)
		goto enomem;

	/* chop the ".blktrace.0" suffix, leaving the device basename */
	match[footer_start + 1] = '\0';
	snprintf(line, line_len, "%s -i '%s'", *traces ? *traces : "", match);
	free(match);

	match = strdup(line);
	if (!match)
		goto enomem;

	free(*traces);
	*traces = match;
	return;

enomem:
	perror("memory allocation failed");
	exit(1);
	return;
}
749
/*
 * Combine all per-device blktrace files found in dir_name into a
 * single "<dir_name>.dump" by shelling out to blkparse.  Returns the
 * malloc'd dump filename, or NULL when the directory can't be read or
 * contains no "*.blktrace.0" files.  Exits if blkparse fails.
 *
 * NOTE(review): dir_name and the collected trace names are
 * interpolated into a shell command run via system(); shell
 * metacharacters in path names will be interpreted by the shell —
 * confirm inputs are trusted (interactive tool invocation).
 */
static char *combine_blktrace_devs(char *dir_name)
{
	DIR *dir;
	char *traces = NULL;
	struct dirent *d;
	int len;
	int ret;

	dir = opendir(dir_name);
	if (!dir)
		return NULL;

	while (1) {
		d = readdir(dir);
		if (!d)
			break;

		/* too-short names can't possibly carry the suffix */
		len = strlen(d->d_name);
		if (len > footer_len)
			match_trace(d->d_name, &traces);
	}

	closedir(dir);

	if (!traces)
		return NULL;

	snprintf(line, line_len, "blkparse -O %s -D %s -d '%s.%s'",
		 traces, dir_name, dir_name, "dump");

	ret = system(line);
	if (ret) {
		fprintf(stderr, "blkparse failure %s\n", line);
		exit(1);
	}
	snprintf(line, line_len, "%s.%s", dir_name, "dump");
	return strdup(line);
}
788
9e066e23
CM
/*
 * Resolve a user-supplied name to a blkparse dump file, trying in
 * order: the name itself (as a regular file), "<name>.dump", a
 * directory of blktrace files combined into one dump, and finally
 * generating the dump from "<base>.blktrace.0".  Returns a malloc'd
 * path, or NULL when nothing usable was found.
 */
static char *find_trace_file(char *filename)
{
	int ret;
	struct stat st;
	char *dot;
	char *try;
	int found_dir = 0;

	/* look for an exact match of whatever they pass in.
	 * If it is a file, assume it is the dump file.
	 * If a directory, remember that it existed so we
	 * can combine traces in that directory later
	 */
	ret = stat(filename, &st);
	if (ret == 0) {
		if (S_ISREG(st.st_mode))
			return strdup(filename);

		if (S_ISDIR(st.st_mode))
			found_dir = 1;
	}

	/*
	 * try tacking .dump onto the end and see if that already
	 * has been generated
	 */
	snprintf(line, line_len, "%s.%s", filename, "dump");
	ret = stat(line, &st);
	if (ret == 0)
		return strdup(line);

	/*
	 * try to generate the .dump from all the traces in
	 * a single dir.
	 */
	if (found_dir) {
		try = combine_blktrace_devs(filename);
		if (try)
			return try;
	}

	/*
	 * try to generate the .dump from all the blktrace
	 * files for a named trace
	 */
	try = strdup(filename);
	dot = strrchr(try, '.');
	if (!dot || strcmp(".dump", dot) != 0) {
		/* strip a trailing extension unless the name starts with '.' */
		if (dot && dot != try)
			*dot = '\0';
		snprintf(line, line_len, "%s%s", try, ".blktrace.0");
		ret = stat(line, &st);
		if (ret == 0) {
			blktrace_to_dump(try);
			snprintf(line, line_len, "%s.%s", try, "dump");
			ret = stat(line, &st);
			if (ret == 0) {
				free(try);
				return strdup(line);
			}
		}
	}
	free(try);
	return NULL;
}
854struct trace *open_trace(char *filename)
855{
856 int fd;
857 char *p;
858 struct stat st;
859 int ret;
860 struct trace *trace;
861 char *found_filename;
862
863 trace = calloc(1, sizeof(*trace));
864 if (!trace) {
865 fprintf(stderr, "unable to allocate memory for trace\n");
866 return NULL;
867 }
868
869 found_filename = find_trace_file(filename);
870 if (!found_filename) {
871 fprintf(stderr, "Unable to find trace file %s\n", filename);
872 goto fail;
873 }
9e066e23
CM
874 filename = found_filename;
875
876 fd = open(filename, O_RDONLY);
877 if (fd < 0) {
878 fprintf(stderr, "Unable to open trace file %s err %s\n", filename, strerror(errno));
879 goto fail;
880 }
881 ret = fstat(fd, &st);
882 if (ret < 0) {
883 fprintf(stderr, "stat failed on %s err %s\n", filename, strerror(errno));
884 goto fail_fd;
885 }
886 p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
887 if (p == MAP_FAILED) {
888 fprintf(stderr, "Unable to mmap trace file %s, err %s\n", filename, strerror(errno));
889 goto fail_fd;
890 }
891 trace->fd = fd;
892 trace->len = st.st_size;
893 trace->start = p;
894 trace->cur = p;
895 trace->io = (struct blk_io_trace *)p;
896 return trace;
897
898fail_fd:
899 close(fd);
900fail:
901 free(trace);
902 return NULL;
903}
904static inline int tput_event(struct trace *trace)
905{
906 if (trace->found_completion)
907 return __BLK_TA_COMPLETE;
908 if (trace->found_issue)
909 return __BLK_TA_ISSUE;
910 if (trace->found_queue)
911 return __BLK_TA_QUEUE;
912
913 return __BLK_TA_COMPLETE;
914}
915
f2e40ddd
JK
916int action_char_to_num(char action)
917{
918 switch (action) {
919 case 'Q':
920 return __BLK_TA_QUEUE;
921 case 'D':
922 return __BLK_TA_ISSUE;
923 case 'C':
924 return __BLK_TA_COMPLETE;
925 }
926 return -1;
927}
928
9e066e23
CM
929static inline int io_event(struct trace *trace)
930{
f2e40ddd
JK
931 if (plot_io_action)
932 return plot_io_action;
9e066e23
CM
933 if (trace->found_queue)
934 return __BLK_TA_QUEUE;
935 if (trace->found_issue)
936 return __BLK_TA_ISSUE;
937 if (trace->found_completion)
938 return __BLK_TA_COMPLETE;
939
940 return __BLK_TA_COMPLETE;
941}
942
2203e914
CM
/*
 * Add a record's byte count to the read or write throughput graph.
 * Only the event type chosen by tput_event() is counted, so each IO
 * contributes exactly once; events past the graph's time window are
 * dropped.
 */
void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
	      struct graph_line_data *reads_gld)
{
	struct blk_io_trace *io = trace->io;
	struct graph_line_data *gld;
	int action = io->action & BLK_TA_MASK;
	int seconds;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != tput_event(trace))
		return;

	/* the direction bits decide which graph this IO lands in */
	if (BLK_DATADIR(io->action) & BLK_TC_READ)
		gld = reads_gld;
	else
		gld = writes_gld;

	seconds = SECONDS(io->time);
	if (seconds > gld->max_seconds)
		return;

	gld->data[seconds].sum += io->bytes;

	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}
972
0a43b43f
JK
973#define GDD_PTR_ALLOC_STEP 16
974
/*
 * Return the pid_map slot for pid, growing the per-process plot
 * arrays when a new process appears.  Returns NULL when per-process
 * plotting is disabled (everything then shares plot slot 0).
 *
 * NOTE(review): a zero pm->index is treated as "new entry", which
 * assumes tf->io_plots starts at 1 so index 0 is never handed out to
 * a process; if it started at 0, the first process would be
 * re-indexed on every lookup — confirm against trace_file init.
 */
static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
{
	struct pid_map *pm;

	if (!io_per_process) {
		if (!tf->io_plots)
			tf->io_plots = 1;
		return NULL;
	}

	pm = process_hash_insert(pid, NULL);
	/* New entry? */
	if (!pm->index) {
		/* grow both dot-data arrays in lockstep and zero the new tail */
		if (tf->io_plots == tf->io_plots_allocated) {
			tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
			tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_reads)
				abort();
			tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_writes)
				abort();
			memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
			memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
		}
		pm->index = tf->io_plots++;

		return pm;
	}
	return pm;
}
1007
/*
 * Plot one IO event as a dot in the per-process (or global) read or
 * write graph for this trace file.  Only the event type chosen by
 * io_event() is plotted, so each IO shows up exactly once.
 */
void add_io(struct trace *trace, struct trace_file *tf)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	u64 offset;
	int index;
	char *label;
	struct pid_map *pm;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != io_event(trace))
		return;

	offset = map_io(trace, io);

	/* without per-process accounting everything lands in slot 0 */
	pm = get_pid_map(tf, io->pid);
	if (!pm) {
		index = 0;
		label = "";
	} else {
		index = pm->index;
		label = pm->name;
	}
	if (BLK_DATADIR(io->action) & BLK_TC_READ) {
		if (!tf->gdd_reads[index])
			tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
	} else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
		if (!tf->gdd_writes[index])
			tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
		set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
	}
}
1043
/*
 * Track the number of IOs in flight and fold the running average
 * into gld at dispatch time.  Queue events are hashed so a later
 * dispatch can find them; dispatch events mark the IO in flight.
 * When the trace carries no completion events, the entry is freed
 * immediately since nothing will ever retire it.
 */
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
	int seconds;
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	double avg;
	struct pending_io *pio;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action == __BLK_TA_QUEUE) {
		/* only worth hashing if a dispatch/completion will look it up */
		if (trace->found_issue || trace->found_completion)
			hash_queued_io(trace->io);
		return;
	}
	if (action != __BLK_TA_ISSUE)
		return;

	seconds = SECONDS(io->time);
	if (seconds > gld->max_seconds)
		return;

	pio = hash_dispatched_io(trace->io);
	if (!pio)
		return;

	if (!trace->found_completion) {
		list_del(&pio->hash_list);
		free(pio);
	}

	ios_in_flight++;

	gld->data[seconds].sum += ios_in_flight;
	gld->data[seconds].count++;

	/* graph max tracks the highest per-second average seen so far */
	avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
	if (gld->max < (u64)avg) {
		gld->max = avg;
	}
}
1086
1087void add_completed_io(struct trace *trace,
1088 struct graph_line_data *latency_gld)
1089{
1090 struct blk_io_trace *io = trace->io;
1091 int seconds;
1582ecc9 1092 int action = io->action & BLK_TA_MASK;
9e066e23
CM
1093 struct pending_io *pio;
1094 double avg;
1095 u64 latency;
1096
1097 if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
1098 return;
1099
1100 if (action != __BLK_TA_COMPLETE)
1101 return;
1102
1103 seconds = SECONDS(io->time);
1104
1105 pio = hash_completed_io(trace->io);
1106 if (!pio)
1107 return;
1108
1109 if (ios_in_flight > 0)
1110 ios_in_flight--;
1111 if (io->time >= pio->dispatch_time) {
1112 latency = io->time - pio->dispatch_time;
1113 latency_gld->data[seconds].sum += latency;
1114 latency_gld->data[seconds].count++;
1115 }
1116
1117 list_del(&pio->hash_list);
1118 free(pio);
1119
1120 avg = (double)latency_gld->data[seconds].sum /
1121 latency_gld->data[seconds].count;
1122 if (latency_gld->max < (u64)avg) {
1123 latency_gld->max = avg;
1124 }
1125}
1126
1127void add_iop(struct trace *trace, struct graph_line_data *gld)
1128{
1129 struct blk_io_trace *io = trace->io;
1582ecc9 1130 int action = io->action & BLK_TA_MASK;
9e066e23
CM
1131 int seconds;
1132
1133 if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
1134 return;
1135
1136 /* iops and tput use the same events */
1137 if (action != tput_event(trace))
1138 return;
1139
1140 seconds = SECONDS(io->time);
35686f9b
JK
1141 if (seconds > gld->max_seconds)
1142 return;
9e066e23
CM
1143
1144 gld->data[seconds].sum += 1;
1145 gld->data[seconds].count = 1;
1146 if (gld->data[seconds].sum > gld->max)
1147 gld->max = gld->data[seconds].sum;
1148}
1149
/*
 * Per-record hook run while scanning a trace.  Currently only notify
 * records need processing (pid/name mapping and the wall-clock
 * timestamp anchor), which handle_notify() filters for internally.
 */
void check_record(struct trace *trace)
{
	handle_notify(trace);
}