/*
 * Copyright (C) 2012 Fusion-io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <inttypes.h>
#include <string.h>
#include <asm/types.h>
#include <errno.h>
#include <sys/mman.h>
#include <time.h>
#include <dirent.h>

#include "plot.h"
#include "blkparse.h"
#include "list.h"
#include "tracers.h"
#include "../blktrace_api.h"

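/*
 * Pending IOs are tracked in a small chained hash table keyed by sector
 * so queue/dispatch events can later be matched with completions; a
 * second table maps pids to process names for per-process plots.
 */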
#define IO_HASH_TABLE_BITS 11
#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
static u64 ios_in_flight = 0;

#define PROCESS_HASH_TABLE_BITS 7
#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];

extern int plot_io_action;
extern int io_per_process;

#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
#define BLK_TA_MASK (((1 << BLK_TC_SHIFT) - 1) & ~__BLK_TA_CGROUP)

struct pending_io {
	/* sector offset of this IO */
	u64 sector;

	/* dev_t for this IO */
	u32 device;

	/* time this IO was dispatched */
	u64 dispatch_time;
	/* time this IO was finished */
	u64 completion_time;
	struct list_head hash_list;
	/* process which queued this IO */
	u32 pid;
};

struct pid_map {
	struct list_head hash_list;
	u32 pid;
	int index;
	char name[0];
};

u64 get_record_time(struct trace *trace)
{
	return trace->io->time;
}

void init_io_hash_table(void)
{
	int i;
	struct list_head *head;

	for (i = 0; i < IO_HASH_TABLE_SIZE; i++) {
		head = io_hash_table + i;
		INIT_LIST_HEAD(head);
	}
}

/* taken from the kernel hash.h */
static inline u64 hash_sector(u64 val)
{
	u64 hash = val;

	/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
	u64 n = hash;
	n <<= 18;
	hash -= n;
	n <<= 33;
	hash -= n;
	n <<= 3;
	hash += n;
	n <<= 3;
	hash -= n;
	n <<= 4;
	hash += n;
	n <<= 2;
	hash += n;

	/* High bits are more random, so use them. */
	return hash >> (64 - IO_HASH_TABLE_BITS);
}

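/*
 * Add a pending IO to the hash, refusing duplicates.  A collision on
 * (sector, device) returns -EEXIST so the caller can drop its copy.
 */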
static int io_hash_table_insert(struct pending_io *ins_pio)
{
	u64 sector = ins_pio->sector;
	u32 dev = ins_pio->device;
	int slot = hash_sector(sector);
	struct list_head *head;
	struct pending_io *pio;

	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
		if (pio->sector == sector && pio->device == dev)
			return -EEXIST;
	}
	list_add_tail(&ins_pio->hash_list, head);
	return 0;
}

static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
{
	int slot = hash_sector(sector);
	struct list_head *head;
	struct pending_io *pio;

	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
		if (pio->sector == sector && pio->device == dev)
			return pio;
	}
	return NULL;
}

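/*
 * The three helpers below follow an IO through the trace: a Q (queue)
 * event allocates and hashes a pending_io, a D (issue) event stamps its
 * dispatch time, and a C (complete) event looks the entry back up so
 * add_completed_io() can compute latency.
 */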
static struct pending_io *hash_queued_io(struct blk_io_trace *io)
{
	struct pending_io *pio;
	int ret;

	pio = calloc(1, sizeof(*pio));
	if (!pio)
		return NULL;
	pio->sector = io->sector;
	pio->device = io->device;
	pio->pid = io->pid;

	ret = io_hash_table_insert(pio);
	if (ret < 0) {
		/* crud, the IO is there already */
		free(pio);
		return NULL;
	}
	return pio;
}

static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
{
	struct pending_io *pio;

	pio = io_hash_table_search(io->sector, io->device);
	if (!pio) {
		pio = hash_queued_io(io);
		if (!pio)
			return NULL;
	}
	pio->dispatch_time = io->time;
	return pio;
}

static struct pending_io *hash_completed_io(struct blk_io_trace *io)
{
	return io_hash_table_search(io->sector, io->device);
}

void init_process_hash_table(void)
{
	int i;
	struct list_head *head;

	for (i = 0; i < PROCESS_HASH_TABLE_SIZE; i++) {
		head = process_hash_table + i;
		INIT_LIST_HEAD(head);
	}
}

static u32 hash_pid(u32 pid)
{
	u32 hash = pid;

	hash ^= pid >> 3;
	hash ^= pid >> 4;
	hash ^= pid >> 6;
	return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
}

static struct pid_map *process_hash_search(u32 pid)
{
	int slot = hash_pid(pid);
	struct list_head *head;
	struct pid_map *pm;

	head = process_hash_table + slot;
	list_for_each_entry(pm, head, hash_list) {
		if (pm->pid == pid)
			return pm;
	}
	return NULL;
}

static struct pid_map *process_hash_insert(u32 pid, char *name)
{
	int slot = hash_pid(pid);
	struct pid_map *pm;
	int old_index = 0;
	char buf[16];

	pm = process_hash_search(pid);
	if (pm) {
		/* Entry exists and name shouldn't be changed? */
		if (!name || !strcmp(name, pm->name))
			return pm;
		list_del(&pm->hash_list);
		old_index = pm->index;
		free(pm);
	}
	if (!name) {
		sprintf(buf, "[%u]", pid);
		name = buf;
	}
	pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
	pm->pid = pid;
	pm->index = old_index;
	strcpy(pm->name, name);
	list_add_tail(&pm->hash_list, process_hash_table + slot);

	return pm;
}

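/*
 * Notify records carry their payload right after the trace header.  A
 * process notify maps a pid to a command name; a timestamp notify ties
 * the relative trace clock to wall-clock time as two u32s (seconds,
 * nanoseconds).
 */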
static void handle_notify(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	void *payload = (char *)io + sizeof(*io);
	int pdu_len = io->pdu_len;
	u32 two32[2];

	if (io->action & __BLK_TN_CGROUP) {
		payload += sizeof(struct blk_io_cgroup_payload);
		pdu_len -= sizeof(struct blk_io_cgroup_payload);
	}
	if ((io->action & ~__BLK_TN_CGROUP) == BLK_TN_PROCESS) {
		if (io_per_process)
			process_hash_insert(io->pid, payload);
		return;
	}

	if ((io->action & ~__BLK_TN_CGROUP) != BLK_TN_TIMESTAMP)
		return;

	if (pdu_len != sizeof(two32))
		return;

	memcpy(two32, payload, sizeof(two32));
	trace->start_timestamp = io->time;
	trace->abs_start_time.tv_sec = two32[0];
	trace->abs_start_time.tv_nsec = two32[1];
	if (trace->abs_start_time.tv_nsec < 0) {
		trace->abs_start_time.tv_sec--;
		trace->abs_start_time.tv_nsec += 1000000000;
	}
}

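/*
 * Records are variable length: a fixed blk_io_trace header followed by
 * pdu_len bytes of payload.  next_record() steps to the following
 * record and returns 1 at the end of the trace.
 */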
int next_record(struct trace *trace)
{
	int skip = trace->io->pdu_len;
	u64 offset;

	trace->cur += sizeof(*trace->io) + skip;
	offset = trace->cur - trace->start;
	if (offset >= trace->len)
		return 1;

	trace->io = (struct blk_io_trace *)trace->cur;
	return 0;
}

void first_record(struct trace *trace)
{
	trace->cur = trace->start;
	trace->io = (struct blk_io_trace *)trace->cur;
}

static int is_io_event(struct blk_io_trace *test)
{
	char *message;
	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
		return 1;
	if ((test->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
		int len = test->pdu_len;

		message = (char *)(test + 1);
		if (test->action & __BLK_TN_CGROUP) {
			len -= sizeof(struct blk_io_cgroup_payload);
			message += sizeof(struct blk_io_cgroup_payload);
		}
		if (len < 4)
			return 0;
		if (strncmp(message, "fio ", 4) == 0)
			return 1;
	}
	return 0;
}

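/*
 * Find the timestamp of the last IO event in the trace.  The fast path
 * walks backwards from the end of the mapped file looking for a record
 * whose length lines up exactly with EOF; if nothing matches within 8K
 * of the tail, fall back to scanning the whole trace forwards.
 */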
u64 find_last_time(struct trace *trace)
{
	char *p = trace->start + trace->len;
	struct blk_io_trace *test;
	int search_len = 0;
	u64 found = 0;

	if (trace->len < sizeof(*trace->io))
		return 0;
	p -= sizeof(*trace->io);
	while (p >= trace->start) {
		test = (struct blk_io_trace *)p;
		if (CHECK_MAGIC(test) && is_io_event(test)) {
			u64 offset = p - trace->start;
			if (offset + sizeof(*test) + test->pdu_len == trace->len)
				return test->time;
		}
		p--;
		search_len++;
		if (search_len > 8192)
			break;
	}

	/* searching backwards didn't work out, we'll have to scan the file */
	first_record(trace);
	while (1) {
		if (is_io_event(trace->io))
			found = trace->io->time;
		if (next_record(trace))
			break;
	}
	first_record(trace);
	return found;
}

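/*
 * fio can embed messages of the form "fio r <bank> <offset> <num_banks>"
 * in the trace; parse one into its three numeric fields.  Returns 0 on
 * success and -1 if this record is not such a message.
 */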
static int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offset_ret,
				  u64 *num_banks_ret)
{
	char *s;
	char *next;
	char *message;
	struct blk_io_trace *test = trace->io;
	int len = test->pdu_len;
	u64 bank;
	u64 offset;
	u64 num_banks;

	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
		return -1;
	if ((test->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE)
		return -1;

	message = (char *)(test + 1);
	if (test->action & __BLK_TN_CGROUP) {
		len -= sizeof(struct blk_io_cgroup_payload);
		message += sizeof(struct blk_io_cgroup_payload);
	}
	/* the message is: fio r <bank> <offset> <num_banks> */
	if (len < 6)
		return -1;
	if (strncmp(message, "fio r ", 6) != 0)
		return -1;

	message = strndup(message, len);
	s = strchr(message, ' ');
	if (!s)
		goto out;
	s++;
	s = strchr(s, ' ');
	if (!s)
		goto out;

	bank = strtoll(s, &next, 10);
	if (s == next)
		goto out;
	s = next;

	offset = strtoll(s, &next, 10);
	if (s == next)
		goto out;
	s = next;

	num_banks = strtoll(s, &next, 10);
	if (s == next)
		goto out;

	*bank_ret = bank;
	*offset_ret = offset;
	*num_banks_ret = num_banks;
	free(message);

	return 0;
out:
	free(message);
	return -1;
}

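/*
 * A trace can span several devices.  Each device gets a dev_info slot
 * recording its min/max byte offsets, and map_devices() lays the
 * devices end to end so every IO can be mapped onto one flat offset
 * axis for plotting.
 */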
static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
{
	u32 dev = io->device;
	int i;
	struct dev_info *di = NULL;

	for (i = 0; i < trace->num_devices; i++) {
		if (trace->devices[i].device == dev) {
			di = trace->devices + i;
			goto found;
		}
	}
	i = trace->num_devices++;
	if (i >= MAX_DEVICES_PER_TRACE) {
		fprintf(stderr, "Trace contains too many devices (%d)\n", i);
		exit(1);
	}
	di = trace->devices + i;
	di->device = dev;
found:
	return di;
}

static void map_devices(struct trace *trace)
{
	struct dev_info *di;
	u64 found;
	u64 map_start = 0;
	int i;

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			di = lookup_dev(trace, trace->io);
			found = trace->io->sector << 9;
			if (found < di->min)
				di->min = found;

			found += trace->io->bytes;
			if (di->max < found)
				di->max = found;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	for (i = 0; i < trace->num_devices; i++) {
		di = trace->devices + i;
		di->map = map_start;
		map_start += di->max - di->min;
	}
}

static u64 map_io(struct trace *trace, struct blk_io_trace *io)
{
	struct dev_info *di = lookup_dev(trace, io);
	u64 val = io->sector << 9;
	return di->map + val - di->min;
}

void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
			  u64 *max_offset_ret)
{
	u64 found = 0;
	u64 max = 0, min = ~(u64)0;
	u64 max_bank = 0;
	u64 max_bank_offset = 0;
	u64 num_banks = 0;

	map_devices(trace);

	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
			found = map_io(trace, trace->io);
			if (found < min)
				min = found;

			found += trace->io->bytes;
			if (max < found)
				max = found;
		} else {
			u64 bank;
			u64 offset;
			if (!parse_fio_bank_message(trace, &bank,
						    &offset, &num_banks)) {
				if (bank > max_bank)
					max_bank = bank;
				if (offset > max_bank_offset)
					max_bank_offset = offset;
			}
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	*min_ret = min;
	*max_ret = max;
	*max_bank_ret = max_bank;
	*max_offset_ret = max_bank_offset;
}

static void check_io_types(struct trace *trace)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;

	if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
		switch (action) {
		case __BLK_TA_COMPLETE:
			trace->found_completion = 1;
			break;
		case __BLK_TA_ISSUE:
			trace->found_issue = 1;
			break;
		case __BLK_TA_QUEUE:
			trace->found_queue = 1;
			break;
		}
	}
}

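/*
 * Trim outliers from the offset axis: split the offset range into ten
 * buckets, count queue events per bucket, and zoom the y axis in to the
 * first/last bucket holding at least 5% as many hits as the busiest
 * one.
 */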
int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
		    u64 *yzoom_min, u64 *yzoom_max)
{
	int hits[11];
	u64 max_per_bucket[11];
	u64 min_per_bucket[11];
	u64 bytes_per_bucket = (max_offset - min_offset + 1) / 10;
	int slot;
	int fat_count = 0;

	/* don't divide by zero when the offset range is tiny */
	if (bytes_per_bucket == 0)
		bytes_per_bucket = 1;

	memset(hits, 0, sizeof(int) * 11);
	memset(max_per_bucket, 0, sizeof(u64) * 11);
	memset(min_per_bucket, 0xff, sizeof(u64) * 11);
	first_record(trace);
	while (1) {
		check_io_types(trace);
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
		    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
			u64 off = map_io(trace, trace->io) - min_offset;

			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off < min_per_bucket[slot])
				min_per_bucket[slot] = off;

			off += trace->io->bytes;
			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off > max_per_bucket[slot])
				max_per_bucket[slot] = off;
		}
		if (next_record(trace))
			break;
	}
	first_record(trace);
	for (slot = 0; slot < 11; slot++) {
		if (hits[slot] > fat_count)
			fat_count = hits[slot];
	}

	*yzoom_max = max_offset;
	for (slot = 10; slot >= 0; slot--) {
		double d = hits[slot];

		if (d >= (double)fat_count * .05) {
			*yzoom_max = max_per_bucket[slot] + min_offset;
			break;
		}
	}

	*yzoom_min = min_offset;
	for (slot = 0; slot < 10; slot++) {
		double d = hits[slot];

		if (d >= (double)fat_count * .05) {
			*yzoom_min = min_per_bucket[slot] + min_offset;
			break;
		}
	}
	return 0;
}

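/* trace files are discovered by their per-CPU suffix, e.g. foo.blktrace.0 */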
static char footer[] = ".blktrace.0";
static int footer_len = sizeof(footer) - 1;

static int match_trace(char *name, int *len)
{
	int match_len;
	int footer_start;

	match_len = strlen(name);
	if (match_len <= footer_len)
		return 0;

	footer_start = match_len - footer_len;
	if (strcmp(name + footer_start, footer) != 0)
		return 0;

	if (len)
		*len = match_len;
	return 1;
}

struct tracelist {
	struct tracelist *next;
	char *name;
};

static struct tracelist *traces_list(char *dir_name, int *len)
{
	int count = 0;
	struct tracelist *traces = NULL;
	int dlen = strlen(dir_name);
	DIR *dir = opendir(dir_name);
	if (!dir)
		return NULL;

	while (1) {
		int n = 0;
		struct tracelist *tl;
		struct dirent *d = readdir(dir);
		if (!d)
			break;

		if (!match_trace(d->d_name, &n))
			continue;

		n += dlen + 1; /* dir + '/' + file */
		/* Allocate space for tracelist + filename */
		tl = calloc(1, sizeof(struct tracelist) + (sizeof(char) * (n + 1)));
		if (!tl) {
			closedir(dir);
			return NULL;
		}
		tl->next = traces;
		tl->name = (char *)(tl + 1);
		snprintf(tl->name, n + 1, "%s/%s", dir_name, d->d_name);
		traces = tl;
		count++;
	}

	closedir(dir);

	if (len)
		*len = count;

	return traces;
}

static void traces_free(struct tracelist *traces)
{
	while (traces) {
		struct tracelist *tl = traces;
		traces = traces->next;
		free(tl);
	}
}

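/*
 * Merge a list of per-device traces into a single binary dump by
 * running blkparse.  For two traces the generated command looks like:
 *
 *	blkparse -O -d foo.dump -i sda.blktrace.0 -i sdb.blktrace.0
 *
 * Returns 0 on success, otherwise blkparse's exit status.
 */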
static int dump_traces(struct tracelist *traces, int count, char *dumpfile)
{
	struct tracelist *tl;
	char **argv = NULL;
	int argc = 0;
	int i;
	int err = 0;

	argc = count * 2; /* {"-i", trace} per trace */
	argc += 4; /* "blkparse" "-O" "-d" dumpfile */
	argv = calloc(argc + 1, sizeof(char *));
	if (!argv)
		return -errno;

	i = 0;
	argv[i++] = "blkparse";
	argv[i++] = "-O";
	argv[i++] = "-d";
	argv[i++] = dumpfile;
	for (tl = traces; tl != NULL; tl = tl->next) {
		argv[i++] = "-i";
		argv[i++] = tl->name;
	}

	err = run_program(argc, argv, 1, NULL, NULL);
	if (err)
		fprintf(stderr, "%s exited with %d, expected 0\n", argv[0], err);
	free(argv);
	return err;
}

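/*
 * Resolve whatever name the user passed into a dump file, trying in
 * order: the name itself as a regular file, <name>.dump if it already
 * exists, a combined dump of all traces in a directory of that name,
 * and finally a dump generated from <name>.blktrace.0.  Returns a
 * malloc'd path or NULL.
 */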
static char *find_trace_file(char *filename)
{
	int ret;
	struct stat st;
	char *dot;
	int found_dir = 0;
	char *dumpfile;
	int len = strlen(filename);

	/* look for an exact match of whatever they pass in.
	 * If it is a file, assume it is the dump file.
	 * If a directory, remember that it existed so we
	 * can combine traces in that directory later
	 */
	ret = stat(filename, &st);
	if (ret == 0) {
		if (S_ISREG(st.st_mode))
			return strdup(filename);

		if (S_ISDIR(st.st_mode))
			found_dir = 1;
	}

	if (found_dir) {
		int i;
		/* Eat up trailing '/'s */
		for (i = len - 1; filename[i] == '/'; i--)
			filename[i] = '\0';
	}

	/*
	 * try tacking .dump onto the end and see if that already
	 * has been generated
	 */
	ret = asprintf(&dumpfile, "%s.dump", filename);
	if (ret == -1) {
		perror("Error building dump file name");
		return NULL;
	}
	ret = stat(dumpfile, &st);
	if (ret == 0)
		return dumpfile;

	/*
	 * try to generate the .dump from all the traces in
	 * a single dir.
	 */
	if (found_dir) {
		int count;
		struct tracelist *traces = traces_list(filename, &count);
		if (traces) {
			ret = dump_traces(traces, count, dumpfile);
			traces_free(traces);
			if (ret == 0)
				return dumpfile;
		}
	}
	free(dumpfile);

	/*
	 * try to generate the .dump from all the blktrace
	 * files for a named trace
	 */
	dot = strrchr(filename, '.');
	if (!dot || strcmp(".dump", dot) != 0) {
		struct tracelist trace = {0, NULL};
		if (dot && dot != filename)
			len = dot - filename;

		ret = asprintf(&trace.name, "%.*s.blktrace.0", len, filename);
		if (ret == -1)
			return NULL;
		ret = asprintf(&dumpfile, "%.*s.dump", len, filename);
		if (ret == -1) {
			free(trace.name);
			return NULL;
		}

		ret = dump_traces(&trace, 1, dumpfile);
		if (ret == 0) {
			free(trace.name);
			return dumpfile;
		}
		free(trace.name);
		free(dumpfile);
	}
	return NULL;
}
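
/*
 * mmap a trace dump read-only and point trace->io at the first record.
 * Returns NULL on failure, after closing the fd and freeing the trace.
 */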
struct trace *open_trace(char *filename)
{
	int fd;
	char *p;
	struct stat st;
	int ret;
	struct trace *trace;
	char *found_filename;

	trace = calloc(1, sizeof(*trace));
	if (!trace) {
		fprintf(stderr, "unable to allocate memory for trace\n");
		return NULL;
	}

	found_filename = find_trace_file(filename);
	if (!found_filename) {
		fprintf(stderr, "Unable to find trace file %s\n", filename);
		goto fail;
	}
	filename = found_filename;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "Unable to open trace file %s err %s\n", filename, strerror(errno));
		goto fail;
	}
	ret = fstat(fd, &st);
	if (ret < 0) {
		fprintf(stderr, "stat failed on %s err %s\n", filename, strerror(errno));
		goto fail_fd;
	}
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		fprintf(stderr, "Unable to mmap trace file %s, err %s\n", filename, strerror(errno));
		goto fail_fd;
	}
	trace->fd = fd;
	trace->len = st.st_size;
	trace->start = p;
	trace->cur = p;
	trace->io = (struct blk_io_trace *)p;
	return trace;

fail_fd:
	close(fd);
fail:
	free(trace);
	return NULL;
}
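
/*
 * Pick which event type to account for throughput: completions are the
 * most accurate, then issues, then queue events, depending on what the
 * trace actually contains (see check_io_types()).
 */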
static inline int tput_event(struct trace *trace)
{
	if (trace->found_completion)
		return __BLK_TA_COMPLETE;
	if (trace->found_issue)
		return __BLK_TA_ISSUE;
	if (trace->found_queue)
		return __BLK_TA_QUEUE;

	return __BLK_TA_COMPLETE;
}

int action_char_to_num(char action)
{
	switch (action) {
	case 'Q':
		return __BLK_TA_QUEUE;
	case 'D':
		return __BLK_TA_ISSUE;
	case 'C':
		return __BLK_TA_COMPLETE;
	}
	return -1;
}

static inline int io_event(struct trace *trace)
{
	if (plot_io_action)
		return plot_io_action;
	if (trace->found_queue)
		return __BLK_TA_QUEUE;
	if (trace->found_issue)
		return __BLK_TA_ISSUE;
	if (trace->found_completion)
		return __BLK_TA_COMPLETE;

	return __BLK_TA_COMPLETE;
}

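/*
 * Accumulate bytes transferred into the per-second bucket of the read
 * or write throughput graph, tracking the running maximum for axis
 * scaling.
 */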
void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
	      struct graph_line_data *reads_gld)
{
	struct blk_io_trace *io = trace->io;
	struct graph_line_data *gld;
	int action = io->action & BLK_TA_MASK;
	int seconds;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != tput_event(trace))
		return;

	if (BLK_DATADIR(io->action) & BLK_TC_READ)
		gld = reads_gld;
	else
		gld = writes_gld;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += io->bytes;

	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}

#define GDD_PTR_ALLOC_STEP 16

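/*
 * Look up (or create) the per-process plot slot for a pid.  When
 * per-process plotting is off, everything shares slot zero and NULL is
 * returned.  The gdd_reads/gdd_writes plot arrays grow in steps of
 * GDD_PTR_ALLOC_STEP as new processes appear.
 */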
static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
{
	struct pid_map *pm;

	if (!io_per_process) {
		if (!tf->io_plots)
			tf->io_plots = 1;
		return NULL;
	}

	pm = process_hash_insert(pid, NULL);
	/* New entry? */
	if (!pm->index) {
		if (tf->io_plots == tf->io_plots_allocated) {
			tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
			tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_reads)
				abort();
			tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
			if (!tf->gdd_writes)
				abort();
			memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
			memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
		}
		pm->index = tf->io_plots++;
	}
	return pm;
}

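/*
 * Record one IO on the offset graph: map it onto the flat offset axis
 * and set its dots in the read or write plot of the owning process.
 */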
void add_io(struct trace *trace, struct trace_file *tf)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	u64 offset;
	int index;
	char *label;
	struct pid_map *pm;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != io_event(trace))
		return;

	offset = map_io(trace, io);

	pm = get_pid_map(tf, io->pid);
	if (!pm) {
		index = 0;
		label = "";
	} else {
		index = pm->index;
		label = pm->name;
	}
	if (BLK_DATADIR(io->action) & BLK_TC_READ) {
		if (!tf->gdd_reads[index])
			tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds,
							      tf->min_offset, tf->max_offset,
							      tf->stop_seconds, pick_color(),
							      strdup(label));
		set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
	} else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
		if (!tf->gdd_writes[index])
			tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds,
							       tf->min_offset, tf->max_offset,
							       tf->stop_seconds, pick_color(),
							       strdup(label));
		set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
	}
}

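/*
 * Track queue depth (IOs in flight).  Which events drive the counter
 * depends on what the trace contains: with D (issue) events, Q events
 * are ignored; without completions, Q/D events are accounted directly
 * and never hashed, since nothing would ever remove them.
 */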
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
	unsigned int seconds;
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	double avg;
	struct pending_io *pio;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action == __BLK_TA_QUEUE) {
		if (io->sector == 0)
			return;
		/*
		 * If D (issue) events are available, use them for I/O
		 * accounting.  Nothing needs to be done for Q.
		 */
		if (trace->found_issue)
			return;
		/*
		 * If there are no D or C events, then all that can be
		 * done is to account the Q event (and make sure not to
		 * add the I/O to the hash, because it will never be
		 * removed).
		 */
		if (!trace->found_completion)
			goto account_io;
		/*
		 * When there are no ISSUE events, count depth and
		 * latency from queue events.
		 */
		pio = hash_queued_io(trace->io);
		if (pio) {
			pio->dispatch_time = io->time;
			goto account_io;
		}
		return;
	}
	if (action == __BLK_TA_REQUEUE) {
		if (ios_in_flight > 0)
			ios_in_flight--;
		return;
	}
	if (action != __BLK_TA_ISSUE)
		return;

	pio = hash_dispatched_io(trace->io);
	if (!pio)
		return;

	if (!trace->found_completion) {
		list_del(&pio->hash_list);
		free(pio);
	}

account_io:
	ios_in_flight++;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += ios_in_flight;
	gld->data[seconds].count++;

	avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
	if (gld->max < (u64)avg)
		gld->max = avg;
}

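/*
 * On a completion event, look up the pending IO it finishes, decrement
 * the in-flight count, and fold (completion - dispatch) time into the
 * per-second latency average.
 */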
void add_completed_io(struct trace *trace,
		      struct graph_line_data *latency_gld)
{
	struct blk_io_trace *io = trace->io;
	int seconds;
	int action = io->action & BLK_TA_MASK;
	struct pending_io *pio;
	double avg;
	u64 latency;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	if (action != __BLK_TA_COMPLETE)
		return;

	seconds = SECONDS(io->time);

	pio = hash_completed_io(trace->io);
	if (!pio)
		return;

	if (ios_in_flight > 0)
		ios_in_flight--;
	if (io->time >= pio->dispatch_time) {
		latency = io->time - pio->dispatch_time;
		latency_gld->data[seconds].sum += latency;
		latency_gld->data[seconds].count++;
	}

	list_del(&pio->hash_list);
	free(pio);

	avg = (double)latency_gld->data[seconds].sum /
		latency_gld->data[seconds].count;
	if (latency_gld->max < (u64)avg)
		latency_gld->max = avg;
}

void add_iop(struct trace *trace, struct graph_line_data *gld)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	int seconds;

	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;

	/* iops and tput use the same events */
	if (action != tput_event(trace))
		return;

	seconds = SECONDS(io->time);
	gld->data[seconds].sum += 1;
	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}

void check_record(struct trace *trace)
{
	handle_notify(trace);
}