iowatcher: Fix up some strcpy and strcat usage
iowatcher/blkparse.c
/*
 * Copyright (C) 2012 Fusion-io
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public
 *  License v2 as published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *  Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <inttypes.h>
#include <string.h>
#include <asm/types.h>
#include <errno.h>
#include <sys/mman.h>
#include <time.h>
#include <math.h>
#include <dirent.h>

#include "plot.h"
#include "blkparse.h"
#include "list.h"
#include "tracers.h"

#define IO_HASH_TABLE_BITS  11
#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
static u64 ios_in_flight = 0;

#define PROCESS_HASH_TABLE_BITS 7
#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];

extern int plot_io_action;
extern int io_per_process;

/*
 * Trace categories
 */
enum {
        BLK_TC_READ     = 1 << 0,       /* reads */
        BLK_TC_WRITE    = 1 << 1,       /* writes */
        BLK_TC_FLUSH    = 1 << 2,       /* flush */
        BLK_TC_SYNC     = 1 << 3,       /* sync */
        BLK_TC_QUEUE    = 1 << 4,       /* queueing/merging */
        BLK_TC_REQUEUE  = 1 << 5,       /* requeueing */
        BLK_TC_ISSUE    = 1 << 6,       /* issue */
        BLK_TC_COMPLETE = 1 << 7,       /* completions */
        BLK_TC_FS       = 1 << 8,       /* fs requests */
        BLK_TC_PC       = 1 << 9,       /* pc requests */
        BLK_TC_NOTIFY   = 1 << 10,      /* special message */
        BLK_TC_AHEAD    = 1 << 11,      /* readahead */
        BLK_TC_META     = 1 << 12,      /* metadata */
        BLK_TC_DISCARD  = 1 << 13,      /* discard requests */
        BLK_TC_DRV_DATA = 1 << 14,      /* binary driver data */
        BLK_TC_FUA      = 1 << 15,      /* fua requests */

        BLK_TC_END      = 1 << 15,      /* we've run out of bits! */
};

#define BLK_TC_SHIFT            (16)
#define BLK_TC_ACT(act)         ((act) << BLK_TC_SHIFT)
#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))

/*
 * Basic trace actions
 */
enum {
        __BLK_TA_QUEUE = 1,             /* queued */
        __BLK_TA_BACKMERGE,             /* back merged to existing rq */
        __BLK_TA_FRONTMERGE,            /* front merge to existing rq */
        __BLK_TA_GETRQ,                 /* allocated new request */
        __BLK_TA_SLEEPRQ,               /* sleeping on rq allocation */
        __BLK_TA_REQUEUE,               /* request requeued */
        __BLK_TA_ISSUE,                 /* sent to driver */
        __BLK_TA_COMPLETE,              /* completed by driver */
        __BLK_TA_PLUG,                  /* queue was plugged */
        __BLK_TA_UNPLUG_IO,             /* queue was unplugged by io */
        __BLK_TA_UNPLUG_TIMER,          /* queue was unplugged by timer */
        __BLK_TA_INSERT,                /* insert request */
        __BLK_TA_SPLIT,                 /* bio was split */
        __BLK_TA_BOUNCE,                /* bio was bounced */
        __BLK_TA_REMAP,                 /* bio was remapped */
        __BLK_TA_ABORT,                 /* request aborted */
        __BLK_TA_DRV_DATA,              /* binary driver data */
};

#define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)

/*
 * Notify events.
 */
enum blktrace_notify {
        __BLK_TN_PROCESS = 0,           /* establish pid/name mapping */
        __BLK_TN_TIMESTAMP,             /* include system clock */
        __BLK_TN_MESSAGE,               /* Character string message */
};

/*
 * Trace actions in full. Additionally, read or write is masked
 */
#define BLK_TA_QUEUE            (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_BACKMERGE        (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_FRONTMERGE       (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_GETRQ            (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_SLEEPRQ          (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_REQUEUE          (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
#define BLK_TA_ISSUE            (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
#define BLK_TA_COMPLETE         (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
#define BLK_TA_PLUG             (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_UNPLUG_IO        (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_UNPLUG_TIMER     (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_INSERT           (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_SPLIT            (__BLK_TA_SPLIT)
#define BLK_TA_BOUNCE           (__BLK_TA_BOUNCE)
#define BLK_TA_REMAP            (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_ABORT            (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_DRV_DATA         (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))

#define BLK_TN_PROCESS          (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP        (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_MESSAGE          (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))

#define BLK_IO_TRACE_MAGIC      0x65617400
#define BLK_IO_TRACE_VERSION    0x07
/*
 * The trace itself
 */
struct blk_io_trace {
        __u32 magic;            /* MAGIC << 8 | version */
        __u32 sequence;         /* event number */
        __u64 time;             /* in nanoseconds */
        __u64 sector;           /* disk offset */
        __u32 bytes;            /* transfer length */
        __u32 action;           /* what happened */
        __u32 pid;              /* who did it */
        __u32 device;           /* device identifier (dev_t) */
        __u32 cpu;              /* on what cpu did it happen */
        __u16 error;            /* completion error */
        __u16 pdu_len;          /* length of data after this trace */
};

struct pending_io {
        /* sector offset of this IO */
        u64 sector;

        /* dev_t for this IO */
        u32 device;

        /* time this IO was dispatched */
        u64 dispatch_time;
        /* time this IO was finished */
        u64 completion_time;
        struct list_head hash_list;
        /* process which queued this IO */
        u32 pid;
};

struct pid_map {
        struct list_head hash_list;
        u32 pid;
        int index;
        char name[0];
};

u64 get_record_time(struct trace *trace)
{
        return trace->io->time;
}

void init_io_hash_table(void)
{
        int i;
        struct list_head *head;

        for (i = 0; i < IO_HASH_TABLE_SIZE; i++) {
                head = io_hash_table + i;
                INIT_LIST_HEAD(head);
        }
}

/* taken from the kernel hash.h */
static inline u64 hash_sector(u64 val)
{
        u64 hash = val;

        /*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
        u64 n = hash;
        n <<= 18;
        hash -= n;
        n <<= 33;
        hash -= n;
        n <<= 3;
        hash += n;
        n <<= 3;
        hash -= n;
        n <<= 4;
        hash += n;
        n <<= 2;
        hash += n;

        /* High bits are more random, so use them. */
        return hash >> (64 - IO_HASH_TABLE_BITS);
}

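/*
 * Add a pending IO to the hash, keyed on (sector, device).  Returns
 * -EEXIST and leaves the table untouched if an entry for the same
 * sector and device is already present.
 */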
static int io_hash_table_insert(struct pending_io *ins_pio)
{
        u64 sector = ins_pio->sector;
        u32 dev = ins_pio->device;
        int slot = hash_sector(sector);
        struct list_head *head;
        struct pending_io *pio;

        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
                if (pio->sector == sector && pio->device == dev)
                        return -EEXIST;
        }
        list_add_tail(&ins_pio->hash_list, head);
        return 0;
}

static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
{
        int slot = hash_sector(sector);
        struct list_head *head;
        struct pending_io *pio;

        head = io_hash_table + slot;
        list_for_each_entry(pio, head, hash_list) {
                if (pio->sector == sector && pio->device == dev)
                        return pio;
        }
        return NULL;
}

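/* allocate a pending_io for a queued IO and add it to the hash */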
static struct pending_io *hash_queued_io(struct blk_io_trace *io)
{
        struct pending_io *pio;
        int ret;

        pio = calloc(1, sizeof(*pio));
        if (!pio)
                return NULL;
        pio->sector = io->sector;
        pio->device = io->device;
        pio->pid = io->pid;

        ret = io_hash_table_insert(pio);
        if (ret < 0) {
                /* crud, the IO is there already */
                free(pio);
                return NULL;
        }
        return pio;
}

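/*
 * Find the pending_io for an issued IO, creating one on the fly if the
 * queue event was never seen, and record the dispatch time.
 */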
static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
{
        struct pending_io *pio;

        pio = io_hash_table_search(io->sector, io->device);
        if (!pio) {
                pio = hash_queued_io(io);
                if (!pio)
                        return NULL;
        }
        pio->dispatch_time = io->time;
        return pio;
}

static struct pending_io *hash_completed_io(struct blk_io_trace *io)
{
        struct pending_io *pio;

        pio = io_hash_table_search(io->sector, io->device);

        if (!pio)
                return NULL;
        return pio;
}

void init_process_hash_table(void)
{
        int i;
        struct list_head *head;

        for (i = 0; i < PROCESS_HASH_TABLE_SIZE; i++) {
                head = process_hash_table + i;
                INIT_LIST_HEAD(head);
        }
}

static u32 hash_pid(u32 pid)
{
        u32 hash = pid;

        hash ^= pid >> 3;
        hash ^= pid >> 3;
        hash ^= pid >> 4;
        hash ^= pid >> 6;
        return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
}

static struct pid_map *process_hash_search(u32 pid)
{
        int slot = hash_pid(pid);
        struct list_head *head;
        struct pid_map *pm;

        head = process_hash_table + slot;
        list_for_each_entry(pm, head, hash_list) {
                if (pm->pid == pid)
                        return pm;
        }
        return NULL;
}

static struct pid_map *process_hash_insert(u32 pid, char *name)
{
        int slot = hash_pid(pid);
        struct pid_map *pm;
        int old_index = 0;
        char buf[16];

        pm = process_hash_search(pid);
        if (pm) {
                /* Entry exists and name shouldn't be changed? */
                if (!name || !strcmp(name, pm->name))
                        return pm;
                list_del(&pm->hash_list);
                old_index = pm->index;
                free(pm);
        }
        if (!name) {
                sprintf(buf, "[%u]", pid);
                name = buf;
        }
        pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
        pm->pid = pid;
        pm->index = old_index;
        strcpy(pm->name, name);
        list_add_tail(&pm->hash_list, process_hash_table + slot);

        return pm;
}

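/*
 * Handle notify records: pid/name mappings feed the per-process hash,
 * and the timestamp message anchors the trace to wall-clock time.
 */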
static void handle_notify(struct trace *trace)
{
        struct blk_io_trace *io = trace->io;
        void *payload = (char *)io + sizeof(*io);
        u32 two32[2];

        if (io->action == BLK_TN_PROCESS) {
                if (io_per_process)
                        process_hash_insert(io->pid, payload);
                return;
        }

        if (io->action != BLK_TN_TIMESTAMP)
                return;

        if (io->pdu_len != sizeof(two32))
                return;

        memcpy(two32, payload, sizeof(two32));
        trace->start_timestamp = io->time;
        trace->abs_start_time.tv_sec = two32[0];
        trace->abs_start_time.tv_nsec = two32[1];
        if (trace->abs_start_time.tv_nsec < 0) {
                trace->abs_start_time.tv_sec--;
                trace->abs_start_time.tv_nsec += 1000000000;
        }
}

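/* step to the next record (header plus payload); returns 1 at end of trace */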
int next_record(struct trace *trace)
{
        int skip = trace->io->pdu_len;
        u64 offset;

        trace->cur += sizeof(*trace->io) + skip;
        offset = trace->cur - trace->start;
        if (offset >= trace->len)
                return 1;

        trace->io = (struct blk_io_trace *)trace->cur;
        return 0;
}

void first_record(struct trace *trace)
{
        trace->cur = trace->start;
        trace->io = (struct blk_io_trace *)trace->cur;
}

static int is_io_event(struct blk_io_trace *test)
{
        char *message;
        if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
                return 1;
        if (test->action == BLK_TN_MESSAGE) {
                int len = test->pdu_len;
                if (len < 3)
                        return 0;
                message = (char *)(test + 1);
                if (strncmp(message, "fio ", 4) == 0) {
                        return 1;
                }
        }
        return 0;
}

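/*
 * Find the timestamp of the last IO event in the trace.  First try to
 * walk backwards from the end of the file looking for a record that
 * lines up exactly with the end; if that fails, scan forward through
 * the whole trace and remember the last IO time seen.
 */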
u64 find_last_time(struct trace *trace)
{
        char *p = trace->start + trace->len;
        struct blk_io_trace *test;
        int search_len = 0;
        u64 found = 0;

        if (trace->len < sizeof(*trace->io))
                return 0;
        p -= sizeof(*trace->io);
        while (p >= trace->start) {
                test = (struct blk_io_trace *)p;
                if (CHECK_MAGIC(test) && is_io_event(test)) {
                        u64 offset = p - trace->start;
                        if (offset + sizeof(*test) + test->pdu_len == trace->len) {
                                return test->time;
                        }
                }
                p--;
                search_len++;
                if (search_len > 8192) {
                        break;
                }
        }

        /* searching backwards didn't work out, we'll have to scan the file */
        first_record(trace);
        while (1) {
                if (is_io_event(trace->io))
                        found = trace->io->time;
                if (next_record(trace))
                        break;
        }
        first_record(trace);
        return found;
}

static int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offset_ret,
                           u64 *num_banks_ret)
{
        char *s;
        char *next;
        char *message;
        struct blk_io_trace *test = trace->io;
        int len = test->pdu_len;
        u64 bank;
        u64 offset;
        u64 num_banks;

        if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
                return -1;
        if (test->action != BLK_TN_MESSAGE)
                return -1;

        /* the message is fio rw bank offset num_banks */
        if (len < 3)
                return -1;
        message = (char *)(test + 1);
        if (strncmp(message, "fio r ", 6) != 0)
                return -1;

        message = strndup(message, len);
        s = strchr(message, ' ');
        if (!s)
                goto out;
        s++;
        s = strchr(s, ' ');
        if (!s)
                goto out;

        bank = strtoll(s, &next, 10);
        if (s == next)
                goto out;
        s = next;

        offset = strtoll(s, &next, 10);
        if (s == next)
                goto out;
        s = next;

        num_banks = strtoll(s, &next, 10);
        if (s == next)
                goto out;

        *bank_ret = bank;
        *offset_ret = offset;
        *num_banks_ret = num_banks;

        free(message);
        return 0;
out:
        free(message);
        return -1;
}

static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
{
        u32 dev = io->device;
        int i;
        struct dev_info *di = NULL;

        for (i = 0; i < trace->num_devices; i++) {
                if (trace->devices[i].device == dev) {
                        di = trace->devices + i;
                        goto found;
                }
        }
        i = trace->num_devices++;
        if (i >= MAX_DEVICES_PER_TRACE) {
                fprintf(stderr, "Trace contains too many devices (%d)\n", i);
                exit(1);
        }
        di = trace->devices + i;
        di->device = dev;
found:
        return di;
}

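/*
 * Scan the whole trace once to find each device's lowest and highest
 * byte offset, then give every device a contiguous slice of a single
 * logical offset space so multiple devices can share one graph.
 */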
static void map_devices(struct trace *trace)
{
        struct dev_info *di;
        u64 found;
        u64 map_start = 0;
        int i;

        first_record(trace);
        while (1) {
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
                        di = lookup_dev(trace, trace->io);
                        found = trace->io->sector << 9;
                        if (found < di->min)
                                di->min = found;

                        found += trace->io->bytes;
                        if (di->max < found)
                                di->max = found;
                }
                if (next_record(trace))
                        break;
        }
        first_record(trace);
        for (i = 0; i < trace->num_devices; i++) {
                di = trace->devices + i;
                di->map = map_start;
                map_start += di->max - di->min;
        }
}

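/* translate an IO's byte offset into the combined per-trace offset space */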
static u64 map_io(struct trace *trace, struct blk_io_trace *io)
{
        struct dev_info *di = lookup_dev(trace, io);
        u64 val = trace->io->sector << 9;
        return di->map + val - di->min;
}

void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
                          u64 *max_offset_ret)
{
        u64 found = 0;
        u64 max = 0, min = ~(u64)0;
        u64 max_bank = 0;
        u64 max_bank_offset = 0;
        u64 num_banks = 0;

        map_devices(trace);

        first_record(trace);
        while (1) {
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
                        found = map_io(trace, trace->io);
                        if (found < min)
                                min = found;

                        found += trace->io->bytes;
                        if (max < found)
                                max = found;
                } else {
                        u64 bank;
                        u64 offset;
                        if (!parse_fio_bank_message(trace, &bank,
                                                    &offset, &num_banks)) {
                                if (bank > max_bank)
                                        max_bank = bank;
                                if (offset > max_bank_offset)
                                        max_bank_offset = offset;
                        }
                }
                if (next_record(trace))
                        break;
        }
        first_record(trace);
        *min_ret = min;
        *max_ret = max;
        *max_bank_ret = max_bank;
        *max_offset_ret = max_bank_offset;
}

static void check_io_types(struct trace *trace)
{
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;

        if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
                switch (action) {
                case __BLK_TA_COMPLETE:
                        trace->found_completion = 1;
                        break;
                case __BLK_TA_ISSUE:
                        trace->found_issue = 1;
                        break;
                case __BLK_TA_QUEUE:
                        trace->found_queue = 1;
                        break;
                };
        }
}


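/*
 * Histogram the queued IO offsets into coarse buckets and pick y-axis
 * zoom limits, trimming sparse buckets (less than 5% of the busiest
 * bucket) from the top and bottom so outliers don't flatten the plot.
 */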
int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
                    u64 *yzoom_min, u64 *yzoom_max)
{
        int hits[11];
        u64 max_per_bucket[11];
        u64 min_per_bucket[11];
        u64 bytes_per_bucket = (max_offset - min_offset + 1) / 10;
        int slot;
        int fat_count = 0;

        memset(hits, 0, sizeof(int) * 11);
        memset(max_per_bucket, 0, sizeof(u64) * 11);
        memset(min_per_bucket, 0xff, sizeof(u64) * 11);
        first_record(trace);
        while (1) {
                check_io_types(trace);
                if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
                    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
                        u64 off = map_io(trace, trace->io) - min_offset;

                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off < min_per_bucket[slot])
                                min_per_bucket[slot] = off;

                        off += trace->io->bytes;
                        slot = (int)(off / bytes_per_bucket);
                        hits[slot]++;
                        if (off > max_per_bucket[slot])
                                max_per_bucket[slot] = off;
                }
                if (next_record(trace))
                        break;
        }
        first_record(trace);
        for (slot = 0; slot < 11; slot++) {
                if (hits[slot] > fat_count) {
                        fat_count = hits[slot];
                }
        }

        *yzoom_max = max_offset;
        for (slot = 10; slot >= 0; slot--) {
                double d = hits[slot];

                if (d >= (double)fat_count * .05) {
                        *yzoom_max = max_per_bucket[slot] + min_offset;
                        break;
                }
        }

        *yzoom_min = min_offset;
        for (slot = 0; slot < 10; slot++) {
                double d = hits[slot];

                if (d >= (double)fat_count * .05) {
                        *yzoom_min = min_per_bucket[slot] + min_offset;
                        break;
                }
        }
        return 0;
}

static char footer[] = ".blktrace.0";
static int footer_len = sizeof(footer) - 1;

static int match_trace(char *name, int *len)
{
        int match_len;
        int footer_start;

        match_len = strlen(name);
        if (match_len <= footer_len)
                return 0;

        footer_start = match_len - footer_len;
        if (strcmp(name + footer_start, footer) != 0)
                return 0;

        if (len)
                *len = match_len;
        return 1;
}

struct tracelist {
        struct tracelist *next;
        char *name;
};

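/* build a list of all the *.blktrace.0 files found in dir_name */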
static struct tracelist *traces_list(char *dir_name, int *len)
{
        int count = 0;
        struct tracelist *traces = NULL;
        int dlen = strlen(dir_name);
        DIR *dir = opendir(dir_name);
        if (!dir)
                return NULL;

        while (1) {
                int n = 0;
                struct tracelist *tl;
                struct dirent *d = readdir(dir);
                if (!d)
                        break;

                if (!match_trace(d->d_name, &n))
                        continue;

                n += dlen + 1; /* dir + '/' + file */
                /* Allocate space for tracelist + filename */
                tl = calloc(1, sizeof(struct tracelist) + (sizeof(char) * (n + 1)));
                if (!tl) {
                        closedir(dir);
                        return NULL;
                }
                tl->next = traces;
                tl->name = (char *)(tl + 1);
                /* n chars plus the NUL fit in the n + 1 byte buffer */
                snprintf(tl->name, n + 1, "%s/%s", dir_name, d->d_name);
                traces = tl;
                count++;
        }

        closedir(dir);

        if (len)
                *len = count;

        return traces;
}

static void traces_free(struct tracelist *traces)
{
        while (traces) {
                struct tracelist *tl = traces;
                traces = traces->next;
                free(tl);
        }
}

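/*
 * Run blkparse over every per-CPU trace in the list and combine them
 * into a single binary dump file that iowatcher can mmap.
 */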
static int dump_traces(struct tracelist *traces, int count, char *dumpfile)
{
        struct tracelist *tl;
        char **argv = NULL;
        int argc = 0;
        int i;
        int err = 0;

        argc = count * 2; /* {"-i", trace } */
        argc += 4; /* See below */
        argv = calloc(argc + 1, sizeof(char *));
        if (!argv)
                return -errno;

        i = 0;
        argv[i++] = "blkparse";
        argv[i++] = "-O";
        argv[i++] = "-d";
        argv[i++] = dumpfile;
        for (tl = traces; tl != NULL; tl = tl->next) {
                argv[i++] = "-i";
                argv[i++] = tl->name;
        }

        err = run_program(argc, argv, 1, NULL, NULL);
        if (err)
                fprintf(stderr, "%s exited with %d, expected 0\n", argv[0], err);
        free(argv);
        return err;
}

static char *find_trace_file(char *filename)
{
        int ret;
        struct stat st;
        char *dot;
        int found_dir = 0;
        char *dumpfile;
        int len = strlen(filename);

        /* look for an exact match of whatever they pass in.
         * If it is a file, assume it is the dump file.
         * If a directory, remember that it existed so we
         * can combine traces in that directory later
         */
        ret = stat(filename, &st);
        if (ret == 0) {
                if (S_ISREG(st.st_mode))
                        return strdup(filename);

                if (S_ISDIR(st.st_mode))
                        found_dir = 1;
        }

        if (found_dir) {
                int i;
                /* Eat up trailing '/'s */
                for (i = len - 1; filename[i] == '/'; i--)
                        filename[i] = '\0';
        }

        /*
         * try tacking .dump onto the end and see if that already
         * has been generated
         */
        ret = asprintf(&dumpfile, "%s.dump", filename);
        if (ret == -1) {
                perror("Error building dump file name");
                return NULL;
        }
        ret = stat(dumpfile, &st);
        if (ret == 0)
                return dumpfile;

        /*
         * try to generate the .dump from all the traces in
         * a single dir.
         */
        if (found_dir) {
                int count;
                struct tracelist *traces = traces_list(filename, &count);
                if (traces) {
                        ret = dump_traces(traces, count, dumpfile);
                        traces_free(traces);
                        if (ret == 0)
                                return dumpfile;
                }
        }
        free(dumpfile);

        /*
         * try to generate the .dump from all the blktrace
         * files for a named trace
         */
        dot = strrchr(filename, '.');
        if (!dot || strcmp(".dump", dot) != 0) {
                struct tracelist trace = {0};
                if (dot && dot != filename)
                        len = dot - filename;

                /* use precision so only the first len chars of filename are kept */
                ret = asprintf(&trace.name, "%.*s.blktrace.0", len, filename);
                if (ret == -1)
                        return NULL;
                ret = asprintf(&dumpfile, "%.*s.dump", len, filename);
                if (ret == -1) {
                        free(trace.name);
                        return NULL;
                }

                ret = dump_traces(&trace, 1, dumpfile);
                if (ret == 0) {
                        free(trace.name);
                        return dumpfile;
                }
                free(trace.name);
                free(dumpfile);
        }
        return NULL;
}
struct trace *open_trace(char *filename)
{
        int fd;
        char *p;
        struct stat st;
        int ret;
        struct trace *trace;
        char *found_filename;

        trace = calloc(1, sizeof(*trace));
        if (!trace) {
                fprintf(stderr, "unable to allocate memory for trace\n");
                return NULL;
        }

        found_filename = find_trace_file(filename);
        if (!found_filename) {
                fprintf(stderr, "Unable to find trace file %s\n", filename);
                goto fail;
        }
        filename = found_filename;

        fd = open(filename, O_RDONLY);
        if (fd < 0) {
                fprintf(stderr, "Unable to open trace file %s err %s\n", filename, strerror(errno));
                goto fail;
        }
        ret = fstat(fd, &st);
        if (ret < 0) {
                fprintf(stderr, "stat failed on %s err %s\n", filename, strerror(errno));
                goto fail_fd;
        }
        p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
                fprintf(stderr, "Unable to mmap trace file %s, err %s\n", filename, strerror(errno));
                goto fail_fd;
        }
        trace->fd = fd;
        trace->len = st.st_size;
        trace->start = p;
        trace->cur = p;
        trace->io = (struct blk_io_trace *)p;
        return trace;

fail_fd:
        close(fd);
fail:
        free(trace);
        return NULL;
}
static inline int tput_event(struct trace *trace)
{
        if (trace->found_completion)
                return __BLK_TA_COMPLETE;
        if (trace->found_issue)
                return __BLK_TA_ISSUE;
        if (trace->found_queue)
                return __BLK_TA_QUEUE;

        return __BLK_TA_COMPLETE;
}

int action_char_to_num(char action)
{
        switch (action) {
        case 'Q':
                return __BLK_TA_QUEUE;
        case 'D':
                return __BLK_TA_ISSUE;
        case 'C':
                return __BLK_TA_COMPLETE;
        }
        return -1;
}

static inline int io_event(struct trace *trace)
{
        if (plot_io_action)
                return plot_io_action;
        if (trace->found_queue)
                return __BLK_TA_QUEUE;
        if (trace->found_issue)
                return __BLK_TA_ISSUE;
        if (trace->found_completion)
                return __BLK_TA_COMPLETE;

        return __BLK_TA_COMPLETE;
}

void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
              struct graph_line_data *reads_gld)
{
        struct blk_io_trace *io = trace->io;
        struct graph_line_data *gld;
        int action = io->action & BLK_TA_MASK;
        int seconds;

        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;

        if (action != tput_event(trace))
                return;

        if (BLK_DATADIR(io->action) & BLK_TC_READ)
                gld = reads_gld;
        else
                gld = writes_gld;

        seconds = SECONDS(io->time);
        gld->data[seconds].sum += io->bytes;

        gld->data[seconds].count = 1;
        if (gld->data[seconds].sum > gld->max)
                gld->max = gld->data[seconds].sum;
}

#define GDD_PTR_ALLOC_STEP 16

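/*
 * Look up (or create) the per-process plot slot for a pid, growing the
 * gdd_reads/gdd_writes arrays in GDD_PTR_ALLOC_STEP chunks as new
 * processes show up.  Returns NULL when per-process IO is disabled.
 */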
static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
{
        struct pid_map *pm;

        if (!io_per_process) {
                if (!tf->io_plots)
                        tf->io_plots = 1;
                return NULL;
        }

        pm = process_hash_insert(pid, NULL);
        /* New entry? */
        if (!pm->index) {
                if (tf->io_plots == tf->io_plots_allocated) {
                        tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
                        tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
                        if (!tf->gdd_reads)
                                abort();
                        tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
                        if (!tf->gdd_writes)
                                abort();
                        memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
                               0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
                        memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
                               0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
                }
                pm->index = tf->io_plots++;

                return pm;
        }
        return pm;
}

void add_io(struct trace *trace, struct trace_file *tf)
{
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        u64 offset;
        int index;
        char *label;
        struct pid_map *pm;

        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;

        if (action != io_event(trace))
                return;

        offset = map_io(trace, io);

        pm = get_pid_map(tf, io->pid);
        if (!pm) {
                index = 0;
                label = "";
        } else {
                index = pm->index;
                label = pm->name;
        }
        if (BLK_DATADIR(io->action) & BLK_TC_READ) {
                if (!tf->gdd_reads[index])
                        tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
                set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
        } else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
                if (!tf->gdd_writes[index])
                        tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
                set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
        }
}

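/*
 * Track the number of IOs in flight: queue events are hashed, issue
 * events bump the counter and add it to this second's totals, and
 * requeues drop the counter again.
 */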
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
        unsigned int seconds;
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        double avg;
        struct pending_io *pio;

        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;

        if (action == __BLK_TA_QUEUE) {
                if (trace->found_issue || trace->found_completion)
                        hash_queued_io(trace->io);
                return;
        }
        if (action == __BLK_TA_REQUEUE) {
                if (ios_in_flight > 0)
                        ios_in_flight--;
                return;
        }
        if (action != __BLK_TA_ISSUE)
                return;

        pio = hash_dispatched_io(trace->io);
        if (!pio)
                return;

        if (!trace->found_completion) {
                list_del(&pio->hash_list);
                free(pio);
        }

        ios_in_flight++;

        seconds = SECONDS(io->time);
        gld->data[seconds].sum += ios_in_flight;
        gld->data[seconds].count++;

        avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
        if (gld->max < (u64)avg) {
                gld->max = avg;
        }
}

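/*
 * On completion, charge the latency since dispatch to this second's
 * totals and drop the pending_io from the hash.
 */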
void add_completed_io(struct trace *trace,
                      struct graph_line_data *latency_gld)
{
        struct blk_io_trace *io = trace->io;
        int seconds;
        int action = io->action & BLK_TA_MASK;
        struct pending_io *pio;
        double avg;
        u64 latency;

        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;

        if (action != __BLK_TA_COMPLETE)
                return;

        seconds = SECONDS(io->time);

        pio = hash_completed_io(trace->io);
        if (!pio)
                return;

        if (ios_in_flight > 0)
                ios_in_flight--;
        if (io->time >= pio->dispatch_time) {
                latency = io->time - pio->dispatch_time;
                latency_gld->data[seconds].sum += latency;
                latency_gld->data[seconds].count++;
        }

        list_del(&pio->hash_list);
        free(pio);

        avg = (double)latency_gld->data[seconds].sum /
                latency_gld->data[seconds].count;
        if (latency_gld->max < (u64)avg) {
                latency_gld->max = avg;
        }
}

void add_iop(struct trace *trace, struct graph_line_data *gld)
{
        struct blk_io_trace *io = trace->io;
        int action = io->action & BLK_TA_MASK;
        int seconds;

        if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                return;

        /* iops and tput use the same events */
        if (action != tput_event(trace))
                return;

        seconds = SECONDS(io->time);
        gld->data[seconds].sum += 1;
        gld->data[seconds].count = 1;
        if (gld->data[seconds].sum > gld->max)
                gld->max = gld->data[seconds].sum;
}

void check_record(struct trace *trace)
{
        handle_notify(trace);
}