btrace2fio: add support for collapsing close entries
[fio.git] / t / btrace2fio.c
CommitLineData
40bafa33
JA
1#include <stdio.h>
2#include <stdio.h>
3#include <unistd.h>
4#include <inttypes.h>
ccfcf5bf 5#include <math.h>
40bafa33
JA
6#include <assert.h>
7
8#include "../io_ddir.h"
9#include "../flist.h"
10#include "../hash.h"
11#include "../fifo.h"
12#include "../blktrace_api.h"
13#include "../os/os.h"
14#include "../log.h"
15#include "../lib/linux-dev-lookup.h"
16
#define TRACE_FIFO_SIZE	8192

/*
 * Pruning thresholds and output options. Each one can be overridden from
 * the command line, see the option letters parsed in main().
 */
static unsigned int rt_threshold = 1000000;	/* -t */
static unsigned int ios_threshold = 10;		/* -n */
static unsigned int rate_threshold;		/* -r */
static unsigned int set_rate;			/* -R */
static unsigned int max_depth = 256;		/* -D */
static int output_ascii = 1;			/* cleared by -f */
static char *filename;				/* -d */

/*
 * Collapse defaults
 */
static unsigned int collapse_entries = 0;	/* -c */
static unsigned int depth_diff = 1;		/* -u */
static unsigned int random_diff = 5;		/* -x */
33
40bafa33
JA
/*
 * One observed block size and how often it was seen.
 */
struct bs {
	unsigned int bs;	/* IO size, as taken from the trace 'bytes' field */
	unsigned int nr;	/* number of IOs seen with this size */
	int merges;		/* summed when two entries are collapsed */
};
39
/*
 * A device that a traced pid issued IO to.
 */
struct trace_file {
	char *name;	/* device path, or the -d override */
	int major;
	int minor;
};
44
45struct btrace_out {
46 unsigned long ios[DDIR_RWDIR_CNT];
40bafa33
JA
47 unsigned long merges[DDIR_RWDIR_CNT];
48
49 uint64_t last_end[DDIR_RWDIR_CNT];
50 uint64_t seq[DDIR_RWDIR_CNT];
51
52 struct bs *bs[DDIR_RWDIR_CNT];
53 unsigned int nr_bs[DDIR_RWDIR_CNT];
54
55 int inflight;
56 unsigned int depth;
85fcdac8
JA
57 int depth_disabled;
58 int complete_seen;
59
8fc46b4d
JA
60 uint64_t first_ttime[DDIR_RWDIR_CNT];
61 uint64_t last_ttime[DDIR_RWDIR_CNT];
62 uint64_t kb[DDIR_RWDIR_CNT];
40bafa33 63
40bafa33
JA
64 uint64_t start_delay;
65};
66
67struct btrace_pid {
68 struct flist_head hash_list;
69 struct flist_head pid_list;
70 pid_t pid;
65ff9cd5 71
ccfcf5bf
JA
72 pid_t *merge_pids;
73 unsigned int nr_merge_pids;
74
65ff9cd5
JA
75 struct trace_file *files;
76 int nr_files;
77 unsigned int last_major, last_minor;
ccfcf5bf
JA
78 int numjobs;
79 int ignore;
65ff9cd5 80
40bafa33
JA
81 struct btrace_out o;
82};
83
84struct inflight {
85 struct flist_head list;
86 struct btrace_pid *p;
87 uint64_t end_sector;
88};
89
90#define PID_HASH_BITS 10
91#define PID_HASH_SIZE (1U << PID_HASH_BITS)
92
93static struct flist_head pid_hash[PID_HASH_SIZE];
94static FLIST_HEAD(pid_list);
95
17bf0853
JA
96#define INFLIGHT_HASH_BITS 8
97#define INFLIGHT_HASH_SIZE (1U << INFLIGHT_HASH_BITS)
98static struct flist_head inflight_hash[INFLIGHT_HASH_SIZE];
40bafa33
JA
99
100static uint64_t first_ttime = -1ULL;
101
102static struct inflight *inflight_find(uint64_t sector)
103{
17bf0853 104 struct flist_head *inflight_list;
40bafa33
JA
105 struct flist_head *e;
106
17bf0853
JA
107 inflight_list = &inflight_hash[hash_long(sector, INFLIGHT_HASH_BITS)];
108
109 flist_for_each(e, inflight_list) {
40bafa33
JA
110 struct inflight *i = flist_entry(e, struct inflight, list);
111
112 if (i->end_sector == sector)
113 return i;
114 }
115
116 return NULL;
117}
118
119static void inflight_remove(struct inflight *i)
120{
121 struct btrace_out *o = &i->p->o;
122
123 o->inflight--;
124 assert(o->inflight >= 0);
125 flist_del(&i->list);
126 free(i);
127}
128
17bf0853 129static void __inflight_add(struct inflight *i)
40bafa33 130{
17bf0853
JA
131 struct flist_head *list;
132
133 list = &inflight_hash[hash_long(i->end_sector, INFLIGHT_HASH_BITS)];
134 flist_add_tail(&i->list, list);
40bafa33
JA
135}
136
137static void inflight_add(struct btrace_pid *p, uint64_t sector, uint32_t len)
138{
139 struct btrace_out *o = &p->o;
140 struct inflight *i;
141
142 i = calloc(1, sizeof(*i));
143 i->p = p;
144 o->inflight++;
85fcdac8
JA
145 if (!o->depth_disabled) {
146 o->depth = max((int) o->depth, o->inflight);
147 if (o->depth >= max_depth && !o->complete_seen) {
148 o->depth_disabled = 1;
149 o->depth = max_depth;
150 }
151 }
40bafa33 152 i->end_sector = sector + (len >> 9);
17bf0853
JA
153 __inflight_add(i);
154}
155
156static void inflight_merge(struct inflight *i, int rw, unsigned int size)
157{
158 i->p->o.merges[rw]++;
159 if (size) {
160 i->end_sector += (size >> 9);
161 flist_del(&i->list);
162 __inflight_add(i);
163 }
40bafa33
JA
164}
165
166/*
167 * fifo refill frontend, to avoid reading data in trace sized bites
168 */
169static int refill_fifo(struct fifo *fifo, int fd)
170{
171 char buf[TRACE_FIFO_SIZE];
172 unsigned int total;
173 int ret;
174
175 total = sizeof(buf);
176 if (total > fifo_room(fifo))
177 total = fifo_room(fifo);
178
179 ret = read(fd, buf, total);
180 if (ret < 0) {
181 perror("read refill");
182 return -1;
183 }
184
185 if (ret > 0)
186 ret = fifo_put(fifo, buf, ret);
187
188 return ret;
189}
190
/*
 * Retrieve 'len' bytes from the fifo, refilling if necessary. A negative
 * refill result is passed straight back to the caller.
 */
static int trace_fifo_get(struct fifo *fifo, int fd, void *buf,
			  unsigned int len)
{
	int rc = 0;

	if (fifo_len(fifo) < len)
		rc = refill_fifo(fifo, fd);
	if (rc < 0)
		return rc;

	return fifo_get(fifo, buf, len);
}
206
207/*
208 * Just discard the pdu by seeking past it.
209 */
210static int discard_pdu(struct fifo *fifo, int fd, struct blk_io_trace *t)
211{
212 if (t->pdu_len == 0)
213 return 0;
214
215 return trace_fifo_get(fifo, fd, NULL, t->pdu_len);
216}
217
8fc46b4d 218static int handle_trace_notify(struct blk_io_trace *t)
40bafa33
JA
219{
220 switch (t->action) {
221 case BLK_TN_PROCESS:
222 //printf("got process notify: %x, %d\n", t->action, t->pid);
223 break;
224 case BLK_TN_TIMESTAMP:
225 //printf("got timestamp notify: %x, %d\n", t->action, t->pid);
226 break;
227 case BLK_TN_MESSAGE:
228 break;
229 default:
17bf0853 230 log_err("unknown trace act %x\n", t->action);
8fc46b4d 231 return 1;
40bafa33 232 }
8fc46b4d
JA
233
234 return 0;
40bafa33
JA
235}
236
237static void __add_bs(struct btrace_out *o, unsigned int len, int rw)
238{
239 o->bs[rw] = realloc(o->bs[rw], (o->nr_bs[rw] + 1) * sizeof(struct bs));
240 o->bs[rw][o->nr_bs[rw]].bs = len;
241 o->bs[rw][o->nr_bs[rw]].nr = 1;
242 o->nr_bs[rw]++;
243}
244
245static void add_bs(struct btrace_out *o, unsigned int len, int rw)
246{
247 struct bs *bs = o->bs[rw];
248 int i;
249
250 if (!o->nr_bs[rw]) {
251 __add_bs(o, len, rw);
252 return;
253 }
254
255 for (i = 0; i < o->nr_bs[rw]; i++) {
256 if (bs[i].bs == len) {
257 bs[i].nr++;
258 return;
259 }
260 }
261
262 __add_bs(o, len, rw);
263}
264
/*
 * Split a device number into major/minor: the low FMINORBITS bits hold
 * the minor, everything above the major.
 */
#define FMINORBITS	20
#define FMINORMASK	((1U << FMINORBITS) - 1)
#define FMAJOR(dev)	((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev)	((unsigned int) ((dev) & FMINORMASK))
269
8fc46b4d 270static int btrace_add_file(struct btrace_pid *p, uint32_t devno)
40bafa33
JA
271{
272 unsigned int maj = FMAJOR(devno);
273 unsigned int min = FMINOR(devno);
274 struct trace_file *f;
275 unsigned int i;
276 char dev[256];
277
35661615 278 if (filename)
8fc46b4d 279 return 0;
65ff9cd5 280 if (p->last_major == maj && p->last_minor == min)
8fc46b4d 281 return 0;
40bafa33 282
65ff9cd5
JA
283 p->last_major = maj;
284 p->last_minor = min;
40bafa33
JA
285
286 /*
287 * check for this file in our list
288 */
65ff9cd5
JA
289 for (i = 0; i < p->nr_files; i++) {
290 f = &p->files[i];
40bafa33
JA
291
292 if (f->major == maj && f->minor == min)
8fc46b4d 293 return 0;
40bafa33
JA
294 }
295
296 strcpy(dev, "/dev");
297 if (!blktrace_lookup_device(NULL, dev, maj, min)) {
298 log_err("fio: failed to find device %u/%u\n", maj, min);
8fc46b4d
JA
299 if (!output_ascii) {
300 log_err("fio: use -d to specify device\n");
301 return 1;
302 }
303 return 0;
40bafa33
JA
304 }
305
65ff9cd5
JA
306 p->files = realloc(p->files, (p->nr_files + 1) * sizeof(*f));
307 f = &p->files[p->nr_files];
40bafa33
JA
308 f->name = strdup(dev);
309 f->major = maj;
310 f->minor = min;
65ff9cd5 311 p->nr_files++;
8fc46b4d
JA
312 return 0;
313}
314
315static int t_to_rwdir(struct blk_io_trace *t)
316{
317 if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
318 return DDIR_TRIM;
319
320 return (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
40bafa33
JA
321}
322
8fc46b4d 323static int handle_trace_discard(struct blk_io_trace *t, struct btrace_pid *p)
40bafa33 324{
65ff9cd5
JA
325 struct btrace_out *o = &p->o;
326
8fc46b4d
JA
327 if (btrace_add_file(p, t->device))
328 return 1;
40bafa33 329
8fc46b4d
JA
330 if (o->first_ttime[2] == -1ULL)
331 o->first_ttime[2] = t->time;
40bafa33
JA
332
333 o->ios[DDIR_TRIM]++;
334 add_bs(o, t->bytes, DDIR_TRIM);
8fc46b4d 335 return 0;
40bafa33
JA
336}
337
8fc46b4d 338static int handle_trace_fs(struct blk_io_trace *t, struct btrace_pid *p)
40bafa33 339{
65ff9cd5 340 struct btrace_out *o = &p->o;
40bafa33
JA
341 int rw;
342
8fc46b4d
JA
343 if (btrace_add_file(p, t->device))
344 return 1;
40bafa33
JA
345
346 first_ttime = min(first_ttime, (uint64_t) t->time);
347
40bafa33
JA
348 rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
349
8fc46b4d
JA
350 if (o->first_ttime[rw] == -1ULL)
351 o->first_ttime[rw] = t->time;
352
40bafa33
JA
353 add_bs(o, t->bytes, rw);
354 o->ios[rw]++;
355
356 if (t->sector == o->last_end[rw] || o->last_end[rw] == -1ULL)
357 o->seq[rw]++;
358
359 o->last_end[rw] = t->sector + (t->bytes >> 9);
8fc46b4d 360 return 0;
40bafa33
JA
361}
362
8fc46b4d 363static int handle_queue_trace(struct blk_io_trace *t, struct btrace_pid *p)
40bafa33
JA
364{
365 if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
8fc46b4d 366 return handle_trace_notify(t);
40bafa33 367 else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
8fc46b4d 368 return handle_trace_discard(t, p);
40bafa33 369 else
8fc46b4d 370 return handle_trace_fs(t, p);
40bafa33
JA
371}
372
8fc46b4d 373static int handle_trace(struct blk_io_trace *t, struct btrace_pid *p)
40bafa33
JA
374{
375 unsigned int act = t->action & 0xffff;
8fc46b4d 376 int ret = 0;
40bafa33
JA
377
378 if (act == __BLK_TA_QUEUE) {
379 inflight_add(p, t->sector, t->bytes);
8fc46b4d 380 ret = handle_queue_trace(t, p);
40bafa33
JA
381 } else if (act == __BLK_TA_BACKMERGE) {
382 struct inflight *i;
383
384 i = inflight_find(t->sector + (t->bytes >> 9));
385 if (i)
386 inflight_remove(i);
387
388 i = inflight_find(t->sector);
8fc46b4d
JA
389 if (i)
390 inflight_merge(i, t_to_rwdir(t), t->bytes);
40bafa33
JA
391 } else if (act == __BLK_TA_FRONTMERGE) {
392 struct inflight *i;
393
394 i = inflight_find(t->sector + (t->bytes >> 9));
395 if (i)
396 inflight_remove(i);
397
398 i = inflight_find(t->sector);
8fc46b4d
JA
399 if (i)
400 inflight_merge(i, t_to_rwdir(t), 0);
40bafa33
JA
401 } else if (act == __BLK_TA_COMPLETE) {
402 struct inflight *i;
403
404 i = inflight_find(t->sector + (t->bytes >> 9));
cea475cd 405 if (i) {
8fc46b4d 406 i->p->o.kb[t_to_rwdir(t)] += (t->bytes >> 10);
85fcdac8 407 i->p->o.complete_seen = 1;
40bafa33 408 inflight_remove(i);
cea475cd 409 }
40bafa33 410 }
8fc46b4d
JA
411
412 return ret;
40bafa33
JA
413}
414
415static void byteswap_trace(struct blk_io_trace *t)
416{
417 t->magic = fio_swap32(t->magic);
418 t->sequence = fio_swap32(t->sequence);
419 t->time = fio_swap64(t->time);
420 t->sector = fio_swap64(t->sector);
421 t->bytes = fio_swap32(t->bytes);
422 t->action = fio_swap32(t->action);
423 t->pid = fio_swap32(t->pid);
424 t->device = fio_swap32(t->device);
425 t->cpu = fio_swap32(t->cpu);
426 t->error = fio_swap16(t->error);
427 t->pdu_len = fio_swap16(t->pdu_len);
428}
429
430static struct btrace_pid *pid_hash_find(pid_t pid, struct flist_head *list)
431{
432 struct flist_head *e;
433 struct btrace_pid *p;
434
435 flist_for_each(e, list) {
436 p = flist_entry(e, struct btrace_pid, hash_list);
437 if (p->pid == pid)
438 return p;
439 }
440
441 return NULL;
442}
443
444static struct btrace_pid *pid_hash_get(pid_t pid)
445{
446 struct flist_head *hash_list;
447 struct btrace_pid *p;
448
449 hash_list = &pid_hash[hash_long(pid, PID_HASH_BITS)];
450
451 p = pid_hash_find(pid, hash_list);
452 if (!p) {
453 int i;
454
455 p = calloc(1, sizeof(*p));
40bafa33 456
8fc46b4d
JA
457 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
458 p->o.first_ttime[i] = -1ULL;
459 p->o.last_ttime[i] = -1ULL;
40bafa33 460 p->o.last_end[i] = -1ULL;
8fc46b4d 461 }
40bafa33
JA
462
463 p->pid = pid;
ccfcf5bf 464 p->numjobs = 1;
40bafa33
JA
465 flist_add_tail(&p->hash_list, hash_list);
466 flist_add_tail(&p->pid_list, &pid_list);
467 }
468
469 return p;
470}
471
472/*
473 * Load a blktrace file by reading all the blk_io_trace entries, and storing
474 * them as io_pieces like the fio text version would do.
475 */
8a1db9a1 476static int load_blktrace(const char *fname, int need_swap)
40bafa33
JA
477{
478 struct btrace_pid *p;
479 unsigned long traces;
480 struct blk_io_trace t;
481 struct fifo *fifo;
8fc46b4d 482 int fd, ret = 0;
40bafa33 483
8a1db9a1 484 fd = open(fname, O_RDONLY);
40bafa33
JA
485 if (fd < 0) {
486 perror("open trace file\n");
487 return 1;
488 }
489
490 fifo = fifo_alloc(TRACE_FIFO_SIZE);
491
492 traces = 0;
493 do {
8a1db9a1 494 ret = trace_fifo_get(fifo, fd, &t, sizeof(t));
40bafa33
JA
495 if (ret < 0)
496 goto err;
497 else if (!ret)
498 break;
499 else if (ret < (int) sizeof(t)) {
17bf0853 500 log_err("fio: short fifo get\n");
40bafa33
JA
501 break;
502 }
503
504 if (need_swap)
505 byteswap_trace(&t);
506
507 if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
17bf0853 508 log_err("fio: bad magic in blktrace data: %x\n", t.magic);
40bafa33
JA
509 goto err;
510 }
511 if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
17bf0853 512 log_err("fio: bad blktrace version %d\n", t.magic & 0xff);
40bafa33
JA
513 goto err;
514 }
515 ret = discard_pdu(fifo, fd, &t);
516 if (ret < 0) {
17bf0853 517 log_err("blktrace lseek\n");
40bafa33
JA
518 goto err;
519 } else if (t.pdu_len != ret) {
17bf0853 520 log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
40bafa33
JA
521 goto err;
522 }
523
524 p = pid_hash_get(t.pid);
8fc46b4d
JA
525 ret = handle_trace(&t, p);
526 if (ret)
527 break;
528 p->o.last_ttime[t_to_rwdir(&t)] = t.time;
40bafa33
JA
529 traces++;
530 } while (1);
531
532 fifo_free(fifo);
533 close(fd);
534
8fc46b4d
JA
535 if (ret)
536 return ret;
537
40bafa33
JA
538 if (output_ascii)
539 printf("Traces loaded: %lu\n", traces);
540
541 return 0;
542err:
543 close(fd);
544 fifo_free(fifo);
545 return 1;
546}
547
548static int bs_cmp(const void *ba, const void *bb)
549{
550 const struct bs *bsa = ba;
551 const struct bs *bsb = bb;
552
553 return bsb->nr - bsa->nr;
554}
555
8fc46b4d
JA
556static unsigned long o_to_kb_rate(struct btrace_out *o, int rw)
557{
558 uint64_t usec = (o->last_ttime[rw] - o->first_ttime[rw]) / 1000ULL;
559 uint64_t val;
560
97b32195
JA
561 if (!usec)
562 return 0;
563
564 usec /= 1000;
8fc46b4d
JA
565 if (!usec)
566 return 0;
567
568 val = o->kb[rw] * 1000ULL;
97b32195 569 return val / usec;
8fc46b4d
JA
570}
571
572static uint64_t o_first_ttime(struct btrace_out *o)
573{
574 uint64_t first;
575
576 first = min(o->first_ttime[0], o->first_ttime[1]);
577 return min(first, o->first_ttime[2]);
578}
579
580static uint64_t o_longest_ttime(struct btrace_out *o)
581{
582 uint64_t ret = 0;
583 int i;
584
585 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
586 uint64_t diff;
587
588 diff = o->last_ttime[i] - o->first_ttime[i];
589 ret = max(diff, ret);
590 }
591
592 return ret;
593}
594
40bafa33
JA
595static void __output_p_ascii(struct btrace_pid *p, unsigned long *ios)
596{
597 const char *msg[] = { "reads", "writes", "trims" };
598 struct btrace_out *o = &p->o;
cea475cd 599 unsigned long total, usec;
40bafa33
JA
600 int i, j;
601
ccfcf5bf
JA
602 printf("[pid:\t%u", p->pid);
603 if (p->nr_merge_pids)
604 for (i = 0; i < p->nr_merge_pids; i++)
605 printf(", %u", p->merge_pids[i]);
606 printf("]\n");
40bafa33
JA
607
608 total = ddir_rw_sum(o->ios);
609 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
610 float perc;
611
612 if (!o->ios[i])
613 continue;
614
615 ios[i] += o->ios[i] + o->merges[i];
616 printf("%s\n", msg[i]);
617 perc = ((float) o->ios[i] * 100.0) / (float) total;
618 printf("\tios: %lu (perc=%3.2f%%)\n", o->ios[i], perc);
619 perc = ((float) o->merges[i] * 100.0) / (float) total;
620 printf("\tmerges: %lu (perc=%3.2f%%)\n", o->merges[i], perc);
621 perc = ((float) o->seq[i] * 100.0) / (float) o->ios[i];
761c2729 622 printf("\tseq: %lu (perc=%3.2f%%)\n", (unsigned long) o->seq[i], perc);
8fc46b4d 623 printf("\trate: %lu KB/sec\n", o_to_kb_rate(o, i));
40bafa33
JA
624
625 for (j = 0; j < o->nr_bs[i]; j++) {
626 struct bs *bs = &o->bs[i][j];
627
628 perc = (((float) bs->nr * 100.0) / (float) o->ios[i]);
629 printf("\tbs=%u, perc=%3.2f%%\n", bs->bs, perc);
630 }
631 }
632
633 printf("depth:\t%u\n", o->depth);
8fc46b4d 634 usec = o_longest_ttime(o) / 1000ULL;
cea475cd 635 printf("usec:\t%lu (delay=%llu)\n", usec, (unsigned long long) o->start_delay);
40bafa33
JA
636
637 printf("files:\t");
65ff9cd5
JA
638 for (i = 0; i < p->nr_files; i++)
639 printf("%s,", p->files[i].name);
40bafa33
JA
640 printf("\n");
641
642 printf("\n");
643}
644
645static int __output_p_fio(struct btrace_pid *p, unsigned long *ios)
646{
647 struct btrace_out *o = &p->o;
648 unsigned long total;
1a8cad44 649 unsigned long long time;
40bafa33
JA
650 float perc;
651 int i, j;
652
653 if ((o->ios[0] + o->ios[1]) && o->ios[2]) {
654 log_err("fio: trace has both read/write and trim\n");
655 return 1;
656 }
8fc46b4d
JA
657 if (!p->nr_files) {
658 log_err("fio: no devices found\n");
659 return 1;
660 }
40bafa33 661
ccfcf5bf
JA
662 printf("[pid%u", p->pid);
663 if (p->nr_merge_pids)
664 for (i = 0; i < p->nr_merge_pids; i++)
665 printf(",pid%u", p->merge_pids[i]);
666 printf("]\n");
667
668 printf("numjobs=%u\n", p->numjobs);
40bafa33
JA
669 printf("direct=1\n");
670 if (o->depth == 1)
671 printf("ioengine=sync\n");
672 else
673 printf("ioengine=libaio\niodepth=%u\n", o->depth);
674
675 if (o->ios[0] && !o->ios[1])
676 printf("rw=randread\n");
677 else if (!o->ios[0] && o->ios[1])
678 printf("rw=randwrite\n");
679 else if (o->ios[2])
680 printf("rw=randtrim\n");
681 else {
682 printf("rw=randrw\n");
683 total = ddir_rw_sum(o->ios);
684 perc = ((float) o->ios[0] * 100.0) / (float) total;
ccfcf5bf 685 printf("rwmixread=%u\n", (int) floor(perc + 0.50));
40bafa33
JA
686 }
687
35661615 688 printf("percentage_random=");
40bafa33
JA
689 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
690 if (o->seq[i] && o->ios[i]) {
691 perc = ((float) o->seq[i] * 100.0) / (float) o->ios[i];
692 if (perc >= 99.0)
693 perc = 100.0;
694 } else
695 perc = 100.0;
696
697 if (i)
698 printf(",");
35661615 699 perc = 100.0 - perc;
ccfcf5bf 700 printf("%u", (int) floor(perc + 0.5));
40bafa33
JA
701 }
702 printf("\n");
703
704 printf("filename=");
65ff9cd5 705 for (i = 0; i < p->nr_files; i++) {
40bafa33
JA
706 if (i)
707 printf(":");
65ff9cd5 708 printf("%s", p->files[i].name);
40bafa33
JA
709 }
710 printf("\n");
711
ccfcf5bf
JA
712 if (o->start_delay / 1000000ULL)
713 printf("startdelay=%llus\n", o->start_delay / 1000000ULL);
40bafa33 714
8fc46b4d 715 time = o_longest_ttime(o);
1a8cad44
JA
716 time = (time + 1000000000ULL - 1) / 1000000000ULL;
717 printf("runtime=%llus\n", time);
718
40bafa33
JA
719 printf("bssplit=");
720 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
721
722 if (i && o->nr_bs[i - 1] && o->nr_bs[i])
723 printf(",");
724
725 for (j = 0; j < o->nr_bs[i]; j++) {
726 struct bs *bs = &o->bs[i][j];
727
728 perc = (((float) bs->nr * 100.0) / (float) o->ios[i]);
729 if (perc < 1.00)
730 continue;
731 if (j)
732 printf(":");
733 if (j + 1 == o->nr_bs[i])
734 printf("%u/", bs->bs);
735 else
ccfcf5bf 736 printf("%u/%u", bs->bs, (int) floor(perc + 0.5));
40bafa33
JA
737 }
738 }
8fc46b4d 739 printf("\n");
40bafa33 740
8fc46b4d
JA
741 if (set_rate) {
742 printf("rate=");
743 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
744 unsigned long rate;
745
746 rate = o_to_kb_rate(o, i);
747 if (i)
748 printf(",");
749 if (rate)
750 printf("%luk", rate);
751 }
752 printf("\n");
753 }
754
755 printf("\n");
40bafa33
JA
756 return 0;
757}
758
759static int __output_p(struct btrace_pid *p, unsigned long *ios)
760{
761 struct btrace_out *o = &p->o;
762 int i, ret = 0;
763
764 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
765 if (o->nr_bs[i] <= 1)
766 continue;
767 qsort(o->bs[i], o->nr_bs[i], sizeof(struct bs), bs_cmp);
768 }
769
35661615 770 if (filename) {
65ff9cd5
JA
771 p->files = malloc(sizeof(struct trace_file));
772 p->nr_files++;
773 p->files[0].name = filename;
35661615
JA
774 }
775
40bafa33
JA
776 if (output_ascii)
777 __output_p_ascii(p, ios);
778 else
779 ret = __output_p_fio(p, ios);
780
781 return ret;
782}
783
8fc46b4d
JA
784static void remove_ddir(struct btrace_out *o, int rw)
785{
786 o->ios[rw] = 0;
787}
788
40bafa33
JA
789static int prune_entry(struct btrace_out *o)
790{
8fc46b4d 791 unsigned long rate;
40bafa33 792 uint64_t time;
8fc46b4d 793 int i;
40bafa33
JA
794
795 if (ddir_rw_sum(o->ios) < ios_threshold)
796 return 1;
797
8fc46b4d 798 time = o_longest_ttime(o) / 1000ULL;
40bafa33
JA
799 if (time < rt_threshold)
800 return 1;
801
8fc46b4d
JA
802 rate = 0;
803 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
804 unsigned long this_rate;
805
806 this_rate = o_to_kb_rate(o, i);
807 if (this_rate < rate_threshold) {
808 remove_ddir(o, i);
809 this_rate = 0;
810 }
811 rate += this_rate;
812 }
813
814 if (rate < rate_threshold)
815 return 1;
816
40bafa33
JA
817 return 0;
818}
819
820static int entry_cmp(void *priv, struct flist_head *a, struct flist_head *b)
821{
822 struct btrace_pid *pa = flist_entry(a, struct btrace_pid, pid_list);
823 struct btrace_pid *pb = flist_entry(b, struct btrace_pid, pid_list);
824
825 return ddir_rw_sum(pb->o.ios) - ddir_rw_sum(pa->o.ios);
826}
827
17bf0853
JA
828static void free_p(struct btrace_pid *p)
829{
830 struct btrace_out *o = &p->o;
831 int i;
832
65ff9cd5
JA
833 for (i = 0; i < p->nr_files; i++) {
834 if (p->files[i].name && p->files[i].name != filename)
835 free(p->files[i].name);
17bf0853
JA
836 }
837
838 for (i = 0; i < DDIR_RWDIR_CNT; i++)
839 free(o->bs[i]);
840
65ff9cd5 841 free(p->files);
17bf0853
JA
842 flist_del(&p->pid_list);
843 flist_del(&p->hash_list);
844 free(p);
845}
846
ccfcf5bf
JA
847static int entries_close(struct btrace_pid *pida, struct btrace_pid *pidb)
848{
849 float perca, percb, fdiff;
850 int i, idiff;
851
852 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
853 if ((pida->o.ios[i] && !pidb->o.ios[i]) ||
854 (pidb->o.ios[i] && !pida->o.ios[i]))
855 return 0;
856 if (pida->o.ios[i] && pidb->o.ios[i]) {
857 perca = ((float) pida->o.seq[i] * 100.0) / (float) pida->o.ios[i];
858 percb = ((float) pidb->o.seq[i] * 100.0) / (float) pidb->o.ios[i];
859 fdiff = perca - percb;
860 if (fabs(fdiff) > random_diff)
861 return 0;
862 }
863
864 idiff = pida->o.depth - pidb->o.depth;
865 if (abs(idiff) > depth_diff)
866 return 0;
867 }
868
869 return 1;
870}
871
872static void merge_bs(struct bs **bsap, unsigned int *nr_bsap,
873 struct bs *bsb, unsigned int nr_bsb)
874{
875 struct bs *bsa = *bsap;
876 unsigned int nr_bsa = *nr_bsap;
877 int a, b;
878
879 for (b = 0; b < nr_bsb; b++) {
880 int next, found = 0;
881
882 for (a = 0; a < nr_bsa; a++) {
883 if (bsb[b].bs != bsa[a].bs)
884 continue;
885
886 bsa[a].nr += bsb[b].nr;
887 bsa[a].merges += bsb[b].merges;
888 found = 1;
889 break;
890 }
891
892 if (found)
893 continue;
894
895 next = *nr_bsap;
896 bsa = realloc(bsa, (next + 1) * sizeof(struct bs));
897 bsa[next].bs = bsb[b].bs;
898 bsa[next].nr = bsb[b].nr;
899 (*nr_bsap)++;
900 *bsap = bsa;
901 }
902}
903
904static int merge_entries(struct btrace_pid *pida, struct btrace_pid *pidb)
905{
906 int i;
907
908 if (!entries_close(pida, pidb))
909 return 0;
910
911 pida->nr_merge_pids++;
912 pida->merge_pids = realloc(pida->merge_pids, pida->nr_merge_pids * sizeof(pid_t));
913 pida->merge_pids[pida->nr_merge_pids - 1] = pidb->pid;
914
915 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
916 struct btrace_out *oa = &pida->o;
917 struct btrace_out *ob = &pidb->o;
918
919 oa->ios[i] += ob->ios[i];
920 oa->merges[i] += ob->merges[i];
921 oa->seq[i] += ob->seq[i];
922 oa->kb[i] += ob->kb[i];
923 oa->first_ttime[i] = min(oa->first_ttime[i], ob->first_ttime[i]);
924 oa->last_ttime[i] = max(oa->last_ttime[i], ob->last_ttime[i]);
925 merge_bs(&oa->bs[i], &oa->nr_bs[i], ob->bs[i], ob->nr_bs[i]);
926 }
927
928 pida->o.start_delay = min(pida->o.start_delay, pidb->o.start_delay);
929 pida->o.depth = (pida->o.depth + pidb->o.depth) / 2;
930 return 1;
931}
932
933static void check_merges(struct btrace_pid *p, struct flist_head *pid_list)
934{
935 struct flist_head *e, *tmp;
936
937 if (p->ignore)
938 return;
939
940 flist_for_each_safe(e, tmp, pid_list) {
941 struct btrace_pid *pidb;
942
943 pidb = flist_entry(e, struct btrace_pid, pid_list);
944 if (pidb == p)
945 continue;
946
947 if (merge_entries(p, pidb)) {
948 pidb->ignore = 1;
949 p->numjobs++;
950 }
951 }
952}
953
40bafa33
JA
954static int output_p(void)
955{
956 unsigned long ios[DDIR_RWDIR_CNT];
957 struct flist_head *e, *tmp;
85fcdac8 958 int depth_disabled = 0;
40bafa33
JA
959 int ret = 0;
960
961 flist_for_each_safe(e, tmp, &pid_list) {
962 struct btrace_pid *p;
963
964 p = flist_entry(e, struct btrace_pid, pid_list);
965 if (prune_entry(&p->o)) {
17bf0853 966 free_p(p);
40bafa33
JA
967 continue;
968 }
8fc46b4d 969 p->o.start_delay = (o_first_ttime(&p->o) / 1000ULL) - first_ttime;
85fcdac8 970 depth_disabled += p->o.depth_disabled;
40bafa33
JA
971 }
972
ccfcf5bf
JA
973 if (collapse_entries) {
974 struct btrace_pid *p;
975
976 flist_for_each_safe(e, tmp, &pid_list) {
977 p = flist_entry(e, struct btrace_pid, pid_list);
978 check_merges(p, &pid_list);
979 }
980
981 flist_for_each_safe(e, tmp, &pid_list) {
982 p = flist_entry(e, struct btrace_pid, pid_list);
983 if (p->ignore)
984 free_p(p);
985 }
986 }
987
85fcdac8
JA
988 if (depth_disabled)
989 log_err("fio: missing completion traces, depths capped at %u\n", max_depth);
990
40bafa33
JA
991 memset(ios, 0, sizeof(ios));
992
993 flist_sort(NULL, &pid_list, entry_cmp);
994
995 flist_for_each(e, &pid_list) {
996 struct btrace_pid *p;
997
998 p = flist_entry(e, struct btrace_pid, pid_list);
999 ret |= __output_p(p, ios);
8fc46b4d
JA
1000 if (ret && !output_ascii)
1001 break;
40bafa33
JA
1002 }
1003
1004 if (output_ascii)
1005 printf("Total: reads=%lu, writes=%lu\n", ios[0], ios[1]);
1006
1007 return ret;
1008}
1009
1010static int usage(char *argv[])
1011{
ccfcf5bf 1012 log_err("%s: [options] <blktrace bin file>\n", argv[0]);
17bf0853
JA
1013 log_err("\t-t\tUsec threshold to ignore task\n");
1014 log_err("\t-n\tNumber IOS threshold to ignore task\n");
1015 log_err("\t-f\tFio job file output\n");
1016 log_err("\t-d\tUse this file/device for replay\n");
8fc46b4d 1017 log_err("\t-r\tIgnore jobs with less than this KB/sec rate\n");
ccfcf5bf 1018 log_err("\t-R\tSet rate in fio job (def=%u)\n", set_rate);
85fcdac8 1019 log_err("\t-D\tCap queue depth at this value (def=%u)\n", max_depth);
ccfcf5bf
JA
1020 log_err("\t-c\tCollapse \"identical\" jobs (def=%u)\n", collapse_entries);
1021 log_err("\t-u\tDepth difference for collapse (def=%u)\n", depth_diff);
1022 log_err("\t-x\tRandom difference for collapse (def=%u)\n", random_diff);
40bafa33
JA
1023 return 1;
1024}
1025
17bf0853 1026static int trace_needs_swap(const char *trace_file, int *swap)
40bafa33 1027{
40bafa33 1028 struct blk_io_trace t;
17bf0853
JA
1029 int fd, ret;
1030
1031 *swap = -1;
1032
1033 fd = open(trace_file, O_RDONLY);
1034 if (fd < 0) {
1035 perror("open");
1036 return 1;
1037 }
1038
1039 ret = read(fd, &t, sizeof(t));
1040 if (ret < 0) {
18be35e8 1041 close(fd);
17bf0853
JA
1042 perror("read");
1043 return 1;
1044 } else if (ret != sizeof(t)) {
18be35e8 1045 close(fd);
17bf0853
JA
1046 log_err("fio: short read on trace file\n");
1047 return 1;
1048 }
1049
1050 close(fd);
1051
1052 if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1053 *swap = 0;
1054 else {
1055 /*
1056 * Maybe it needs to be endian swapped...
1057 */
1058 t.magic = fio_swap32(t.magic);
1059 if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
1060 *swap = 1;
1061 }
1062
1063 if (*swap == -1) {
1064 log_err("fio: blktrace appears corrupt\n");
1065 return 1;
1066 }
1067
1068 return 0;
1069}
1070
1071int main(int argc, char *argv[])
1072{
1073 int need_swap, i, c;
40bafa33
JA
1074
1075 if (argc < 2)
1076 return usage(argv);
1077
ccfcf5bf 1078 while ((c = getopt(argc, argv, "t:n:fd:r:RD:c:u:x:")) != -1) {
40bafa33 1079 switch (c) {
8fc46b4d
JA
1080 case 'R':
1081 set_rate = 1;
1082 break;
1083 case 'r':
1084 rate_threshold = atoi(optarg);
1085 break;
40bafa33
JA
1086 case 't':
1087 rt_threshold = atoi(optarg);
1088 break;
1089 case 'n':
1090 ios_threshold = atoi(optarg);
1091 break;
1092 case 'f':
1093 output_ascii = 0;
1094 break;
35661615
JA
1095 case 'd':
1096 filename = strdup(optarg);
1097 break;
85fcdac8
JA
1098 case 'D':
1099 max_depth = atoi(optarg);
1100 break;
ccfcf5bf
JA
1101 case 'c':
1102 collapse_entries = atoi(optarg);
1103 break;
1104 case 'u':
1105 depth_diff = atoi(optarg);
1106 break;
1107 case 'x':
1108 random_diff = atoi(optarg);
1109 break;
40bafa33
JA
1110 case '?':
1111 default:
1112 return usage(argv);
1113 }
1114 }
1115
1116 if (argc == optind)
1117 return usage(argv);
1118
17bf0853 1119 if (trace_needs_swap(argv[optind], &need_swap))
40bafa33 1120 return 1;
40bafa33
JA
1121
1122 for (i = 0; i < PID_HASH_SIZE; i++)
1123 INIT_FLIST_HEAD(&pid_hash[i]);
17bf0853
JA
1124 for (i = 0; i < INFLIGHT_HASH_SIZE; i++)
1125 INIT_FLIST_HEAD(&inflight_hash[i]);
40bafa33
JA
1126
1127 load_blktrace(argv[optind], need_swap);
1128 first_ttime /= 1000ULL;
1129
1130 return output_p();
1131}