/*
 * blktrace support code for fio
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "flist.h"
#include "fio.h"
#include "blktrace.h"
#include "blktrace_api.h"
#include "oslib/linux-dev-lookup.h"

#define TRACE_FIFO_SIZE 8192

/*
 * fifo refill frontend, to avoid reading data one trace-sized bite at a time
 */
static int refill_fifo(struct thread_data *td, struct fifo *fifo, int fd)
{
        char buf[TRACE_FIFO_SIZE];
        unsigned int total;
        int ret;

        total = sizeof(buf);
        if (total > fifo_room(fifo))
                total = fifo_room(fifo);

        ret = read(fd, buf, total);
        if (ret < 0) {
                td_verror(td, errno, "read blktrace file");
                return -1;
        }

        if (ret > 0)
                ret = fifo_put(fifo, buf, ret);

        dprint(FD_BLKTRACE, "refill: filled %d bytes\n", ret);
        return ret;
}

/*
 * Retrieve 'len' bytes from the fifo, refilling if necessary.
 */
static int trace_fifo_get(struct thread_data *td, struct fifo *fifo, int fd,
                          void *buf, unsigned int len)
{
        if (fifo_len(fifo) < len) {
                int ret = refill_fifo(td, fifo, fd);

                if (ret < 0)
                        return ret;
        }

        return fifo_get(fifo, buf, len);
}

/*
 * Just discard the pdu by reading past it in the fifo; a NULL destination
 * simply advances the read pointer.
 */
static int discard_pdu(struct thread_data *td, struct fifo *fifo, int fd,
                       struct blk_io_trace *t)
{
        if (t->pdu_len == 0)
                return 0;

        dprint(FD_BLKTRACE, "discard pdu len %u\n", t->pdu_len);
        return trace_fifo_get(td, fifo, fd, NULL, t->pdu_len);
}

/*
 * Check if this is a blktrace binary data file. We read a single trace
 * into memory and check for the magic signature.
 */
bool is_blktrace(const char *filename, int *need_swap)
{
        struct blk_io_trace t;
        int fd, ret;

        fd = open(filename, O_RDONLY);
        if (fd < 0)
                return false;

        ret = read(fd, &t, sizeof(t));
        close(fd);

        if (ret < 0) {
                perror("read blktrace");
                return false;
        } else if (ret != sizeof(t)) {
                log_err("fio: short read on blktrace file\n");
                return false;
        }

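        /*
         * The magic field carries BLK_IO_TRACE_MAGIC in its upper three
         * bytes and the trace format version in the low byte, so mask
         * the version off before comparing.
         */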
        if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
                *need_swap = 0;
                return true;
        }

        /*
         * Maybe it needs to be endian swapped...
         */
        t.magic = fio_swap32(t.magic);
        if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
                *need_swap = 1;
                return true;
        }

        return false;
}

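/*
 * The kernel packs dev_t with the major number in the high bits and a
 * 20-bit minor in the low bits; these helpers unpack the __u32 device
 * field carried in each trace.
 */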
#define FMINORBITS      20
#define FMINORMASK      ((1U << FMINORBITS) - 1)
#define FMAJOR(dev)     ((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev)     ((unsigned int) ((dev) & FMINORMASK))

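/*
 * Queue a synthetic open or close event in the io log, so that replay
 * opens and closes the traced devices at the right points.
 */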
static void trace_add_open_close_event(struct thread_data *td, int fileno, enum file_log_act action)
{
        struct io_piece *ipo;

        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);

        ipo->ddir = DDIR_INVAL;
        ipo->fileno = fileno;
        ipo->file_action = action;
        flist_add_tail(&ipo->list, &td->io_log_list);
}

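/*
 * Map a trace's device number to an fio file, adding the device to the
 * job's file list the first time it is seen. The last lookup is cached,
 * since traces tend to arrive in runs against the same device.
 */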
static int trace_add_file(struct thread_data *td, __u32 device)
{
        static unsigned int last_maj, last_min, last_fileno;
        unsigned int maj = FMAJOR(device);
        unsigned int min = FMINOR(device);
        struct fio_file *f;
        char dev[256];
        unsigned int i;

        if (last_maj == maj && last_min == min)
                return last_fileno;

        last_maj = maj;
        last_min = min;

        /*
         * check for this file in our list
         */
        for_each_file(td, f, i)
                if (f->major == maj && f->minor == min) {
                        last_fileno = f->fileno;
                        return last_fileno;
                }

        strcpy(dev, "/dev");
        if (blktrace_lookup_device(td->o.replay_redirect, dev, maj, min)) {
                int fileno;

                if (td->o.replay_redirect)
                        dprint(FD_BLKTRACE, "device lookup: %d/%d overridden"
                                        " with: %s\n", maj, min,
                                        td->o.replay_redirect);
                else
                        dprint(FD_BLKTRACE, "device lookup: %d/%d\n", maj, min);

                dprint(FD_BLKTRACE, "add device %s\n", dev);
                fileno = add_file_exclusive(td, dev);
                td->o.open_files++;
                td->files[fileno]->major = maj;
                td->files[fileno]->minor = min;
                trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
                last_fileno = fileno;
        }

        return last_fileno;
}

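/*
 * Round the transfer size up to the next replay_align boundary; the
 * mask arithmetic assumes replay_align is a power of two.
 */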
static void t_bytes_align(struct thread_options *o, struct blk_io_trace *t)
{
        if (!o->replay_align)
                return;

        t->bytes = (t->bytes + o->replay_align - 1) & ~(o->replay_align - 1);
}

/*
 * Store blk_io_trace data in an ipo for later retrieval.
 */
static void store_ipo(struct thread_data *td, unsigned long long offset,
                      unsigned int bytes, int rw, unsigned long long ttime,
                      int fileno)
{
        struct io_piece *ipo;

        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);

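        /*
         * Trace sectors are 512-byte units and trace times nsecs; fio
         * replays delays in usecs, hence the divide by 1000 below.
         */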
        ipo->offset = offset * 512;
        if (td->o.replay_scale)
                ipo->offset = ipo->offset / td->o.replay_scale;
        ipo_bytes_align(td->o.replay_align, ipo);
        ipo->len = bytes;
        ipo->delay = ttime / 1000;
        if (rw)
                ipo->ddir = DDIR_WRITE;
        else
                ipo->ddir = DDIR_READ;
        ipo->fileno = fileno;

        dprint(FD_BLKTRACE, "store ddir=%d, off=%llu, len=%lu, delay=%lu\n",
                                                        ipo->ddir, ipo->offset,
                                                        ipo->len, ipo->delay);
        queue_io_piece(td, ipo);
}

static void handle_trace_notify(struct blk_io_trace *t)
{
        switch (t->action) {
        case BLK_TN_PROCESS:
                dprint(FD_BLKTRACE, "got process notify: %x, %d\n",
                                t->action, t->pid);
                break;
        case BLK_TN_TIMESTAMP:
                dprint(FD_BLKTRACE, "got timestamp notify: %x, %d\n",
                                t->action, t->pid);
                break;
        case BLK_TN_MESSAGE:
                break;
        default:
                dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
                break;
        }
}

static void handle_trace_discard(struct thread_data *td,
                                 struct blk_io_trace *t,
                                 unsigned long long ttime,
                                 unsigned long *ios, unsigned int *bs)
{
        struct io_piece *ipo;
        int fileno;

        if (td->o.replay_skip & (1u << DDIR_TRIM))
                return;

        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);
        fileno = trace_add_file(td, t->device);

        ios[DDIR_TRIM]++;
        if (t->bytes > bs[DDIR_TRIM])
                bs[DDIR_TRIM] = t->bytes;

        td->o.size += t->bytes;

        INIT_FLIST_HEAD(&ipo->list);

        ipo->offset = t->sector * 512;
        if (td->o.replay_scale)
                ipo->offset = ipo->offset / td->o.replay_scale;
        ipo_bytes_align(td->o.replay_align, ipo);
        ipo->len = t->bytes;
        ipo->delay = ttime / 1000;
        ipo->ddir = DDIR_TRIM;
        ipo->fileno = fileno;

        dprint(FD_BLKTRACE, "store discard, off=%llu, len=%lu, delay=%lu\n",
                                                        ipo->offset, ipo->len,
                                                        ipo->delay);
        queue_io_piece(td, ipo);
}

static void dump_trace(struct blk_io_trace *t)
{
        log_err("blktrace: ignoring zero byte trace: action=%x\n", t->action);
}

static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
                            unsigned long long ttime, unsigned long *ios,
                            unsigned int *bs)
{
        int rw;
        int fileno;

        fileno = trace_add_file(td, t->device);

        rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;

        if (rw) {
                if (td->o.replay_skip & (1u << DDIR_WRITE))
                        return;
        } else {
                if (td->o.replay_skip & (1u << DDIR_READ))
                        return;
        }

        if (!t->bytes) {
                if (!fio_did_warn(FIO_WARN_BTRACE_ZERO))
                        dump_trace(t);
                return;
        }

        if (t->bytes > bs[rw])
                bs[rw] = t->bytes;

        ios[rw]++;
        td->o.size += t->bytes;
        store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
}

static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
                               unsigned long long ttime, unsigned long *ios)
{
        struct io_piece *ipo;
        int fileno;

        if (td->o.replay_skip & (1u << DDIR_SYNC))
                return;

        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);
        fileno = trace_add_file(td, t->device);

        ipo->delay = ttime / 1000;
        ipo->ddir = DDIR_SYNC;
        ipo->fileno = fileno;

        ios[DDIR_SYNC]++;
        dprint(FD_BLKTRACE, "store flush delay=%lu\n", ipo->delay);
        queue_io_piece(td, ipo);
}

/*
 * We only care about queue traces; most of the others are side effects of
 * the internal workings of the block layer.
 */
static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
                         unsigned long *ios, unsigned int *bs)
{
        static unsigned long long last_ttime;
        unsigned long long delay = 0;

        if ((t->action & 0xffff) != __BLK_TA_QUEUE)
                return;

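        /*
         * The replay delay is the inter-arrival gap between queue events,
         * scaled by 100 / replay_time_scale: values above 100 speed the
         * replay up, values below 100 slow it down.
         */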
        if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
                if (!last_ttime || td->o.no_stall)
                        delay = 0;
                else if (td->o.replay_time_scale == 100)
                        delay = t->time - last_ttime;
                else {
                        double tmp = t->time - last_ttime;
                        double scale;

                        scale = (double) 100.0 / (double) td->o.replay_time_scale;
                        tmp *= scale;
                        delay = tmp;
                }
                last_ttime = t->time;
        }

        t_bytes_align(&td->o, t);

        if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
                handle_trace_notify(t);
        else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
                handle_trace_discard(td, t, delay, ios, bs);
        else if (t->action & BLK_TC_ACT(BLK_TC_FLUSH))
                handle_trace_flush(td, t, delay, ios);
        else
                handle_trace_fs(td, t, delay, ios, bs);
}

static void byteswap_trace(struct blk_io_trace *t)
{
        t->magic = fio_swap32(t->magic);
        t->sequence = fio_swap32(t->sequence);
        t->time = fio_swap64(t->time);
        t->sector = fio_swap64(t->sector);
        t->bytes = fio_swap32(t->bytes);
        t->action = fio_swap32(t->action);
        t->pid = fio_swap32(t->pid);
        t->device = fio_swap32(t->device);
        t->cpu = fio_swap32(t->cpu);
        t->error = fio_swap16(t->error);
        t->pdu_len = fio_swap16(t->pdu_len);
}

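/*
 * Note that discards count as writes here, so read-only replay skips
 * them as well.
 */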
static bool t_is_write(struct blk_io_trace *t)
{
        return (t->action & BLK_TC_ACT(BLK_TC_WRITE | BLK_TC_DISCARD)) != 0;
}

static enum fio_ddir t_get_ddir(struct blk_io_trace *t)
{
        if (t->action & BLK_TC_ACT(BLK_TC_READ))
                return DDIR_READ;
        else if (t->action & BLK_TC_ACT(BLK_TC_WRITE))
                return DDIR_WRITE;
        else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
                return DDIR_TRIM;

        return DDIR_INVAL;
}

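/*
 * Queue depth probing: a QUEUE event bumps the in-flight count for its
 * data direction, a merge takes one back, and a COMPLETE latches the
 * high-water mark and restarts the count.
 */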
static void depth_inc(struct blk_io_trace *t, int *depth)
{
        enum fio_ddir ddir;

        ddir = t_get_ddir(t);
        if (ddir != DDIR_INVAL)
                depth[ddir]++;
}

static void depth_dec(struct blk_io_trace *t, int *depth)
{
        enum fio_ddir ddir;

        ddir = t_get_ddir(t);
        if (ddir != DDIR_INVAL)
                depth[ddir]--;
}

static void depth_end(struct blk_io_trace *t, int *this_depth, int *depth)
{
        enum fio_ddir ddir;

        ddir = t_get_ddir(t);
        if (ddir != DDIR_INVAL) {
                depth[ddir] = max(depth[ddir], this_depth[ddir]);
                this_depth[ddir] = 0;
        }
}

/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces like the fio text version would do.
 */
bool load_blktrace(struct thread_data *td, const char *filename, int need_swap)
{
        struct blk_io_trace t;
        unsigned long ios[DDIR_RWDIR_SYNC_CNT] = { };
        unsigned int rw_bs[DDIR_RWDIR_CNT] = { };
        unsigned long skipped_writes;
        struct fifo *fifo;
        int fd, i, old_state, max_depth;
        struct fio_file *f;
        int this_depth[DDIR_RWDIR_CNT] = { };
        int depth[DDIR_RWDIR_CNT] = { };

        fd = open(filename, O_RDONLY);
        if (fd < 0) {
                td_verror(td, errno, "open blktrace file");
                return false;
        }

        fifo = fifo_alloc(TRACE_FIFO_SIZE);

        old_state = td_bump_runstate(td, TD_SETTING_UP);

        td->o.size = 0;
        skipped_writes = 0;
        do {
                int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));

                if (ret < 0)
                        goto err;
                else if (!ret)
                        break;
                else if (ret < (int) sizeof(t)) {
                        log_err("fio: short fifo get\n");
                        break;
                }

                if (need_swap)
                        byteswap_trace(&t);

                if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
                        log_err("fio: bad magic in blktrace data: %x\n",
                                                                t.magic);
                        goto err;
                }
                if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
                        log_err("fio: bad blktrace version %d\n",
                                                                t.magic & 0xff);
                        goto err;
                }
                ret = discard_pdu(td, fifo, fd, &t);
                if (ret < 0) {
                        td_verror(td, ret, "blktrace lseek");
                        goto err;
                } else if (t.pdu_len != ret) {
                        log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
                        goto err;
                }
                if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
                        if ((t.action & 0xffff) == __BLK_TA_QUEUE)
                                depth_inc(&t, this_depth);
                        else if (((t.action & 0xffff) == __BLK_TA_BACKMERGE) ||
                                ((t.action & 0xffff) == __BLK_TA_FRONTMERGE))
                                depth_dec(&t, this_depth);
                        else if ((t.action & 0xffff) == __BLK_TA_COMPLETE)
                                depth_end(&t, this_depth, depth);

                        if (t_is_write(&t) && read_only) {
                                skipped_writes++;
                                continue;
                        }
                }

                handle_trace(td, &t, ios, rw_bs);
        } while (1);

        for_each_file(td, f, i)
                trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);

        fifo_free(fifo);
        close(fd);

        td_restore_runstate(td, old_state);

        if (!td->files_index) {
                log_err("fio: did not find replay device(s)\n");
                return false;
        }

        /*
         * For stacked devices, we don't always get a COMPLETE event so
         * the depth grows to insane values. Limit it to something sane(r).
         */
        max_depth = 0;
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                if (depth[i] > 1024)
                        depth[i] = 1024;
                else if (!depth[i] && ios[i])
                        depth[i] = 1;
                max_depth = max(depth[i], max_depth);
        }

        if (skipped_writes)
                log_err("fio: %s skips replay of %lu writes due to read-only\n",
                                                td->o.name, skipped_writes);

        if (!ios[DDIR_READ] && !ios[DDIR_WRITE] && !ios[DDIR_TRIM] &&
            !ios[DDIR_SYNC]) {
                log_err("fio: found no ios in blktrace data\n");
                return false;
        }

        td->o.td_ddir = 0;
        if (ios[DDIR_READ]) {
                td->o.td_ddir |= TD_DDIR_READ;
                td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
        }
        if (ios[DDIR_WRITE]) {
                td->o.td_ddir |= TD_DDIR_WRITE;
                td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
        }
        if (ios[DDIR_TRIM]) {
                td->o.td_ddir |= TD_DDIR_TRIM;
                td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
        }

        /*
         * We need to do direct/raw ios to the device, to avoid getting
         * read-ahead in our way. But only do so if the minimum block size
         * is a multiple of 4k, otherwise we don't know if it's safe to do so.
         */
        if (!fio_option_is_set(&td->o, odirect) && !(td_min_bs(td) & 4095))
                td->o.odirect = 1;

        /*
         * If depth wasn't manually set, use probed depth
         */
        if (!fio_option_is_set(&td->o, iodepth))
                td->o.iodepth = td->o.iodepth_low = max_depth;

        return true;
err:
        close(fd);
        fifo_free(fifo);
        return false;
}

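/*
 * Fill a per-cursor merge parameter (scalar or nr_iter, selected via
 * 'off') from an option list. An empty list applies 'def' everywhere;
 * a non-empty list must supply exactly one value per log, otherwise the
 * mismatched list length is returned as an error.
 */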
static int init_merge_param_list(fio_fp64_t *vals, struct blktrace_cursor *bcs,
                                 int nr_logs, int def, size_t off)
{
        int i = 0, len = 0;

        while (len < FIO_IO_U_LIST_MAX_LEN && vals[len].u.f != 0.0)
                len++;

        if (len && len != nr_logs)
                return len;

        for (i = 0; i < nr_logs; i++) {
                int *val = (int *)((char *)&bcs[i] + off);
                *val = def;
                if (len)
                        *val = (int)vals[i].u.f;
        }

        return 0;
}

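/*
 * Return the index of the cursor holding the oldest pending trace.
 */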
static int find_earliest_io(struct blktrace_cursor *bcs, int nr_logs)
{
        __u64 time = ~(__u64)0;
        int idx = 0, i;

        for (i = 0; i < nr_logs; i++) {
                if (bcs[i].t.time < time) {
                        time = bcs[i].t.time;
                        idx = i;
                }
        }

        return idx;
}

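/*
 * Move a finished cursor to its next iteration by rewinding the file.
 * Once all iterations are done, release its resources and compact the
 * array so the active cursors stay contiguous.
 */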
static void merge_finish_file(struct blktrace_cursor *bcs, int i, int *nr_logs)
{
        bcs[i].iter++;
        if (bcs[i].iter < bcs[i].nr_iter) {
                lseek(bcs[i].fd, 0, SEEK_SET);
                return;
        }

        *nr_logs -= 1;

        /* close file */
        fifo_free(bcs[i].fifo);
        close(bcs[i].fd);

        /* keep active files contiguous */
        memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
}

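/*
 * Fetch the next QUEUE trace for this cursor, skipping events that are
 * not replayed, and remap its timestamp into the merged timeline:
 * offset by the completed iterations of this log, then scaled.
 */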
static int read_trace(struct thread_data *td, struct blktrace_cursor *bc)
{
        int ret = 0;
        struct blk_io_trace *t = &bc->t;

read_skip:
        /* read an io trace */
        ret = trace_fifo_get(td, bc->fifo, bc->fd, t, sizeof(*t));
        if (ret < 0) {
                return ret;
        } else if (!ret) {
                if (!bc->length)
                        bc->length = bc->t.time;
                return ret;
        } else if (ret < (int) sizeof(*t)) {
                log_err("fio: short fifo get\n");
                return -1;
        }

        if (bc->swap)
                byteswap_trace(t);

        /* skip over actions that fio does not care about */
        if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
            t_get_ddir(t) == DDIR_INVAL) {
                ret = discard_pdu(td, bc->fifo, bc->fd, t);
                if (ret < 0) {
                        td_verror(td, ret, "blktrace lseek");
                        return ret;
                } else if (t->pdu_len != ret) {
                        log_err("fio: discarded %d of %d\n", ret,
                                t->pdu_len);
                        return -1;
                }
                goto read_skip;
        }

        t->time = (t->time + bc->iter * bc->length) * bc->scalar / 100;

        return ret;
}

static int write_trace(FILE *fp, struct blk_io_trace *t)
{
        /* pdu is not used so just write out only the io trace */
        t->pdu_len = 0;
        return fwrite((void *)t, sizeof(*t), 1, fp);
}

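/*
 * Merge multiple blktrace logs into a single, time-ordered trace file
 * and point the job at the result. Each input log gets a cursor; the
 * merge loop repeatedly emits whichever cursor holds the earliest
 * pending event.
 */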
int merge_blktrace_iologs(struct thread_data *td)
{
        int nr_logs = get_max_str_idx(td->o.read_iolog_file);
        struct blktrace_cursor *bcs = malloc(sizeof(struct blktrace_cursor) *
                                             nr_logs);
        struct blktrace_cursor *bc;
        FILE *merge_fp;
        char *str, *ptr, *name, *merge_buf;
        int i, ret;

        ret = init_merge_param_list(td->o.merge_blktrace_scalars, bcs, nr_logs,
                                    100, offsetof(struct blktrace_cursor,
                                                  scalar));
        if (ret) {
                log_err("fio: merge_blktrace_scalars(%d) != nr_logs(%d)\n",
                        ret, nr_logs);
                goto err_param;
        }

        ret = init_merge_param_list(td->o.merge_blktrace_iters, bcs, nr_logs,
                                    1, offsetof(struct blktrace_cursor,
                                                nr_iter));
        if (ret) {
                log_err("fio: merge_blktrace_iters(%d) != nr_logs(%d)\n",
                        ret, nr_logs);
                goto err_param;
        }

        /* setup output file */
        merge_fp = fopen(td->o.merge_blktrace_file, "w");
        if (!merge_fp) {
                ret = -errno;
                log_err("fio: could not open merge output file: %s\n",
                        td->o.merge_blktrace_file);
                goto err_param;
        }
        merge_buf = malloc(128 * 1024);
        ret = setvbuf(merge_fp, merge_buf, _IOFBF, 128 * 1024);
        if (ret)
                goto err_out_file;

        /* setup input files */
        str = ptr = strdup(td->o.read_iolog_file);
        nr_logs = 0;
        for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
                bcs[i].fd = open(name, O_RDONLY);
                if (bcs[i].fd < 0) {
                        log_err("fio: could not open file: %s\n", name);
                        ret = bcs[i].fd;
                        goto err_file;
                }
                bcs[i].fifo = fifo_alloc(TRACE_FIFO_SIZE);
                nr_logs++;

                if (!is_blktrace(name, &bcs[i].swap)) {
                        log_err("fio: file is not a blktrace: %s\n", name);
                        ret = -EINVAL;
                        goto err_file;
                }

                ret = read_trace(td, &bcs[i]);
                if (ret < 0) {
                        goto err_file;
                } else if (!ret) {
                        merge_finish_file(bcs, i, &nr_logs);
                        i--;
                }
        }
        free(str);

        /* merge files */
        while (nr_logs) {
                i = find_earliest_io(bcs, nr_logs);
                bc = &bcs[i];
                /* skip over the pdu */
                ret = discard_pdu(td, bc->fifo, bc->fd, &bc->t);
                if (ret < 0) {
                        td_verror(td, ret, "blktrace lseek");
                        goto err_file;
                } else if (bc->t.pdu_len != ret) {
                        log_err("fio: discarded %d of %d\n", ret,
                                bc->t.pdu_len);
                        goto err_file;
                }

                ret = write_trace(merge_fp, &bc->t);
                ret = read_trace(td, bc);
                if (ret < 0)
                        goto err_file;
                else if (!ret)
                        merge_finish_file(bcs, i, &nr_logs);
        }

        /* set iolog file to read from the newly merged file */
        td->o.read_iolog_file = td->o.merge_blktrace_file;
        ret = 0;

err_file:
        /* cleanup */
        for (i = 0; i < nr_logs; i++) {
                fifo_free(bcs[i].fifo);
                close(bcs[i].fd);
        }
err_out_file:
        fflush(merge_fp);
        fclose(merge_fp);
        free(merge_buf);
err_param:
        free(bcs);

        return ret;
}