/*
 * blktrace support code for fio
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "flist.h"
#include "fio.h"
#include "blktrace.h"
#include "blktrace_api.h"
#include "oslib/linux-dev-lookup.h"

#define TRACE_FIFO_SIZE 8192

/*
 * fifo refill frontend, to avoid reading data in trace-sized bites
 */
static int refill_fifo(struct thread_data *td, struct fifo *fifo, int fd)
{
	char buf[TRACE_FIFO_SIZE];
	unsigned int total;
	int ret;

	total = sizeof(buf);
	if (total > fifo_room(fifo))
		total = fifo_room(fifo);

	ret = read(fd, buf, total);
	if (ret < 0) {
		int read_err = errno;

		assert(read_err > 0);
		td_verror(td, read_err, "read blktrace file");
		return -read_err;
	}

	if (ret > 0)
		ret = fifo_put(fifo, buf, ret);

	dprint(FD_BLKTRACE, "refill: filled %d bytes\n", ret);
	return ret;
}

/*
 * Retrieve 'len' bytes from the fifo, refilling if necessary.
 */
static int trace_fifo_get(struct thread_data *td, struct fifo *fifo, int fd,
			  void *buf, unsigned int len)
{
	if (fifo_len(fifo) < len) {
		int ret = refill_fifo(td, fifo, fd);

		if (ret < 0)
			return ret;
	}

	return fifo_get(fifo, buf, len);
}

/*
 * Just discard the pdu: pull it off the fifo with a NULL destination so
 * the bytes are skipped rather than copied out.
 */
static int discard_pdu(struct thread_data *td, struct fifo *fifo, int fd,
		       struct blk_io_trace *t)
{
	if (t->pdu_len == 0)
		return 0;

	dprint(FD_BLKTRACE, "discard pdu len %u\n", t->pdu_len);
	return trace_fifo_get(td, fifo, fd, NULL, t->pdu_len);
}

/*
 * Check if this is a blktrace binary data file. We read a single trace
 * into memory and check for the magic signature.
 */
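/*
 * Note: the low byte of 'magic' carries the trace format version (hence the
 * 0xffffff00 masks below); the version byte is ignored here and validated
 * separately in load_blktrace().
 */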
bool is_blktrace(const char *filename, int *need_swap)
{
	struct blk_io_trace t;
	int fd, ret;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return false;

	ret = read(fd, &t, sizeof(t));
	close(fd);

	if (ret < 0) {
		perror("read blktrace");
		return false;
	} else if (ret != sizeof(t)) {
		log_err("fio: short read on blktrace file\n");
		return false;
	}

	if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
		*need_swap = 0;
		return true;
	}

	/*
	 * Maybe it needs to be endian swapped...
	 */
	t.magic = fio_swap32(t.magic);
	if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
		*need_swap = 1;
		return true;
	}

	return false;
}

#define FMINORBITS	20
#define FMINORMASK	((1U << FMINORBITS) - 1)
#define FMAJOR(dev)	((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev)	((unsigned int) ((dev) & FMINORMASK))
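/*
 * Worked example: a trace 'device' field of 0x00800010 decodes to
 * major = 0x00800010 >> 20 = 8 and minor = 0x00800010 & 0xfffff = 16,
 * i.e. block device 8:16 (/dev/sdb on a typical Linux setup).
 */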

static void trace_add_open_close_event(struct thread_data *td, int fileno, enum file_log_act action)
{
	struct io_piece *ipo;

	ipo = calloc(1, sizeof(*ipo));
	init_ipo(ipo);

	ipo->ddir = DDIR_INVAL;
	ipo->fileno = fileno;
	ipo->file_action = action;
	flist_add_tail(&ipo->list, &td->io_log_list);
}

static int trace_add_file(struct thread_data *td, __u32 device)
{
	static unsigned int last_maj, last_min, last_fileno;
	unsigned int maj = FMAJOR(device);
	unsigned int min = FMINOR(device);
	struct fio_file *f;
	char dev[256];
	unsigned int i;

	if (last_maj == maj && last_min == min)
		return last_fileno;

	last_maj = maj;
	last_min = min;

	/*
	 * check for this file in our list
	 */
	for_each_file(td, f, i)
		if (f->major == maj && f->minor == min) {
			last_fileno = f->fileno;
			return last_fileno;
		}

	strcpy(dev, "/dev");
	if (blktrace_lookup_device(td->o.replay_redirect, dev, maj, min)) {
		int fileno;

		if (td->o.replay_redirect)
			dprint(FD_BLKTRACE, "device lookup: %d/%d overridden"
					" with: %s\n", maj, min,
					td->o.replay_redirect);
		else
			dprint(FD_BLKTRACE, "device lookup: %d/%d\n", maj, min);

		dprint(FD_BLKTRACE, "add device %s\n", dev);
		fileno = add_file_exclusive(td, dev);
		td->o.open_files++;
		td->files[fileno]->major = maj;
		td->files[fileno]->minor = min;
		trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
		last_fileno = fileno;
	}

	return last_fileno;
}

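/*
 * Round the trace byte count up to the replay_align boundary. The mask
 * trick below assumes replay_align is a power of two; e.g. with
 * replay_align=4096, a 5000 byte trace is padded out to 8192 bytes.
 */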
static void t_bytes_align(struct thread_options *o, struct blk_io_trace *t)
{
	if (!o->replay_align)
		return;

	t->bytes = (t->bytes + o->replay_align - 1) & ~(o->replay_align - 1);
}

/*
 * Store blk_io_trace data in an ipo for later retrieval.
 */
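/*
 * blktrace sectors are 512 byte units and timestamps are in nanoseconds;
 * the offset is converted to bytes here, and the inter-trace delay is
 * divided down by 1000 (ns to usec) for the replay engine.
 */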
static void store_ipo(struct thread_data *td, unsigned long long offset,
		      unsigned int bytes, int rw, unsigned long long ttime,
		      int fileno)
{
	struct io_piece *ipo;

	ipo = calloc(1, sizeof(*ipo));
	init_ipo(ipo);

	ipo->offset = offset * 512;
	if (td->o.replay_scale)
		ipo->offset = ipo->offset / td->o.replay_scale;
	ipo_bytes_align(td->o.replay_align, ipo);
	ipo->len = bytes;
	ipo->delay = ttime / 1000;
	if (rw)
		ipo->ddir = DDIR_WRITE;
	else
		ipo->ddir = DDIR_READ;
	ipo->fileno = fileno;

	dprint(FD_BLKTRACE, "store ddir=%d, off=%llu, len=%lu, delay=%lu\n",
							ipo->ddir, ipo->offset,
							ipo->len, ipo->delay);
	queue_io_piece(td, ipo);
}

static void handle_trace_notify(struct blk_io_trace *t)
{
	switch (t->action) {
	case BLK_TN_PROCESS:
		dprint(FD_BLKTRACE, "got process notify: %x, %d\n",
				t->action, t->pid);
		break;
	case BLK_TN_TIMESTAMP:
		dprint(FD_BLKTRACE, "got timestamp notify: %x, %d\n",
				t->action, t->pid);
		break;
	case BLK_TN_MESSAGE:
		break;
	default:
		dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
		break;
	}
}

static void handle_trace_discard(struct thread_data *td,
				 struct blk_io_trace *t,
				 unsigned long long ttime,
				 unsigned long *ios, unsigned int *bs)
{
	struct io_piece *ipo;
	int fileno;

	if (td->o.replay_skip & (1u << DDIR_TRIM))
		return;

	ipo = calloc(1, sizeof(*ipo));
	init_ipo(ipo);
	fileno = trace_add_file(td, t->device);

	ios[DDIR_TRIM]++;
	if (t->bytes > bs[DDIR_TRIM])
		bs[DDIR_TRIM] = t->bytes;

	td->o.size += t->bytes;

	INIT_FLIST_HEAD(&ipo->list);

	ipo->offset = t->sector * 512;
	if (td->o.replay_scale)
		ipo->offset = ipo->offset / td->o.replay_scale;
	ipo_bytes_align(td->o.replay_align, ipo);
	ipo->len = t->bytes;
	ipo->delay = ttime / 1000;
	ipo->ddir = DDIR_TRIM;
	ipo->fileno = fileno;

	dprint(FD_BLKTRACE, "store discard, off=%llu, len=%lu, delay=%lu\n",
							ipo->offset, ipo->len,
							ipo->delay);
	queue_io_piece(td, ipo);
}

static void dump_trace(struct blk_io_trace *t)
{
	log_err("blktrace: ignoring zero byte trace: action=%x\n", t->action);
}

static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
			    unsigned long long ttime, unsigned long *ios,
			    unsigned int *bs)
{
	int rw;
	int fileno;

	fileno = trace_add_file(td, t->device);

	rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;

	if (rw) {
		if (td->o.replay_skip & (1u << DDIR_WRITE))
			return;
	} else {
		if (td->o.replay_skip & (1u << DDIR_READ))
			return;
	}

	if (!t->bytes) {
		if (!fio_did_warn(FIO_WARN_BTRACE_ZERO))
			dump_trace(t);
		return;
	}

	if (t->bytes > bs[rw])
		bs[rw] = t->bytes;

	ios[rw]++;
	td->o.size += t->bytes;
	store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
}

static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
			       unsigned long long ttime, unsigned long *ios)
{
	struct io_piece *ipo;
	int fileno;

	if (td->o.replay_skip & (1u << DDIR_SYNC))
		return;

	ipo = calloc(1, sizeof(*ipo));
	init_ipo(ipo);
	fileno = trace_add_file(td, t->device);

	ipo->delay = ttime / 1000;
	ipo->ddir = DDIR_SYNC;
	ipo->fileno = fileno;

	ios[DDIR_SYNC]++;
	dprint(FD_BLKTRACE, "store flush delay=%lu\n", ipo->delay);
	queue_io_piece(td, ipo);
}

/*
 * We only care about queue traces; most of the others are side effects
 * of the internal workings of the block layer.
 */
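/*
 * Replay pacing: the delay carried to the ipo is the gap between
 * consecutive trace timestamps, stretched by 100 / replay_time_scale.
 * E.g. replay_time_scale=50 doubles every gap (half-speed replay) while
 * replay_time_scale=200 halves it (double-speed replay).
 */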
static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
			 unsigned long *ios, unsigned int *bs)
{
	static unsigned long long last_ttime;
	unsigned long long delay = 0;

	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
		return;

	if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
		if (!last_ttime || td->o.no_stall)
			delay = 0;
		else if (td->o.replay_time_scale == 100)
			delay = t->time - last_ttime;
		else {
			double tmp = t->time - last_ttime;
			double scale;

			scale = (double) 100.0 / (double) td->o.replay_time_scale;
			tmp *= scale;
			delay = tmp;
		}
		last_ttime = t->time;
	}

	t_bytes_align(&td->o, t);

	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		handle_trace_notify(t);
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		handle_trace_discard(td, t, delay, ios, bs);
	else if (t->action & BLK_TC_ACT(BLK_TC_FLUSH))
		handle_trace_flush(td, t, delay, ios);
	else
		handle_trace_fs(td, t, delay, ios, bs);
}

static void byteswap_trace(struct blk_io_trace *t)
{
	t->magic = fio_swap32(t->magic);
	t->sequence = fio_swap32(t->sequence);
	t->time = fio_swap64(t->time);
	t->sector = fio_swap64(t->sector);
	t->bytes = fio_swap32(t->bytes);
	t->action = fio_swap32(t->action);
	t->pid = fio_swap32(t->pid);
	t->device = fio_swap32(t->device);
	t->cpu = fio_swap32(t->cpu);
	t->error = fio_swap16(t->error);
	t->pdu_len = fio_swap16(t->pdu_len);
}

static bool t_is_write(struct blk_io_trace *t)
{
	return (t->action & BLK_TC_ACT(BLK_TC_WRITE | BLK_TC_DISCARD)) != 0;
}

static enum fio_ddir t_get_ddir(struct blk_io_trace *t)
{
	if (t->action & BLK_TC_ACT(BLK_TC_READ))
		return DDIR_READ;
	else if (t->action & BLK_TC_ACT(BLK_TC_WRITE))
		return DDIR_WRITE;
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		return DDIR_TRIM;

	return DDIR_INVAL;
}

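/*
 * Per-direction queue depth probing, driven from load_blktrace(): QUEUE
 * traces bump the in-flight count, merges (back/front) undo one bump since
 * the merged request no longer completes on its own, and COMPLETE folds the
 * running count into the observed maximum.
 */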
static void depth_inc(struct blk_io_trace *t, int *depth)
{
	enum fio_ddir ddir;

	ddir = t_get_ddir(t);
	if (ddir != DDIR_INVAL)
		depth[ddir]++;
}

static void depth_dec(struct blk_io_trace *t, int *depth)
{
	enum fio_ddir ddir;

	ddir = t_get_ddir(t);
	if (ddir != DDIR_INVAL)
		depth[ddir]--;
}

static void depth_end(struct blk_io_trace *t, int *this_depth, int *depth)
{
	enum fio_ddir ddir;

	ddir = t_get_ddir(t);
	if (ddir != DDIR_INVAL) {
		depth[ddir] = max(depth[ddir], this_depth[ddir]);
		this_depth[ddir] = 0;
	}
}

/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces, like the fio text version would do.
 */
bool load_blktrace(struct thread_data *td, const char *filename, int need_swap)
{
	struct blk_io_trace t;
	unsigned long ios[DDIR_RWDIR_SYNC_CNT] = { };
	unsigned int rw_bs[DDIR_RWDIR_CNT] = { };
	unsigned long skipped_writes;
	struct fifo *fifo;
	int fd, i, old_state, max_depth;
	struct fio_file *f;
	int this_depth[DDIR_RWDIR_CNT] = { };
	int depth[DDIR_RWDIR_CNT] = { };

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		td_verror(td, errno, "open blktrace file");
		return false;
	}

	fifo = fifo_alloc(TRACE_FIFO_SIZE);

	old_state = td_bump_runstate(td, TD_SETTING_UP);

	td->o.size = 0;
	skipped_writes = 0;
	do {
		int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));

		if (ret < 0)
			goto err;
		else if (!ret)
			break;
		else if (ret < (int) sizeof(t)) {
			log_err("fio: short fifo get\n");
			break;
		}

		if (need_swap)
			byteswap_trace(&t);

		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
			log_err("fio: bad magic in blktrace data: %x\n",
								t.magic);
			goto err;
		}
		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
			log_err("fio: bad blktrace version %d\n",
								t.magic & 0xff);
			goto err;
		}
		ret = discard_pdu(td, fifo, fd, &t);
		if (ret < 0) {
			td_verror(td, -ret, "blktrace lseek");
			goto err;
		} else if (t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
			goto err;
		}
		if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
			if ((t.action & 0xffff) == __BLK_TA_QUEUE)
				depth_inc(&t, this_depth);
			else if (((t.action & 0xffff) == __BLK_TA_BACKMERGE) ||
				((t.action & 0xffff) == __BLK_TA_FRONTMERGE))
				depth_dec(&t, this_depth);
			else if ((t.action & 0xffff) == __BLK_TA_COMPLETE)
				depth_end(&t, this_depth, depth);

			if (t_is_write(&t) && read_only) {
				skipped_writes++;
				continue;
			}
		}

		handle_trace(td, &t, ios, rw_bs);
	} while (1);

	for_each_file(td, f, i)
		trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);

	fifo_free(fifo);
	close(fd);

	td_restore_runstate(td, old_state);

	if (!td->files_index) {
		log_err("fio: did not find replay device(s)\n");
		return false;
	}

	/*
	 * For stacked devices, we don't always get a COMPLETE event so
	 * the depth grows to insane values. Limit it to something sane(r).
	 */
	max_depth = 0;
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		if (depth[i] > 1024)
			depth[i] = 1024;
		else if (!depth[i] && ios[i])
			depth[i] = 1;
		max_depth = max(depth[i], max_depth);
	}

	if (skipped_writes)
		log_err("fio: %s skips replay of %lu writes due to read-only\n",
						td->o.name, skipped_writes);

	if (!ios[DDIR_READ] && !ios[DDIR_WRITE] && !ios[DDIR_TRIM] &&
	    !ios[DDIR_SYNC]) {
		log_err("fio: found no ios in blktrace data\n");
		return false;
	}

	td->o.td_ddir = 0;
	if (ios[DDIR_READ]) {
		td->o.td_ddir |= TD_DDIR_READ;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
	}
	if (ios[DDIR_WRITE]) {
		td->o.td_ddir |= TD_DDIR_WRITE;
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
	}
	if (ios[DDIR_TRIM]) {
		td->o.td_ddir |= TD_DDIR_TRIM;
		td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
	}

	/*
	 * We need to do direct/raw ios to the device, to avoid getting
	 * read-ahead in our way. But only do so if the minimum block size
	 * is a multiple of 4k, otherwise we don't know if it's safe to do so.
	 */
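	/*
	 * Example: min_bs=8192 gives 8192 & 4095 == 0, so O_DIRECT is
	 * enabled; min_bs=512 leaves low bits set and replay stays buffered.
	 */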
	if (!fio_option_is_set(&td->o, odirect) && !(td_min_bs(td) & 4095))
		td->o.odirect = 1;

	/*
	 * If depth wasn't manually set, use probed depth
	 */
	if (!fio_option_is_set(&td->o, iodepth))
		td->o.iodepth = td->o.iodepth_low = max_depth;

	return true;
err:
	close(fd);
	fifo_free(fifo);
	return false;
}

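/*
 * Fill one per-cursor parameter (at offset 'off' inside struct
 * blktrace_cursor) for each of the 'nr_logs' inputs: use the user-supplied
 * value list when one was given, else the default 'def'. Returns 0 on
 * success, or the mismatched list length when the list does not cover
 * exactly one value per log.
 */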
static int init_merge_param_list(fio_fp64_t *vals, struct blktrace_cursor *bcs,
				 int nr_logs, int def, size_t off)
{
	int i = 0, len = 0;

	while (len < FIO_IO_U_LIST_MAX_LEN && vals[len].u.f != 0.0)
		len++;

	if (len && len != nr_logs)
		return len;

	for (i = 0; i < nr_logs; i++) {
		int *val = (int *)((char *)&bcs[i] + off);
		*val = def;
		if (len)
			*val = (int)vals[i].u.f;
	}

	return 0;
}

static int find_earliest_io(struct blktrace_cursor *bcs, int nr_logs)
{
	__u64 time = ~(__u64)0;
	int idx = 0, i;

	for (i = 0; i < nr_logs; i++) {
		if (bcs[i].t.time < time) {
			time = bcs[i].t.time;
			idx = i;
		}
	}

	return idx;
}

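/*
 * One input log is exhausted: either rewind it for its next replay
 * iteration (nr_iter > 1), or retire it and compact the cursor array so
 * the first 'nr_logs' entries always refer to still-active files.
 */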
static void merge_finish_file(struct blktrace_cursor *bcs, int i, int *nr_logs)
{
	bcs[i].iter++;
	if (bcs[i].iter < bcs[i].nr_iter) {
		lseek(bcs[i].fd, 0, SEEK_SET);
		return;
	}

	*nr_logs -= 1;

	/* close file */
	fifo_free(bcs[i].fifo);
	close(bcs[i].fd);

	/* keep active files contiguous */
	memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
}

static int read_trace(struct thread_data *td, struct blktrace_cursor *bc)
{
	int ret = 0;
	struct blk_io_trace *t = &bc->t;

read_skip:
	/* read an io trace */
	ret = trace_fifo_get(td, bc->fifo, bc->fd, t, sizeof(*t));
	if (ret < 0) {
		return ret;
	} else if (!ret) {
		if (!bc->length)
			bc->length = bc->t.time;
		return ret;
	} else if (ret < (int) sizeof(*t)) {
		log_err("fio: short fifo get\n");
		return -1;
	}

	if (bc->swap)
		byteswap_trace(t);

	/* skip over actions that fio does not care about */
	if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
	    t_get_ddir(t) == DDIR_INVAL) {
		ret = discard_pdu(td, bc->fifo, bc->fd, t);
		if (ret < 0) {
			td_verror(td, -ret, "blktrace lseek");
			return ret;
		} else if (t->pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret,
				t->pdu_len);
			return -1;
		}
		goto read_skip;
	}

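	/*
	 * Offset repeated iterations by one full log length and apply the
	 * per-log time scalar (a percentage), keeping merged timestamps
	 * monotone within each input.
	 */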
	t->time = (t->time + bc->iter * bc->length) * bc->scalar / 100;

	return ret;
}

static int write_trace(FILE *fp, struct blk_io_trace *t)
{
	/* the pdu is not used, so write out only the io trace itself */
	t->pdu_len = 0;
	return fwrite((void *)t, sizeof(*t), 1, fp);
}

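/*
 * K-way merge of the blktrace logs named in read_iolog: repeatedly pick the
 * cursor holding the earliest pending trace, append it to the merge output,
 * and advance that cursor. The result lands in merge_blktrace_file, and
 * read_iolog is re-pointed at it for the actual replay.
 *
 * Hypothetical job-file usage (the file names are made up):
 *
 *	read_iolog=dev-sda.bin:dev-sdb.bin
 *	merge_blktrace_file=merged.bin
 *	merge_blktrace_scalars=50:100	; replay sda's log at double speed
 *	merge_blktrace_iters=2:1	; and loop it twice
 *
 * The scalars/iters lists must be empty or name one value per log.
 */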
int merge_blktrace_iologs(struct thread_data *td)
{
	int nr_logs = get_max_str_idx(td->o.read_iolog_file);
	struct blktrace_cursor *bcs = malloc(sizeof(struct blktrace_cursor) *
					     nr_logs);
	struct blktrace_cursor *bc;
	FILE *merge_fp;
	char *str, *ptr, *name, *merge_buf;
	int i, ret;

	ret = init_merge_param_list(td->o.merge_blktrace_scalars, bcs, nr_logs,
				    100, offsetof(struct blktrace_cursor,
						  scalar));
	if (ret) {
		log_err("fio: merge_blktrace_scalars(%d) != nr_logs(%d)\n",
			ret, nr_logs);
		goto err_param;
	}

	ret = init_merge_param_list(td->o.merge_blktrace_iters, bcs, nr_logs,
				    1, offsetof(struct blktrace_cursor,
						nr_iter));
	if (ret) {
		log_err("fio: merge_blktrace_iters(%d) != nr_logs(%d)\n",
			ret, nr_logs);
		goto err_param;
	}

	/* setup output file */
	merge_fp = fopen(td->o.merge_blktrace_file, "w");
	if (!merge_fp) {
		ret = -errno;
		log_err("fio: could not open merge output file: %s\n",
			td->o.merge_blktrace_file);
		goto err_param;
	}
	merge_buf = malloc(128 * 1024);
	if (!merge_buf) {
		ret = -ENOMEM;
		goto err_out_file;
	}
	ret = setvbuf(merge_fp, merge_buf, _IOFBF, 128 * 1024);
	if (ret)
		goto err_merge_buf;

	/* setup input files */
	str = ptr = strdup(td->o.read_iolog_file);
	nr_logs = 0;
	for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
		bcs[i].fd = open(name, O_RDONLY);
		if (bcs[i].fd < 0) {
			log_err("fio: could not open file: %s\n", name);
			ret = bcs[i].fd;
			free(str);
			goto err_file;
		}
		bcs[i].fifo = fifo_alloc(TRACE_FIFO_SIZE);
		nr_logs++;

		if (!is_blktrace(name, &bcs[i].swap)) {
			log_err("fio: file is not a blktrace: %s\n", name);
			free(str);
			goto err_file;
		}

		ret = read_trace(td, &bcs[i]);
		if (ret < 0) {
			free(str);
			goto err_file;
		} else if (!ret) {
			merge_finish_file(bcs, i, &nr_logs);
			i--;
		}
	}
	free(str);

	/* merge files */
	while (nr_logs) {
		i = find_earliest_io(bcs, nr_logs);
		bc = &bcs[i];
		/* skip over the pdu */
		ret = discard_pdu(td, bc->fifo, bc->fd, &bc->t);
		if (ret < 0) {
			td_verror(td, -ret, "blktrace lseek");
			goto err_file;
		} else if (bc->t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret,
				bc->t.pdu_len);
			goto err_file;
		}

		ret = write_trace(merge_fp, &bc->t);
		if (ret != 1) {
			log_err("fio: failed to write merged trace\n");
			ret = -1;
			goto err_file;
		}
		ret = read_trace(td, bc);
		if (ret < 0)
			goto err_file;
		else if (!ret)
			merge_finish_file(bcs, i, &nr_logs);
	}

	/* set iolog file to read from the newly merged file */
	td->o.read_iolog_file = td->o.merge_blktrace_file;
	ret = 0;

err_file:
	/* cleanup */
	for (i = 0; i < nr_logs; i++) {
		fifo_free(bcs[i].fifo);
		close(bcs[i].fd);
	}
err_merge_buf:
	free(merge_buf);
err_out_file:
	fflush(merge_fp);
	fclose(merge_fp);
err_param:
	free(bcs);

	return ret;
}