blktrace.c: Add support for read_iolog_chunked
[fio.git] / blktrace.c
CommitLineData
fb7b71a3
JA
1/*
2 * blktrace support code for fio
3 */
4#include <stdio.h>
5#include <stdlib.h>
87a48ada 6#include <unistd.h>
5ab088aa 7#include <errno.h>
8c1fdf04 8
01743ee1 9#include "flist.h"
fb7b71a3 10#include "fio.h"
10f74940 11#include "iolog.h"
a3e59412 12#include "blktrace.h"
fb7b71a3 13#include "blktrace_api.h"
984f30c9 14#include "oslib/linux-dev-lookup.h"
fb7b71a3 15
8c1fdf04
JA
16/*
17 * Just discard the pdu by seeking past it.
18 */
5ab088aa 19static int discard_pdu(FILE* f, struct blk_io_trace *t)
fb7b71a3
JA
20{
21 if (t->pdu_len == 0)
22 return 0;
23
bd6f78b2 24 dprint(FD_BLKTRACE, "discard pdu len %u\n", t->pdu_len);
5ab088aa
LS
25 if (fseek(f, t->pdu_len, SEEK_CUR) < 0)
26 return -errno;
27
28 return t->pdu_len;
fb7b71a3
JA
29}
30
8c1fdf04
JA
31/*
32 * Check if this is a blktrace binary data file. We read a single trace
33 * into memory and check for the magic signature.
34 */
b153f94a 35bool is_blktrace(const char *filename, int *need_swap)
fb7b71a3
JA
36{
37 struct blk_io_trace t;
38 int fd, ret;
39
40 fd = open(filename, O_RDONLY);
4dced407 41 if (fd < 0)
b153f94a 42 return false;
fb7b71a3
JA
43
44 ret = read(fd, &t, sizeof(t));
45 close(fd);
46
47 if (ret < 0) {
48 perror("read blktrace");
b153f94a 49 return false;
fb7b71a3
JA
50 } else if (ret != sizeof(t)) {
51 log_err("fio: short read on blktrace file\n");
b153f94a 52 return false;
fb7b71a3
JA
53 }
54
d95b34a6
JA
55 if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
56 *need_swap = 0;
b153f94a 57 return true;
d95b34a6
JA
58 }
59
60 /*
61 * Maybe it needs to be endian swapped...
62 */
63 t.magic = fio_swap32(t.magic);
64 if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
65 *need_swap = 1;
b153f94a 66 return true;
d95b34a6 67 }
fb7b71a3 68
b153f94a 69 return false;
fb7b71a3
JA
70}
71
c69aa91f
JA
/*
 * Decompose a blktrace device number: major in the high bits, minor in
 * the low FMINORBITS bits (matches the kernel's new dev_t encoding).
 */
#define FMINORBITS 20
#define FMINORMASK ((1U << FMINORBITS) - 1)
#define FMAJOR(dev) ((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev) ((unsigned int) ((dev) & FMINORMASK))
eeb9c2aa 76
89ac1d48 77static void trace_add_open_close_event(struct thread_data *td, int fileno, enum file_log_act action)
691c8fb0
JA
78{
79 struct io_piece *ipo;
80
81 ipo = calloc(1, sizeof(*ipo));
0d29de83 82 init_ipo(ipo);
691c8fb0
JA
83
84 ipo->ddir = DDIR_INVAL;
85 ipo->fileno = fileno;
89ac1d48 86 ipo->file_action = action;
01743ee1 87 flist_add_tail(&ipo->list, &td->io_log_list);
691c8fb0
JA
88}
89
/*
 * Map a blktrace device number to a fio file index, adding the device to
 * the job's file list (possibly redirected via replay_redirect) the first
 * time it is seen. Returns the fileno for the device.
 *
 * A one-entry cache of the last major/minor lookup lives in static
 * storage, so this is not safe for concurrent callers.
 * NOTE(review): last_maj/last_min are updated even when the device lookup
 * below fails, so a repeated unknown device returns the previous
 * last_fileno — confirm this is intended.
 */
static int trace_add_file(struct thread_data *td, __u32 device)
{
	static unsigned int last_maj, last_min, last_fileno;
	unsigned int maj = FMAJOR(device);
	unsigned int min = FMINOR(device);
	struct fio_file *f;
	char dev[256];
	unsigned int i;

	/* fast path: same device as the previous call */
	if (last_maj == maj && last_min == min)
		return last_fileno;

	last_maj = maj;
	last_min = min;

	/*
	 * check for this file in our list
	 */
	for_each_file(td, f, i)
		if (f->major == maj && f->minor == min) {
			last_fileno = f->fileno;
			return last_fileno;
		}

	strcpy(dev, "/dev");
	if (blktrace_lookup_device(td->o.replay_redirect, dev, maj, min)) {
		int fileno;

		if (td->o.replay_redirect)
			dprint(FD_BLKTRACE, "device lookup: %d/%d\n overridden"
					" with: %s\n", maj, min,
					td->o.replay_redirect);
		else
			dprint(FD_BLKTRACE, "device lookup: %d/%d\n", maj, min);

		dprint(FD_BLKTRACE, "add devices %s\n", dev);
		fileno = add_file_exclusive(td, dev);
		td->o.open_files++;
		td->files[fileno]->major = maj;
		td->files[fileno]->minor = min;
		trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
		last_fileno = fileno;
	}

	return last_fileno;
}
136
0c63576e
JA
137static void t_bytes_align(struct thread_options *o, struct blk_io_trace *t)
138{
139 if (!o->replay_align)
140 return;
141
142 t->bytes = (t->bytes + o->replay_align - 1) & ~(o->replay_align - 1);
143}
144
8c1fdf04
JA
145/*
146 * Store blk_io_trace data in an ipo for later retrieval.
147 */
fdefd987 148static void store_ipo(struct thread_data *td, unsigned long long offset,
89ac1d48 149 unsigned int bytes, int rw, unsigned long long ttime,
6aaf98e9 150 int fileno)
fdefd987 151{
8812d7f2 152 struct io_piece *ipo;
fdefd987 153
8812d7f2 154 ipo = calloc(1, sizeof(*ipo));
0d29de83
JA
155 init_ipo(ipo);
156
6aaf98e9 157 ipo->offset = offset * 512;
0c63576e
JA
158 if (td->o.replay_scale)
159 ipo->offset = ipo->offset / td->o.replay_scale;
a79f17bf 160 ipo_bytes_align(td->o.replay_align, ipo);
fdefd987 161 ipo->len = bytes;
8c1fdf04 162 ipo->delay = ttime / 1000;
fdefd987
JA
163 if (rw)
164 ipo->ddir = DDIR_WRITE;
165 else
166 ipo->ddir = DDIR_READ;
89ac1d48 167 ipo->fileno = fileno;
fdefd987 168
bd6f78b2
JA
169 dprint(FD_BLKTRACE, "store ddir=%d, off=%llu, len=%lu, delay=%lu\n",
170 ipo->ddir, ipo->offset,
171 ipo->len, ipo->delay);
691c8fb0 172 queue_io_piece(td, ipo);
fdefd987
JA
173}
174
10f74940 175static bool handle_trace_notify(struct blk_io_trace *t)
cd991b9e 176{
691c8fb0
JA
177 switch (t->action) {
178 case BLK_TN_PROCESS:
24653680 179 dprint(FD_BLKTRACE, "got process notify: %x, %d\n",
d95b34a6 180 t->action, t->pid);
691c8fb0
JA
181 break;
182 case BLK_TN_TIMESTAMP:
24653680 183 dprint(FD_BLKTRACE, "got timestamp notify: %x, %d\n",
d95b34a6 184 t->action, t->pid);
691c8fb0 185 break;
ff58fced
JA
186 case BLK_TN_MESSAGE:
187 break;
691c8fb0
JA
188 default:
189 dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
190 break;
191 }
10f74940 192 return false;
691c8fb0 193}
5b3023b8 194
10f74940 195static bool handle_trace_discard(struct thread_data *td,
24653680
JA
196 struct blk_io_trace *t,
197 unsigned long long ttime,
10f74940 198 unsigned long *ios, unsigned long long *bs)
ff58fced 199{
8812d7f2 200 struct io_piece *ipo;
89ac1d48 201 int fileno;
ff58fced 202
d7235efb 203 if (td->o.replay_skip & (1u << DDIR_TRIM))
10f74940 204 return false;
d7235efb 205
8812d7f2 206 ipo = calloc(1, sizeof(*ipo));
0d29de83 207 init_ipo(ipo);
6aaf98e9 208 fileno = trace_add_file(td, t->device);
ff58fced 209
24653680 210 ios[DDIR_TRIM]++;
6aaf98e9
DZ
211 if (t->bytes > bs[DDIR_TRIM])
212 bs[DDIR_TRIM] = t->bytes;
24653680 213
ff58fced
JA
214 td->o.size += t->bytes;
215
ff58fced
JA
216 INIT_FLIST_HEAD(&ipo->list);
217
6aaf98e9 218 ipo->offset = t->sector * 512;
0c63576e
JA
219 if (td->o.replay_scale)
220 ipo->offset = ipo->offset / td->o.replay_scale;
a79f17bf 221 ipo_bytes_align(td->o.replay_align, ipo);
ff58fced
JA
222 ipo->len = t->bytes;
223 ipo->delay = ttime / 1000;
224 ipo->ddir = DDIR_TRIM;
89ac1d48 225 ipo->fileno = fileno;
ff58fced
JA
226
227 dprint(FD_BLKTRACE, "store discard, off=%llu, len=%lu, delay=%lu\n",
228 ipo->offset, ipo->len,
229 ipo->delay);
230 queue_io_piece(td, ipo);
10f74940 231 return true;
ff58fced
JA
232}
233
19a8064e
JA
/*
 * Complain about a zero-byte trace entry; the entry itself is dropped by
 * the caller (warned at most once per run via FIO_WARN_BTRACE_ZERO).
 */
static void dump_trace(struct blk_io_trace *t)
{
	log_err("blktrace: ignoring zero byte trace: action=%x\n", t->action);
}
238
10f74940 239static bool handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
691c8fb0 240 unsigned long long ttime, unsigned long *ios,
10f74940 241 unsigned long long *bs)
691c8fb0
JA
242{
243 int rw;
89ac1d48 244 int fileno;
5b3023b8 245
6aaf98e9 246 fileno = trace_add_file(td, t->device);
5b3023b8
JA
247
248 rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
249
d7235efb
JA
250 if (rw) {
251 if (td->o.replay_skip & (1u << DDIR_WRITE))
10f74940 252 return false;
d7235efb
JA
253 } else {
254 if (td->o.replay_skip & (1u << DDIR_READ))
10f74940 255 return false;
d7235efb
JA
256 }
257
19a8064e
JA
258 if (!t->bytes) {
259 if (!fio_did_warn(FIO_WARN_BTRACE_ZERO))
260 dump_trace(t);
10f74940 261 return false;
19a8064e 262 }
c1f22c21 263
6aaf98e9
DZ
264 if (t->bytes > bs[rw])
265 bs[rw] = t->bytes;
5b3023b8
JA
266
267 ios[rw]++;
268 td->o.size += t->bytes;
6aaf98e9 269 store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
10f74940 270 return true;
cd991b9e
JA
271}
272
10f74940 273static bool handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
811f5421 274 unsigned long long ttime, unsigned long *ios)
65c42cc8
JA
275{
276 struct io_piece *ipo;
65c42cc8
JA
277 int fileno;
278
d7235efb 279 if (td->o.replay_skip & (1u << DDIR_SYNC))
10f74940 280 return false;
d7235efb 281
65c42cc8
JA
282 ipo = calloc(1, sizeof(*ipo));
283 init_ipo(ipo);
6aaf98e9 284 fileno = trace_add_file(td, t->device);
65c42cc8
JA
285
286 ipo->delay = ttime / 1000;
287 ipo->ddir = DDIR_SYNC;
288 ipo->fileno = fileno;
289
811f5421 290 ios[DDIR_SYNC]++;
65c42cc8
JA
291 dprint(FD_BLKTRACE, "store flush delay=%lu\n", ipo->delay);
292 queue_io_piece(td, ipo);
10f74940 293 return true;
65c42cc8
JA
294}
295
691c8fb0
JA
/*
 * We only care for queue traces, most of the others are side effects
 * due to internal workings of the block layer.
 *
 * Computes the inter-arrival delay from the previous non-notify trace
 * (optionally rescaled by replay_time_scale) and dispatches to the
 * per-type handler. Returns whether an io_piece was queued.
 */
static bool queue_trace(struct thread_data *td, struct blk_io_trace *t,
			unsigned long *ios, unsigned long long *bs)
{
	/* timestamp of the previous trace; static, so not thread safe */
	static unsigned long long last_ttime;
	unsigned long long delay = 0;

	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
		return false;

	if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
		/* first trace, or stalls disabled: replay immediately */
		if (!last_ttime || td->o.no_stall)
			delay = 0;
		else if (td->o.replay_time_scale == 100)
			delay = t->time - last_ttime;
		else {
			/* rescale the gap, e.g. 50 -> replay twice as fast */
			double tmp = t->time - last_ttime;
			double scale;

			scale = (double) 100.0 / (double) td->o.replay_time_scale;
			tmp *= scale;
			delay = tmp;
		}
		last_ttime = t->time;
	}

	t_bytes_align(&td->o, t);

	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return handle_trace_notify(t);
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		return handle_trace_discard(td, t, delay, ios, bs);
	else if (t->action & BLK_TC_ACT(BLK_TC_FLUSH))
		return handle_trace_flush(td, t, delay, ios);
	else
		return handle_trace_fs(td, t, delay, ios, bs);
}
336
d95b34a6
JA
/*
 * Byte-swap every header field of a trace recorded with the opposite
 * endianness. Any trailing pdu payload is not touched (it is skipped by
 * discard_pdu()).
 */
static void byteswap_trace(struct blk_io_trace *t)
{
	t->magic = fio_swap32(t->magic);
	t->sequence = fio_swap32(t->sequence);
	t->time = fio_swap64(t->time);
	t->sector = fio_swap64(t->sector);
	t->bytes = fio_swap32(t->bytes);
	t->action = fio_swap32(t->action);
	t->pid = fio_swap32(t->pid);
	t->device = fio_swap32(t->device);
	t->cpu = fio_swap32(t->cpu);
	t->error = fio_swap16(t->error);
	t->pdu_len = fio_swap16(t->pdu_len);
}
351
b153f94a 352static bool t_is_write(struct blk_io_trace *t)
24653680
JA
353{
354 return (t->action & BLK_TC_ACT(BLK_TC_WRITE | BLK_TC_DISCARD)) != 0;
355}
356
a6eaf6c9
JA
357static enum fio_ddir t_get_ddir(struct blk_io_trace *t)
358{
359 if (t->action & BLK_TC_ACT(BLK_TC_READ))
360 return DDIR_READ;
361 else if (t->action & BLK_TC_ACT(BLK_TC_WRITE))
362 return DDIR_WRITE;
363 else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
364 return DDIR_TRIM;
365
366 return DDIR_INVAL;
367}
368
369static void depth_inc(struct blk_io_trace *t, int *depth)
370{
371 enum fio_ddir ddir;
372
373 ddir = t_get_ddir(t);
374 if (ddir != DDIR_INVAL)
375 depth[ddir]++;
376}
377
378static void depth_dec(struct blk_io_trace *t, int *depth)
379{
380 enum fio_ddir ddir;
381
382 ddir = t_get_ddir(t);
383 if (ddir != DDIR_INVAL)
384 depth[ddir]--;
385}
386
387static void depth_end(struct blk_io_trace *t, int *this_depth, int *depth)
388{
389 enum fio_ddir ddir = DDIR_INVAL;
390
391 ddir = t_get_ddir(t);
392 if (ddir != DDIR_INVAL) {
393 depth[ddir] = max(depth[ddir], this_depth[ddir]);
394 this_depth[ddir] = 0;
395 }
396}
397
8c1fdf04
JA
/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces like the fio text version would do.
 *
 * Opens the trace file, records the byte-swap requirement, and performs
 * the first read_blktrace() pass (which, with read_iolog_chunked, only
 * fetches the first batch and leaves io_log_rfile open for later calls).
 */
bool init_blktrace_read(struct thread_data *td, const char *filename, int need_swap)
{
	int old_state;

	td->io_log_rfile = fopen(filename, "rb");
	if (!td->io_log_rfile) {
		td_verror(td, errno, "open blktrace file");
		goto err;
	}
	td->io_log_blktrace_swap = need_swap;
	/* the trace content determines the job size; start from zero */
	td->o.size = 0;

	free_release_files(td);

	old_state = td_bump_runstate(td, TD_SETTING_UP);

	if (!read_blktrace(td)) {
		goto err;
	}

	td_restore_runstate(td, old_state);

	if (!td->files_index) {
		log_err("fio: did not find replay device(s)\n");
		return false;
	}

	return true;

err:
	if (td->io_log_rfile) {
		fclose(td->io_log_rfile);
		td->io_log_rfile = NULL;
	}
	return false;
}
438
439bool read_blktrace(struct thread_data* td)
fb7b71a3
JA
440{
441 struct blk_io_trace t;
811f5421 442 unsigned long ios[DDIR_RWDIR_SYNC_CNT] = { };
10f74940 443 unsigned long long rw_bs[DDIR_RWDIR_CNT] = { };
811f5421 444 unsigned long skipped_writes;
10f74940
LS
445 FILE *f = td->io_log_rfile;
446 int i, max_depth;
5ab088aa 447 struct fio_file *fiof;
811f5421
JA
448 int this_depth[DDIR_RWDIR_CNT] = { };
449 int depth[DDIR_RWDIR_CNT] = { };
10f74940 450 int64_t items_to_fetch = 0;
fb7b71a3 451
10f74940
LS
452 if (td->o.read_iolog_chunked) {
453 items_to_fetch = iolog_items_to_fetch(td);
454 if (!items_to_fetch)
455 return true;
fb7b71a3
JA
456 }
457
4241ea8f 458 skipped_writes = 0;
fb7b71a3 459 do {
5ab088aa 460 int ret = fread(&t, 1, sizeof(t), f);
fb7b71a3 461
5ab088aa
LS
462 if (ferror(f)) {
463 td_verror(td, errno, "read blktrace file");
8c1fdf04 464 goto err;
5ab088aa 465 } else if (feof(f)) {
e2887563 466 break;
5ab088aa
LS
467 } else if (ret < (int) sizeof(t)) {
468 log_err("fio: iolog short read\n");
fb7b71a3 469 break;
fb7b71a3
JA
470 }
471
10f74940 472 if (td->io_log_blktrace_swap)
d95b34a6
JA
473 byteswap_trace(&t);
474
fb7b71a3 475 if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
5ec10eaa
JA
476 log_err("fio: bad magic in blktrace data: %x\n",
477 t.magic);
8c1fdf04 478 goto err;
fb7b71a3
JA
479 }
480 if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
5ec10eaa
JA
481 log_err("fio: bad blktrace version %d\n",
482 t.magic & 0xff);
8c1fdf04 483 goto err;
fb7b71a3 484 }
5ab088aa 485 ret = discard_pdu(f, &t);
f12b323f 486 if (ret < 0) {
874a61d0 487 td_verror(td, -ret, "blktrace lseek");
8c1fdf04 488 goto err;
fb7b71a3 489 }
691c8fb0 490 if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
eb5fdcf1 491 if ((t.action & 0xffff) == __BLK_TA_QUEUE)
a6eaf6c9
JA
492 depth_inc(&t, this_depth);
493 else if (((t.action & 0xffff) == __BLK_TA_BACKMERGE) ||
494 ((t.action & 0xffff) == __BLK_TA_FRONTMERGE))
495 depth_dec(&t, this_depth);
496 else if ((t.action & 0xffff) == __BLK_TA_COMPLETE)
497 depth_end(&t, this_depth, depth);
691c8fb0 498
24653680 499 if (t_is_write(&t) && read_only) {
691c8fb0 500 skipped_writes++;
24653680 501 continue;
64bbb865 502 }
a6edd638 503 }
24653680 504
10f74940
LS
505 if (!queue_trace(td, &t, ios, rw_bs))
506 continue;
fb7b71a3 507
10f74940
LS
508 if (td->o.read_iolog_chunked) {
509 td->io_log_current++;
510 items_to_fetch--;
511 if (items_to_fetch == 0)
512 break;
513 }
514 } while (1);
89ac1d48 515
10f74940
LS
516 if (td->o.read_iolog_chunked) {
517 td->io_log_highmark = td->io_log_current;
518 td->io_log_checkmark = (td->io_log_highmark + 1) / 2;
519 fio_gettime(&td->io_log_highmark_time, NULL);
520 }
8c1fdf04 521
10f74940
LS
522 if (skipped_writes)
523 log_err("fio: %s skips replay of %lu writes due to read-only\n",
524 td->o.name, skipped_writes);
5903e7b7 525
10f74940
LS
526 if (td->o.read_iolog_chunked) {
527 if (td->io_log_current == 0) {
528 return false;
529 }
530 td->o.td_ddir = TD_DDIR_RW;
531 if ((rw_bs[DDIR_READ] > td->o.max_bs[DDIR_READ] ||
532 rw_bs[DDIR_WRITE] > td->o.max_bs[DDIR_WRITE] ||
533 rw_bs[DDIR_TRIM] > td->o.max_bs[DDIR_TRIM]) &&
534 td->orig_buffer)
535 {
536 td->o.max_bs[DDIR_READ] = max(td->o.max_bs[DDIR_READ], rw_bs[DDIR_READ]);
537 td->o.max_bs[DDIR_WRITE] = max(td->o.max_bs[DDIR_WRITE], rw_bs[DDIR_WRITE]);
538 td->o.max_bs[DDIR_TRIM] = max(td->o.max_bs[DDIR_TRIM], rw_bs[DDIR_TRIM]);
539 io_u_quiesce(td);
540 free_io_mem(td);
541 init_io_u_buffers(td);
542 }
543 return true;
f01b34ae
JA
544 }
545
10f74940
LS
546 for_each_file(td, fiof, i)
547 trace_add_open_close_event(td, fiof->fileno, FIO_LOG_CLOSE_FILE);
548
549 fclose(td->io_log_rfile);
550 td->io_log_rfile = NULL;
551
eb5fdcf1
JA
552 /*
553 * For stacked devices, we don't always get a COMPLETE event so
554 * the depth grows to insane values. Limit it to something sane(r).
555 */
a6eaf6c9
JA
556 max_depth = 0;
557 for (i = 0; i < DDIR_RWDIR_CNT; i++) {
558 if (depth[i] > 1024)
559 depth[i] = 1024;
560 else if (!depth[i] && ios[i])
561 depth[i] = 1;
562 max_depth = max(depth[i], max_depth);
563 }
eb5fdcf1 564
811f5421
JA
565 if (!ios[DDIR_READ] && !ios[DDIR_WRITE] && !ios[DDIR_TRIM] &&
566 !ios[DDIR_SYNC]) {
8c1fdf04 567 log_err("fio: found no ios in blktrace data\n");
b153f94a 568 return false;
252928cb 569 }
570
571 td->o.td_ddir = 0;
572 if (ios[DDIR_READ]) {
573 td->o.td_ddir |= TD_DDIR_READ;
d84f8d49 574 td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
252928cb 575 }
576 if (ios[DDIR_WRITE]) {
577 td->o.td_ddir |= TD_DDIR_WRITE;
d84f8d49 578 td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
252928cb 579 }
580 if (ios[DDIR_TRIM]) {
581 td->o.td_ddir |= TD_DDIR_TRIM;
24653680 582 td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
d84f8d49 583 }
8c1fdf04
JA
584
585 /*
586 * We need to do direct/raw ios to the device, to avoid getting
76a00ec6
JA
587 * read-ahead in our way. But only do so if the minimum block size
588 * is a multiple of 4k, otherwise we don't know if it's safe to do so.
8c1fdf04 589 */
76a00ec6 590 if (!fio_option_is_set(&td->o, odirect) && !(td_min_bs(td) & 4095))
a6eaf6c9 591 td->o.odirect = 1;
8c1fdf04 592
eb5fdcf1 593 /*
8a16f59b 594 * If depth wasn't manually set, use probed depth
eb5fdcf1 595 */
8a16f59b 596 if (!fio_option_is_set(&td->o, iodepth))
a6eaf6c9 597 td->o.iodepth = td->o.iodepth_low = max_depth;
eb5fdcf1 598
b153f94a 599 return true;
8c1fdf04 600err:
5ab088aa 601 fclose(f);
b153f94a 602 return false;
fb7b71a3 603}
b9921d1a 604
87a48ada
DZ
605static int init_merge_param_list(fio_fp64_t *vals, struct blktrace_cursor *bcs,
606 int nr_logs, int def, size_t off)
607{
608 int i = 0, len = 0;
609
610 while (len < FIO_IO_U_LIST_MAX_LEN && vals[len].u.f != 0.0)
611 len++;
612
613 if (len && len != nr_logs)
614 return len;
615
616 for (i = 0; i < nr_logs; i++) {
617 int *val = (int *)((char *)&bcs[i] + off);
618 *val = def;
619 if (len)
620 *val = (int)vals[i].u.f;
621 }
622
623 return 0;
624
625}
626
b9921d1a
DZ
627static int find_earliest_io(struct blktrace_cursor *bcs, int nr_logs)
628{
629 __u64 time = ~(__u64)0;
630 int idx = 0, i;
631
632 for (i = 0; i < nr_logs; i++) {
633 if (bcs[i].t.time < time) {
634 time = bcs[i].t.time;
635 idx = i;
636 }
637 }
638
639 return idx;
640}
641
642static void merge_finish_file(struct blktrace_cursor *bcs, int i, int *nr_logs)
643{
55bfd8c8
DZ
644 bcs[i].iter++;
645 if (bcs[i].iter < bcs[i].nr_iter) {
5ab088aa 646 fseek(bcs[i].f, 0, SEEK_SET);
55bfd8c8
DZ
647 return;
648 }
649
b9921d1a
DZ
650 *nr_logs -= 1;
651
652 /* close file */
5ab088aa 653 fclose(bcs[i].f);
b9921d1a
DZ
654
655 /* keep active files contiguous */
656 memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
657}
658
659static int read_trace(struct thread_data *td, struct blktrace_cursor *bc)
660{
661 int ret = 0;
662 struct blk_io_trace *t = &bc->t;
663
664read_skip:
665 /* read an io trace */
5ab088aa
LS
666 ret = fread(&t, 1, sizeof(t), bc->f);
667 if (ferror(bc->f)) {
668 td_verror(td, errno, "read blktrace file");
55bfd8c8 669 return ret;
5ab088aa 670 } else if (feof(bc->f)) {
55bfd8c8
DZ
671 if (!bc->length)
672 bc->length = bc->t.time;
b9921d1a
DZ
673 return ret;
674 } else if (ret < (int) sizeof(*t)) {
5ab088aa 675 log_err("fio: iolog short read\n");
b9921d1a
DZ
676 return -1;
677 }
678
679 if (bc->swap)
680 byteswap_trace(t);
681
682 /* skip over actions that fio does not care about */
683 if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
684 t_get_ddir(t) == DDIR_INVAL) {
5ab088aa 685 ret = discard_pdu(bc->f, t);
b9921d1a 686 if (ret < 0) {
874a61d0 687 td_verror(td, -ret, "blktrace lseek");
b9921d1a 688 return ret;
b9921d1a
DZ
689 }
690 goto read_skip;
691 }
692
55bfd8c8 693 t->time = (t->time + bc->iter * bc->length) * bc->scalar / 100;
87a48ada 694
b9921d1a
DZ
695 return ret;
696}
697
698static int write_trace(FILE *fp, struct blk_io_trace *t)
699{
700 /* pdu is not used so just write out only the io trace */
701 t->pdu_len = 0;
702 return fwrite((void *)t, sizeof(*t), 1, fp);
703}
704
705int merge_blktrace_iologs(struct thread_data *td)
706{
707 int nr_logs = get_max_str_idx(td->o.read_iolog_file);
708 struct blktrace_cursor *bcs = malloc(sizeof(struct blktrace_cursor) *
709 nr_logs);
710 struct blktrace_cursor *bc;
711 FILE *merge_fp;
712 char *str, *ptr, *name, *merge_buf;
713 int i, ret;
714
87a48ada
DZ
715 ret = init_merge_param_list(td->o.merge_blktrace_scalars, bcs, nr_logs,
716 100, offsetof(struct blktrace_cursor,
717 scalar));
718 if (ret) {
719 log_err("fio: merge_blktrace_scalars(%d) != nr_logs(%d)\n",
720 ret, nr_logs);
721 goto err_param;
722 }
723
55bfd8c8
DZ
724 ret = init_merge_param_list(td->o.merge_blktrace_iters, bcs, nr_logs,
725 1, offsetof(struct blktrace_cursor,
726 nr_iter));
727 if (ret) {
728 log_err("fio: merge_blktrace_iters(%d) != nr_logs(%d)\n",
729 ret, nr_logs);
730 goto err_param;
731 }
732
b9921d1a
DZ
733 /* setup output file */
734 merge_fp = fopen(td->o.merge_blktrace_file, "w");
735 merge_buf = malloc(128 * 1024);
c81ab051
BVA
736 if (!merge_buf)
737 goto err_out_file;
b9921d1a
DZ
738 ret = setvbuf(merge_fp, merge_buf, _IOFBF, 128 * 1024);
739 if (ret)
c81ab051 740 goto err_merge_buf;
b9921d1a
DZ
741
742 /* setup input files */
743 str = ptr = strdup(td->o.read_iolog_file);
744 nr_logs = 0;
745 for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
5ab088aa
LS
746 bcs[i].f = fopen(name, "rb");
747 if (!bcs[i].f) {
b9921d1a 748 log_err("fio: could not open file: %s\n", name);
5ab088aa 749 ret = -errno;
2ba46d1b 750 free(str);
b9921d1a
DZ
751 goto err_file;
752 }
b9921d1a
DZ
753 nr_logs++;
754
755 if (!is_blktrace(name, &bcs[i].swap)) {
756 log_err("fio: file is not a blktrace: %s\n", name);
2ba46d1b 757 free(str);
b9921d1a
DZ
758 goto err_file;
759 }
760
761 ret = read_trace(td, &bcs[i]);
762 if (ret < 0) {
2ba46d1b 763 free(str);
b9921d1a
DZ
764 goto err_file;
765 } else if (!ret) {
766 merge_finish_file(bcs, i, &nr_logs);
767 i--;
768 }
769 }
770 free(str);
771
772 /* merge files */
773 while (nr_logs) {
774 i = find_earliest_io(bcs, nr_logs);
775 bc = &bcs[i];
776 /* skip over the pdu */
5ab088aa 777 ret = discard_pdu(bc->f, &bc->t);
b9921d1a 778 if (ret < 0) {
874a61d0 779 td_verror(td, -ret, "blktrace lseek");
b9921d1a 780 goto err_file;
b9921d1a
DZ
781 }
782
783 ret = write_trace(merge_fp, &bc->t);
784 ret = read_trace(td, bc);
785 if (ret < 0)
786 goto err_file;
787 else if (!ret)
788 merge_finish_file(bcs, i, &nr_logs);
789 }
790
791 /* set iolog file to read from the newly merged file */
792 td->o.read_iolog_file = td->o.merge_blktrace_file;
793 ret = 0;
794
795err_file:
796 /* cleanup */
797 for (i = 0; i < nr_logs; i++) {
5ab088aa 798 fclose(bcs[i].f);
b9921d1a 799 }
c81ab051
BVA
800err_merge_buf:
801 free(merge_buf);
b9921d1a
DZ
802err_out_file:
803 fflush(merge_fp);
804 fclose(merge_fp);
87a48ada 805err_param:
b9921d1a
DZ
806 free(bcs);
807
808 return ret;
809}