2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic first line written to, and expected at the head of, a v2 iolog file. */
14 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Append a parsed replay entry to the tail of the per-thread iolog queue,
 * and grow the total amount of I/O the replay will issue by its length.
 * (Sampled view: braces/blank lines of the original body are elided.)
 */
16 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 flist_add_tail(&ipo->list, &td->io_log_list);
19 td->total_io_size += ipo->len;
/*
 * Emit one text record ("<file> <action> <offset> <len>") to the write
 * iolog for an io_u. No-op when no write_iolog_file is configured.
 * act[] has 7 entries (indices 0..6), which the assert on ddir guards.
 * (Sampled view: the early-return body and the trailing buflen argument
 * of the fprintf are elided here.)
 */
22 void log_io_u(struct thread_data *td, struct io_u *io_u)
24 const char *act[] = { "read", "write", "sync", "datasync",
25 "sync_file_range", "wait", "trim" };
27 assert(io_u->ddir <= 6);
29 if (!td->o.write_iolog_file)
32 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level event ("add"/"open"/"close") in the write iolog.
 * Skipped when iolog writing is not enabled; per the surviving inner
 * comment, also skipped for the pre-open/close done before the job
 * starts (the condition line for that check is elided in this view).
 */
37 void log_file(struct thread_data *td, struct fio_file *f,
38 enum file_log_act what)
40 const char *act[] = { "add", "open", "close" };
44 if (!td->o.write_iolog_file)
49 * this happens on the pre-open/close done before the job starts
54 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Sleep for the inter-I/O delay recorded in the iolog entry, measured
 * relative to the last issue time. Per the surviving comment, delays of
 * less than 100 usec are treated as noise; presumably the time already
 * elapsed (usec) is subtracted from delay before sleeping — the
 * adjustment lines are elided in this sampled view, TODO confirm.
 */
57 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 unsigned long usec = utime_since_now(&td->last_issue);
67 * less than 100 usec delay, just regard it as noise
72 usec_sleep(td, delay);
/*
 * Handle a "special" replay entry — one that encodes a file action
 * (add/open/close/unlink) rather than real I/O, marked by
 * ddir == DDIR_INVAL. Returns 0 for ordinary I/O entries; for special
 * entries it performs the action on td->files[ipo->fileno].
 * NOTE(review): the exact return contract for handled/error cases is not
 * visible here (return statements elided in this sampled view); in
 * read_iolog_get a positive return appears to mean "entry consumed,
 * fetch the next one" — confirm against the full file.
 */
75 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
83 if (ipo->ddir != DDIR_INVAL)
86 f = td->files[ipo->fileno];
88 switch (ipo->file_action) {
89 case FIO_LOG_OPEN_FILE:
90 ret = td_io_open_file(td, f);
93 td_verror(td, ret, "iolog open file");
95 case FIO_LOG_CLOSE_FILE:
96 td_io_close_file(td, f);
98 case FIO_LOG_UNLINK_FILE:
102 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Fetch the next replay entry and populate io_u from it.
 *
 * Pops entries off td->io_log_list until a consumable one is found:
 * special (file-action) entries are dispatched via ipo_special(), real
 * I/O entries fill in ddir/offset/buflen/file and take a file reference.
 * DDIR_WAIT entries only sleep: either a relative iolog_delay(), or an
 * absolute delay measured against mtime_since_genesis() (note the ms ->
 * usec conversion, * 1000, in the sleep).
 * NOTE(review): the return values and the end-of-log path are elided in
 * this sampled view — presumably returns 0 on success and non-zero when
 * the log is exhausted; confirm against the full file.
 */
109 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 struct io_piece *ipo;
112 unsigned long elapsed;
114 while (!flist_empty(&td->io_log_list)) {
/* Take the oldest entry off the queue and drop its trim bookkeeping. */
117 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118 flist_del(&ipo->list);
119 remove_trim_entry(td, ipo);
121 ret = ipo_special(td, ipo);
125 } else if (ret > 0) {
130 io_u->ddir = ipo->ddir;
131 if (ipo->ddir != DDIR_WAIT) {
132 io_u->offset = ipo->offset;
133 io_u->buflen = ipo->len;
134 io_u->file = td->files[ipo->fileno];
135 get_file(io_u->file);
136 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137 io_u->buflen, io_u->file->file_name);
139 iolog_delay(td, ipo->delay);
/* DDIR_WAIT: absolute wait; ipo->delay is compared against elapsed ms. */
141 elapsed = mtime_since_genesis();
142 if (ipo->delay > elapsed)
143 usec_sleep(td, (ipo->delay - elapsed) * 1000);
149 if (io_u->ddir != DDIR_WAIT)
/*
 * Drop the entire io_piece history kept for verification: first empty
 * the rb-tree (io_hist_tree), then the flat list (io_hist_list),
 * removing each entry's trim bookkeeping as we go.
 * NOTE(review): the free(ipo) calls are elided in this sampled view —
 * presumably each entry is freed after removal; confirm.
 */
157 void prune_io_piece_log(struct thread_data *td)
159 struct io_piece *ipo;
162 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163 ipo = rb_entry(n, struct io_piece, rb_node);
164 rb_erase(n, &td->io_hist_tree);
165 remove_trim_entry(td, ipo);
170 while (!flist_empty(&td->io_hist_list)) {
171 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172 flist_del(&ipo->list);
173 remove_trim_entry(td, ipo);
/*
 * Debug helper: print the key fields of an io_piece. The msg argument
 * is presumably printed by an elided line preceding this printf — TODO
 * confirm against the full file.
 */
179 static void dump_ipo(struct io_piece *ipo, const char *msg)
182 printf("\toffset %llu, len %lu, fl %x, ddir %d\n", ipo->offset, ipo->len, ipo->flags, ipo->ddir);
186 * log a successful write, so we can unwind the log for verify
/*
 * Record a completed write in the io_piece history used by verify.
 * Two storage strategies (chosen by the condition below):
 *  - a plain FIFO list (io_hist_list) when replay order equals write
 *    order, i.e. sequential writes or random writes with a randommap /
 *    no verify — flag IP_F_ONLIST;
 *  - an rb-tree sorted by (file, offset) when duplicate blocks must be
 *    detected and the older entry dropped — flag IP_F_ONRB.
 * When a duplicate (same file+offset) is found with a different length,
 * that overlap is unexpected: it is logged, both entries dumped, and the
 * old node erased before inserting the new one.
 * If the io_u should be trimmed later, it is also linked on td->trim_list.
 */
188 void log_io_piece(struct thread_data *td, struct io_u *io_u)
190 struct rb_node **p, *parent;
191 struct io_piece *ipo, *__ipo;
193 ipo = malloc(sizeof(struct io_piece));
195 ipo->file = io_u->file;
196 ipo->offset = io_u->offset;
197 ipo->len = io_u->buflen;
199 if (io_u_should_trim(td, io_u)) {
200 flist_add_tail(&ipo->trim_list, &td->trim_list);
205 * We don't need to sort the entries, if:
207 * Sequential writes, or
208 * Random writes that lay out the file as it goes along
210 * For both these cases, just reading back data in the order we
211 * wrote it out is the fastest.
213 * One exception is if we don't have a random map AND we are doing
214 * verifies, in that case we need to check for duplicate blocks and
215 * drop the old one, which we rely on the rb insert/lookup for
218 if ((!td_random(td) || !td->o.overwrite) &&
219 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
220 INIT_FLIST_HEAD(&ipo->list);
221 flist_add_tail(&ipo->list, &td->io_hist_list);
222 ipo->flags |= IP_F_ONLIST;
227 RB_CLEAR_NODE(&ipo->rb_node);
230 * Sort the entry into the verification list
233 p = &td->io_hist_tree.rb_node;
/* Walk down the tree comparing (file, offset); descent lines elided. */
238 __ipo = rb_entry(parent, struct io_piece, rb_node);
239 if (ipo->file < __ipo->file)
241 else if (ipo->file > __ipo->file)
243 else if (ipo->offset < __ipo->offset)
245 else if (ipo->offset > __ipo->offset)
/* Same file and offset: duplicate block. Mismatched length is a bug. */
248 if (ipo->len != __ipo->len) {
249 log_err("fio: unexpected ipo overlap!\n");
250 log_err("fio: please report this issue.\n");
251 dump_ipo(ipo, "ipo");
252 dump_ipo(__ipo, "__ipo");
255 rb_erase(parent, &td->io_hist_tree);
256 remove_trim_entry(td, __ipo);
262 rb_link_node(&ipo->rb_node, parent, p);
263 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
264 ipo->flags |= IP_F_ONRB;
/*
 * Close down the write iolog. The flush/fclose/free of td->iolog_f and
 * td->iolog_buf are elided in this sampled view — presumably they happen
 * just before the pointer is cleared here; confirm against the full file.
 */
268 void write_iolog_close(struct thread_data *td)
274 td->iolog_buf = NULL;
278 * Read version 2 iolog data. It is enhanced to include per-file logging,
/*
 * Parse a v2 iolog from stream f line by line and queue io_pieces for
 * replay. Each line is either a file action ("<name> add|open|close")
 * or an I/O record ("<name> <ddir> <offset> <bytes>"). Counts reads,
 * writes and waits to derive td->o.td_ddir at the end, and tracks the
 * largest request per direction into td->o.max_bs[].
 * NOTE(review): many branch bodies (rw assignment per action string,
 * error paths, frees, return value) are elided in this sampled view.
 */
281 static int read_iolog2(struct thread_data *td, FILE *f)
283 unsigned long long offset;
285 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
/* Start from a clean slate: forget any files registered earlier. */
290 free_release_files(td);
293 * Read in the read iolog and store it, reuse the infrastructure
294 * for doing verifications.
297 fname = malloc(256+16);
298 act = malloc(256+16);
300 reads = writes = waits = 0;
301 while ((p = fgets(str, 4096, f)) != NULL) {
302 struct io_piece *ipo;
/* %256s widths match the 256+16 allocations above. */
305 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
/* 4-field match: an I/O record; map the action string to a ddir. */
311 if (!strcmp(act, "wait"))
313 else if (!strcmp(act, "read"))
315 else if (!strcmp(act, "write"))
317 else if (!strcmp(act, "sync"))
319 else if (!strcmp(act, "datasync"))
321 else if (!strcmp(act, "trim"))
324 log_err("fio: bad iolog file action: %s\n",
/* 2-field match: a file action record. */
330 if (!strcmp(act, "add")) {
332 fileno = add_file(td, fname);
333 file_action = FIO_LOG_ADD_FILE;
335 } else if (!strcmp(act, "open")) {
336 fileno = get_fileno(td, fname);
337 file_action = FIO_LOG_OPEN_FILE;
338 } else if (!strcmp(act, "close")) {
339 fileno = get_fileno(td, fname);
340 file_action = FIO_LOG_CLOSE_FILE;
342 log_err("fio: bad iolog file action: %s\n",
347 log_err("bad iolog2: %s", p);
353 else if (rw == DDIR_WRITE) {
355 * Don't add a write for ro mode
360 } else if (rw == DDIR_WAIT) {
362 } else if (rw == DDIR_INVAL) {
363 } else if (!ddir_sync(rw)) {
364 log_err("bad ddir: %d\n", rw);
/* Build the replay entry and queue it. */
371 ipo = malloc(sizeof(*ipo));
374 if (rw == DDIR_WAIT) {
377 ipo->offset = offset;
379 if (bytes > td->o.max_bs[rw])
380 td->o.max_bs[rw] = bytes;
381 ipo->fileno = fileno;
382 ipo->file_action = file_action;
385 queue_io_piece(td, ipo);
392 if (writes && read_only) {
393 log_err("fio: <%s> skips replay of %d writes due to"
394 " read-only\n", td->o.name, writes);
/* Derive the job's data direction from what the log contained. */
398 if (!reads && !writes && !waits)
400 else if (reads && !writes)
401 td->o.td_ddir = TD_DDIR_READ;
402 else if (!reads && writes)
403 td->o.td_ddir = TD_DDIR_WRITE;
405 td->o.td_ddir = TD_DDIR_RW;
411 * open iolog, check version, and call appropriate parser
/*
 * Open td->o.read_iolog_file, read its first line, and dispatch to the
 * v2 parser if the version-2 magic string matches. Version 1 logs are
 * rejected with an error. Returns the parser's result on success;
 * error-path return values and the fclose are elided in this view.
 */
413 static int init_iolog_read(struct thread_data *td)
415 char buffer[256], *p;
419 f = fopen(td->o.read_iolog_file, "r");
421 perror("fopen read iolog");
425 p = fgets(buffer, sizeof(buffer), f);
427 td_verror(td, errno, "iolog read");
428 log_err("fio: unable to read iolog\n");
434 * version 2 of the iolog stores a specific string as the
435 * first line, check for that
437 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
438 ret = read_iolog2(td, f);
440 log_err("fio: iolog version 1 is no longer supported\n");
449 * Set up a log for storing io patterns.
/*
 * Open td->o.write_iolog_file for append, give the stream an 8 KiB
 * fully-buffered buffer (kept in td->iolog_buf so write_iolog_close can
 * free it), write the version-2 magic line, then log an "add" record
 * for every file already known to the job.
 * NOTE(review): the malloc(8192) result is used unchecked by setvbuf,
 * and error-path cleanup/returns are elided in this sampled view.
 */
451 static int init_iolog_write(struct thread_data *td)
457 f = fopen(td->o.write_iolog_file, "a");
459 perror("fopen write iolog");
464 * That's it for writing, setup a log buffer and we're done.
467 td->iolog_buf = malloc(8192);
468 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
471 * write our version line
473 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
474 perror("iolog init\n");
479 * add all known files
481 for_each_file(td, ff, i)
482 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Entry point for iolog setup: for a read (replay) log, detect blktrace
 * format vs fio's own text format and load accordingly; for a write log,
 * set up the output stream. Returns the chosen initializer's result.
 */
487 int init_iolog(struct thread_data *td)
491 if (td->o.read_iolog_file) {
493 * Check if it's a blktrace file and load that if possible.
494 * Otherwise assume it's a normal log file and load that.
496 if (is_blktrace(td->o.read_iolog_file))
497 ret = load_blktrace(td, td->o.read_iolog_file);
499 ret = init_iolog_read(td);
500 } else if (td->o.write_iolog_file)
501 ret = init_iolog_write(td);
/*
 * Allocate and zero an io_log with an initial capacity of 1024 samples,
 * storing the averaging interval, and hand it back via *log.
 * NOTE(review): both malloc results are used unchecked here — consistent
 * with fio's abort-on-OOM policy elsewhere, but worth confirming.
 */
506 void setup_log(struct io_log **log, unsigned long avg_msec)
508 struct io_log *l = malloc(sizeof(*l));
510 memset(l, 0, sizeof(*l));
512 l->max_samples = 1024;
513 l->log = malloc(l->max_samples * sizeof(struct io_sample));
514 l->avg_msec = avg_msec;
/*
 * Append every accumulated sample of an io_log to the file `name`, one
 * "time, value, ddir, bs"-style line per sample (the fprintf's trailing
 * arguments are elided in this sampled view). Presumably the log is
 * freed and the file closed afterwards — confirm against the full file.
 * NOTE(review): the double-underscore prefix is in the implementation's
 * reserved namespace; kept for fio-internal naming convention.
 */
518 void __finish_log(struct io_log *log, const char *name)
523 f = fopen(name, "a");
529 for (i = 0; i < log->nr_samples; i++) {
530 fprintf(f, "%lu, %lu, %u, %u\n", log->log[i].time,
/*
 * Build the output file name "<prefix>_<postfix>.log", strip any
 * directory part with basename(), and flush the log there.
 * NOTE(review): snprintf is bounded to 200 even though file_name is 256
 * bytes — looks like a leftover magic number; using sizeof(file_name)
 * would be more consistent, but cannot be changed safely from this
 * sampled view. Also note basename() may modify its argument on some
 * platforms; file_name is writable here so that is fine.
 */
541 void finish_log_named(struct thread_data *td, struct io_log *log,
542 const char *prefix, const char *postfix)
544 char file_name[256], *p;
546 snprintf(file_name, 200, "%s_%s.log", prefix, postfix);
547 p = basename(file_name);
548 __finish_log(log, p);
/* Convenience wrapper: flush a log using the job name as the prefix. */
551 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
553 finish_log_named(td, log, td->o.name, name);