2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/*
 * Magic first line of a version-2 iolog file. Written when logging,
 * and matched on the first line when replaying to select the v2 parser.
 */
15 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Append a parsed replay entry to the tail of the per-thread iolog
 * queue, and account its length toward the total I/O the job expects
 * to perform.
 */
17 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
19 	flist_add_tail(&ipo->list, &td->io_log_list);
20 	td->total_io_size += ipo->len;
/*
 * Write one data-direction record for an issued io_u to the iolog
 * being recorded. Each record is "<filename> <action> <offset> <len>";
 * no-op when the job has no write_iolog_file configured.
 */
23 void log_io_u(struct thread_data *td, struct io_u *io_u)
	/* Index must match the ddir value; entries cover ddir 0..6. */
25 	const char *act[] = { "read", "write", "sync", "datasync",
26 				"sync_file_range", "wait", "trim" };
	/* Guard the act[] lookup below: 7 entries, valid indices 0..6. */
28 	assert(io_u->ddir <= 6);
30 	if (!td->o.write_iolog_file)
	/* NOTE(review): the final %lu argument (presumably io_u->buflen)
	 * is on a continuation line not shown in this view — confirm. */
33 	fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
34 					act[io_u->ddir], io_u->offset,
/*
 * Write one file-action record ("<filename> add|open|close") to the
 * iolog being recorded. No-op when no write_iolog_file is set, or
 * (per the comment below) when called before the log is set up.
 */
38 void log_file(struct thread_data *td, struct fio_file *f,
39 	      enum file_log_act what)
	/* Index must match the file_log_act enum value. */
41 	const char *act[] = { "add", "open", "close" };
45 	if (!td->o.write_iolog_file)
50 	 * this happens on the pre-open/close done before the job starts
55 	fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Honor the inter-I/O delay recorded in the iolog. Measures the time
 * already elapsed since the last issue and sleeps for the remainder;
 * tiny delays are treated as noise (see comment below). The exact
 * remainder computation is on lines not visible here — presumably
 * delay is reduced by usec before sleeping; confirm against full file.
 */
58 static void iolog_delay(struct thread_data *td, unsigned long delay)
60 	unsigned long usec = utime_since_now(&td->last_issue);
68 	 * less than 100 usec delay, just regard it as noise
73 	usec_sleep(td, delay);
/*
 * Handle a "special" replay entry that is not a data transfer.
 * Entries with a real data direction (ddir != DDIR_INVAL) are not
 * special and are handed back to the caller; otherwise the entry
 * carries a file action (open/close/unlink) which is performed here.
 * Return-value semantics are on elided lines — presumably 0 for a
 * normal entry and non-zero after consuming a special one; confirm.
 */
76 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
84 	if (ipo->ddir != DDIR_INVAL)
	/* Special entries reference the file by index, not pointer. */
87 	f = td->files[ipo->fileno];
89 	switch (ipo->file_action) {
90 	case FIO_LOG_OPEN_FILE:
91 		ret = td_io_open_file(td, f);
94 		td_verror(td, ret, "iolog open file");
96 	case FIO_LOG_CLOSE_FILE:
97 		td_io_close_file(td, f);
99 	case FIO_LOG_UNLINK_FILE:
100 		unlink(f->file_name);
103 		log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next entry off the replay queue and fill in *io_u from it.
 * File actions are dispatched to ipo_special(); data entries set up
 * ddir/offset/buflen/file and take a file reference. Recorded delays
 * are honored either relative to the last issue (iolog_delay) or
 * against wall-clock time since job genesis — both paths are visible
 * below but the branch selecting between them is elided.
 */
110 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
112 	struct io_piece *ipo;
113 	unsigned long elapsed;
115 	while (!flist_empty(&td->io_log_list)) {
118 		ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
119 		flist_del(&ipo->list);
120 		remove_trim_entry(td, ipo);
		/* Consume open/close/unlink entries without returning them. */
122 		ret = ipo_special(td, ipo);
126 		} else if (ret > 0) {
131 		io_u->ddir = ipo->ddir;
132 		if (ipo->ddir != DDIR_WAIT) {
133 			io_u->offset = ipo->offset;
134 			io_u->buflen = ipo->len;
			/* Hold a reference for the duration of this io_u. */
135 			io_u->file = td->files[ipo->fileno];
136 			get_file(io_u->file);
137 			dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
138 						io_u->buflen, io_u->file->file_name);
140 				iolog_delay(td, ipo->delay);
			/* DDIR_WAIT path: delay is absolute ms since genesis. */
142 			elapsed = mtime_since_genesis();
143 			if (ipo->delay > elapsed)
144 				usec_sleep(td, (ipo->delay - elapsed) * 1000);
149 		if (io_u->ddir != DDIR_WAIT)
/*
 * Tear down the verify history: drain both the rb-tree of sorted
 * entries and the plain list, removing each entry from the trim list
 * as it goes. The per-entry free is on elided lines — presumably
 * each ipo is freed after removal; confirm against the full file.
 */
157 void prune_io_piece_log(struct thread_data *td)
159 	struct io_piece *ipo;
	/* First the sorted (rb-tree) history... */
162 	while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163 		ipo = rb_entry(n, struct io_piece, rb_node);
164 		rb_erase(n, &td->io_hist_tree);
165 		remove_trim_entry(td, ipo);
	/* ...then the unsorted list history. */
170 	while (!flist_empty(&td->io_hist_list)) {
171 		ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172 		flist_del(&ipo->list);
173 		remove_trim_entry(td, ipo);
180  * log a successful write, so we can unwind the log for verify
182 void log_io_piece(struct thread_data *td, struct io_u *io_u)
184 	struct rb_node **p, *parent;
185 	struct io_piece *ipo, *__ipo;
	/* NOTE(review): malloc result is used unchecked below — the
	 * fio convention appears to be abort-on-OOM elsewhere; confirm. */
187 	ipo = malloc(sizeof(struct io_piece));
189 	ipo->file = io_u->file;
190 	ipo->offset = io_u->offset;
191 	ipo->len = io_u->buflen;
192 	ipo->numberio = io_u->numberio;
193 	ipo->flags = IP_F_IN_FLIGHT;
	/* Track trim candidates separately so they can be found fast. */
197 	if (io_u_should_trim(td, io_u)) {
198 		flist_add_tail(&ipo->trim_list, &td->trim_list);
203 	 * We don't need to sort the entries, if:
205 	 *	Sequential writes, or
206 	 *	Random writes that lay out the file as it goes along
208 	 * For both these cases, just reading back data in the order we
209 	 * wrote it out is the fastest.
211 	 * One exception is if we don't have a random map AND we are doing
212 	 * verifies, in that case we need to check for duplicate blocks and
213 	 * drop the old one, which we rely on the rb insert/lookup for
216 	if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
217 	      (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
		/* Fast path: keep issue order in a plain list. */
218 		INIT_FLIST_HEAD(&ipo->list);
219 		flist_add_tail(&ipo->list, &td->io_hist_list);
220 		ipo->flags |= IP_F_ONLIST;
225 	RB_CLEAR_NODE(&ipo->rb_node);
228 	 * Sort the entry into the verification list
231 	p = &td->io_hist_tree.rb_node;
		/* Ordered by (file pointer, offset); equal keys mean an
		 * overwrite of the same block — drop the stale entry. */
236 		__ipo = rb_entry(parent, struct io_piece, rb_node);
237 		if (ipo->file < __ipo->file)
239 		else if (ipo->file > __ipo->file)
241 		else if (ipo->offset < __ipo->offset)
243 		else if (ipo->offset > __ipo->offset)
246 			dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
247 				__ipo->offset, __ipo->len,
248 				ipo->offset, ipo->len);
250 			rb_erase(parent, &td->io_hist_tree);
251 			remove_trim_entry(td, __ipo);
257 	rb_link_node(&ipo->rb_node, parent, p);
258 	rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
259 	ipo->flags |= IP_F_ONRB;
/*
 * Finish recording: close out the write iolog and drop the stdio
 * buffer installed by init_iolog_write() (the fclose/free themselves
 * are on elided lines).
 */
263 void write_iolog_close(struct thread_data *td)
269 	td->iolog_buf = NULL;
273  * Read version 2 iolog data. It is enhanced to include per-file logging,
276 static int read_iolog2(struct thread_data *td, FILE *f)
278 	unsigned long long offset;
280 	int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
	/* Replayed files replace whatever the job definition listed. */
285 	free_release_files(td);
288 	 * Read in the read iolog and store it, reuse the infrastructure
289 	 * for doing verifications.
	/* 256+16: headroom beyond the %256s scan width below. */
292 	fname = malloc(256+16);
293 	act = malloc(256+16);
295 	reads = writes = waits = 0;
296 	while ((p = fgets(str, 4096, f)) != NULL) {
297 		struct io_piece *ipo;
		/* Two line shapes: "<file> <act> <offset> <len>" for I/O
		 * (r == 4), "<file> <act>" for file actions (r == 2). */
300 		r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
306 			if (!strcmp(act, "wait"))
308 			else if (!strcmp(act, "read"))
310 			else if (!strcmp(act, "write"))
312 			else if (!strcmp(act, "sync"))
314 			else if (!strcmp(act, "datasync"))
316 			else if (!strcmp(act, "trim"))
319 				log_err("fio: bad iolog file action: %s\n",
323 			fileno = get_fileno(td, fname);
326 			if (!strcmp(act, "add")) {
327 				fileno = add_file(td, fname, 0, 1);
328 				file_action = FIO_LOG_ADD_FILE;
330 			} else if (!strcmp(act, "open")) {
331 				fileno = get_fileno(td, fname);
332 				file_action = FIO_LOG_OPEN_FILE;
333 			} else if (!strcmp(act, "close")) {
334 				fileno = get_fileno(td, fname);
335 				file_action = FIO_LOG_CLOSE_FILE;
337 				log_err("fio: bad iolog file action: %s\n",
342 			log_err("bad iolog2: %s", p);
348 		else if (rw == DDIR_WRITE) {
350 			 * Don't add a write for ro mode
355 		} else if (rw == DDIR_WAIT) {
356 		} else if (rw == DDIR_INVAL) {
358 		} else if (!ddir_sync(rw)) {
359 			log_err("bad ddir: %d\n", rw);
		/* Queue the parsed entry for replay. */
366 		ipo = malloc(sizeof(*ipo));
369 		if (rw == DDIR_WAIT) {
372 			ipo->offset = offset;
			/* Track the largest block seen so buffers fit. */
374 			if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
375 				td->o.max_bs[rw] = bytes;
376 			ipo->fileno = fileno;
377 			ipo->file_action = file_action;
380 		queue_io_piece(td, ipo);
387 	if (writes && read_only) {
388 		log_err("fio: <%s> skips replay of %d writes due to"
389 			" read-only\n", td->o.name, writes);
	/* Derive the job's data direction from what the log contained. */
393 	if (!reads && !writes && !waits)
395 	else if (reads && !writes)
396 		td->o.td_ddir = TD_DDIR_READ;
397 	else if (!reads && writes)
398 		td->o.td_ddir = TD_DDIR_WRITE;
400 		td->o.td_ddir = TD_DDIR_RW;
406  * open iolog, check version, and call appropriate parser
408 static int init_iolog_read(struct thread_data *td)
410 	char buffer[256], *p;
414 	f = fopen(td->o.read_iolog_file, "r");
416 		perror("fopen read iolog");
	/* Peek at the first line to determine the log version. */
420 	p = fgets(buffer, sizeof(buffer), f);
422 		td_verror(td, errno, "iolog read");
423 		log_err("fio: unable to read iolog\n");
429 	 * version 2 of the iolog stores a specific string as the
430 	 * first line, check for that
432 	if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
433 		ret = read_iolog2(td, f);
435 		log_err("fio: iolog version 1 is no longer supported\n");
444  * Set up a log for storing io patterns.
446 static int init_iolog_write(struct thread_data *td)
	/* Append mode: do not clobber an existing log. */
452 	f = fopen(td->o.write_iolog_file, "a");
454 		perror("fopen write iolog");
459 	 * That's it for writing, setup a log buffer and we're done.
	/* 8k fully-buffered stream; buffer is freed in write_iolog_close(). */
462 	td->iolog_buf = malloc(8192);
463 	setvbuf(f, td->iolog_buf, _IOFBF, 8192);
466 	 * write our version line
468 	if (fprintf(f, "%s\n", iolog_ver2) < 0) {
469 		perror("iolog init\n");
474 	 * add all known files
	/* Record an "add" action for every file the job already knows. */
476 	for_each_file(td, ff, i)
477 		log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Entry point for iolog setup: replay (read_iolog_file, which may be
 * either a blktrace capture or a fio v2 iolog) or record
 * (write_iolog_file). Reports EINVAL to the thread on failure.
 */
482 int init_iolog(struct thread_data *td)
486 	if (td->o.read_iolog_file) {
490 		 * Check if it's a blktrace file and load that if possible.
491 		 * Otherwise assume it's a normal log file and load that.
493 		if (is_blktrace(td->o.read_iolog_file, &need_swap))
494 			ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
496 			ret = init_iolog_read(td);
497 	} else if (td->o.write_iolog_file)
498 		ret = init_iolog_write(td);
501 		td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and zero an io_log for stats sampling (bw/lat/iops) with
 * an initial capacity of 1024 samples, and store it through *log.
 * avg_msec is the averaging window; log_type tags what is recorded.
 */
506 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
508 	struct io_log *l = malloc(sizeof(*l));
510 	memset(l, 0, sizeof(*l));
512 	l->max_samples = 1024;
513 	l->log_type = log_type;
514 	l->log = malloc(l->max_samples * sizeof(struct io_sample));
515 	l->avg_msec = avg_msec;
/*
 * Optional large stdio buffering for log writeout. When the platform
 * has setvbuf (CONFIG_SETVBUF), install a 1MB fully-buffered stream
 * buffer and return it so the caller can free it after fclose; the
 * #else fallbacks below are presumably no-ops returning NULL — their
 * bodies are elided here.
 */
519 #ifdef CONFIG_SETVBUF
520 static void *set_file_buffer(FILE *f)
522 	size_t size = 1048576;
526 	setvbuf(f, buf, _IOFBF, size);
530 static void clear_file_buffer(void *buf)
535 static void *set_file_buffer(FILE *f)
540 static void clear_file_buffer(void *buf)
/*
 * Flush an io_log to the named file as CSV lines of
 * "time, value, ddir, bs", appending to any existing content.
 */
545 void __finish_log(struct io_log *log, const char *name)
551 	f = fopen(name, "a");
	/* Large stdio buffer (no-op on platforms without setvbuf). */
557 	buf = set_file_buffer(f);
559 	for (i = 0; i < log->nr_samples; i++) {
560 		fprintf(f, "%lu, %lu, %u, %u\n",
561 				(unsigned long) log->log[i].time,
562 				(unsigned long) log->log[i].val,
563 				log->log[i].ddir, log->log[i].bs);
567 	clear_file_buffer(buf);
/*
 * Write out one stats log to "<prefix>_<postfix>.log" under a file
 * lock (so concurrent threads don't interleave). With trylock set,
 * a held lock means "come back later" — presumably a non-zero return;
 * confirm on the elided line. GUI clients ship the log over the wire
 * instead of writing locally.
 */
572 static int finish_log_named(struct thread_data *td, struct io_log *log,
573 			    const char *prefix, const char *postfix,
578 	snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
581 		if (fio_trylock_file(file_name))
584 		fio_lock_file(file_name);
586 	if (td->client_type == FIO_CLIENT_TYPE_GUI) {
587 		fio_send_iolog(td, log, file_name);
591 		__finish_log(log, file_name);
593 	fio_unlock_file(file_name);
/*
 * Convenience wrapper: write out a log using the job name as the
 * filename prefix.
 */
597 static int finish_log(struct thread_data *td, struct io_log *log,
598 		      const char *name, int trylock)
600 	return finish_log_named(td, log, td->o.name, name, trylock);
/*
 * Write out one log, preferring the user-supplied filename prefix
 * (log_file) when set, otherwise falling back to the job name.
 */
603 static int write_this_log(struct thread_data *td, struct io_log *log,
604 			  const char *log_file, const char *name, int try)
612 		ret = finish_log_named(td, log, log_file, name, try);
614 		ret = finish_log(td, log, name, try);
/*
 * Per-log-type writeout wrappers: each forwards the matching io_log
 * and its configured filename option, with a fixed name suffix
 * ("iops", "slat", "clat", "lat", "bw"). Note slat/clat/lat all share
 * the lat_log_file option as their filename prefix.
 */
619 static int write_iops_log(struct thread_data *td, int try)
621 	struct thread_options *o = &td->o;
623 	return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
626 static int write_slat_log(struct thread_data *td, int try)
628 	struct thread_options *o = &td->o;
630 	return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
633 static int write_clat_log(struct thread_data *td, int try)
635 	struct thread_options *o = &td->o;
637 	return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
640 static int write_lat_log(struct thread_data *td, int try)
642 	struct thread_options *o = &td->o;
644 	return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
647 static int write_bandw_log(struct thread_data *td, int try)
649 	struct thread_options *o = &td->o;
651 	return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
/*
 * Table driving fio_writeout_logs(): one entry per log type, pairing
 * a completion-tracking bitmask with its writeout function. LAT_LOG
 * presumably maps to write_lat_log — its .fn line is elided here.
 */
666 	int (*fn)(struct thread_data *, int);
669 static struct log_type log_types[] = {
672 		.fn = write_bandw_log,
675 		.mask = LAT_LOG_MASK,
679 		.mask = SLAT_LOG_MASK,
680 		.fn = write_slat_log,
683 		.mask = CLAT_LOG_MASK,
684 		.fn = write_clat_log,
687 		.mask = IOPS_LOG_MASK,
688 		.fn = write_iops_log,
/*
 * Write out every configured stats log for this thread. Marks the
 * thread TD_FINISHING for the duration, then loops over log_types,
 * recording completed logs in log_mask. Logs whose file lock is held
 * elsewhere are retried (trylock mode while more than one remains);
 * the loop bails out when a full pass makes no progress.
 */
692 void fio_writeout_logs(struct thread_data *td)
694 	unsigned int log_mask = 0;
695 	unsigned int log_left = ALL_LOG_NR;
698 	old_state = td_bump_runstate(td, TD_FINISHING);
		/* Detect a pass with no completions to avoid spinning. */
703 		int prev_log_left = log_left;
705 		for (i = 0; i < ALL_LOG_NR && log_left; i++) {
706 			struct log_type *lt = &log_types[i];
709 			if (!(log_mask & lt->mask)) {
				/* trylock while >1 log remains; block on the last. */
710 				ret = lt->fn(td, log_left != 1);
713 					log_mask |= lt->mask;
718 		if (prev_log_left == log_left)
722 	td_restore_runstate(td, old_state);