2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Signature written as the first line of a version-2 iolog, and matched
 * against when reading one back to pick the parser. */
15 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Queue a parsed io_piece at the tail of the replay list and account its
 * length toward the job's total expected I/O size.
 */
17 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
19 flist_add_tail(&ipo->list, &td->io_log_list);
20 td->total_io_size += ipo->len;
/*
 * Append one io_u to the textual iolog as
 * "<filename> <action> <offset> <len>".  No-op unless the job was
 * started with write_iolog_file set.
 */
23 void log_io_u(struct thread_data *td, struct io_u *io_u)
25 const char *act[] = { "read", "write", "sync", "datasync",
26 "sync_file_range", "wait", "trim" };
/* act[] has exactly 7 entries, so ddir must index 0..6 */
28 assert(io_u->ddir <= 6);
30 if (!td->o.write_iolog_file)
33 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
34 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level lifetime event (add/open/close) in the iolog so a
 * later replay can reproduce it.  Skipped when no iolog is being
 * written, or (per the comment below) for the pre-job open/close.
 */
38 void log_file(struct thread_data *td, struct fio_file *f,
39 enum file_log_act what)
41 const char *act[] = { "add", "open", "close" };
45 if (!td->o.write_iolog_file)
50 * this happens on the pre-open/close done before the job starts
55 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Sleep to reproduce a recorded inter-I/O delay.  Time already elapsed
 * since the last issue (utime_since_now) is presumably credited against
 * the delay — the subtraction is not visible here.  Sleeping is chunked
 * (cap visible at 500000 usec per step) so td->terminate is honored
 * promptly between chunks.
 */
58 static void iolog_delay(struct thread_data *td, unsigned long delay)
60 unsigned long usec = utime_since_now(&td->last_issue);
61 unsigned long this_delay;
69 * less than 100 usec delay, just regard it as noise
74 while (delay && !td->terminate) {
76 if (this_delay > 500000)
79 usec_sleep(td, this_delay);
/*
 * Execute a non-I/O ("special") replay entry carrying a file action.
 * Only entries with ddir == DDIR_INVAL are special; anything else is
 * handed back to the caller as a normal I/O.  Open failures are
 * reported via td_verror; close and unlink are performed directly.
 * NOTE(review): the exact return-value convention (how "was special"
 * vs. error is signalled) is not visible in these lines — confirm at
 * the call site in read_iolog_get().
 */
84 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
92 if (ipo->ddir != DDIR_INVAL)
95 f = td->files[ipo->fileno];
97 switch (ipo->file_action) {
98 case FIO_LOG_OPEN_FILE:
99 ret = td_io_open_file(td, f);
102 td_verror(td, ret, "iolog open file");
104 case FIO_LOG_CLOSE_FILE:
105 td_io_close_file(td, f);
107 case FIO_LOG_UNLINK_FILE:
108 unlink(f->file_name);
111 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next replay entry off td->io_log_list and populate io_u from
 * it.  File-action entries are executed via ipo_special() instead of
 * being returned; DDIR_WAIT entries carry no file/offset and only
 * impose a delay.  The recorded per-entry delay is reproduced via
 * iolog_delay() (and, for waits, an absolute sleep measured against
 * mtime_since_genesis()) before the entry is handed to the caller.
 */
118 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
120 struct io_piece *ipo;
121 unsigned long elapsed;
123 while (!flist_empty(&td->io_log_list)) {
/* take the oldest entry and drop it from list + trim bookkeeping */
126 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
127 flist_del(&ipo->list);
128 remove_trim_entry(td, ipo);
130 ret = ipo_special(td, ipo);
134 } else if (ret > 0) {
139 io_u->ddir = ipo->ddir;
140 if (ipo->ddir != DDIR_WAIT) {
141 io_u->offset = ipo->offset;
142 io_u->buflen = ipo->len;
143 io_u->file = td->files[ipo->fileno];
144 get_file(io_u->file);
145 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
146 io_u->buflen, io_u->file->file_name);
148 iolog_delay(td, ipo->delay);
/* DDIR_WAIT: delay is an absolute msec target since job genesis */
150 elapsed = mtime_since_genesis();
151 if (ipo->delay > elapsed)
152 usec_sleep(td, (ipo->delay - elapsed) * 1000);
157 if (io_u->ddir != DDIR_WAIT)
/*
 * Tear down the verify history: empty both the rb-tree of sorted
 * entries and the plain history list, removing each entry's trim-list
 * linkage as it goes.
 */
165 void prune_io_piece_log(struct thread_data *td)
167 struct io_piece *ipo;
/* drain the sorted (rb-tree) side of the history */
170 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
171 ipo = rb_entry(n, struct io_piece, rb_node);
172 rb_erase(n, &td->io_hist_tree);
173 remove_trim_entry(td, ipo);
/* drain the unsorted (list) side of the history */
178 while (!flist_empty(&td->io_hist_list)) {
179 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
180 flist_del(&ipo->list);
181 remove_trim_entry(td, ipo);
/*
 * Record a completed write in the verify history.  Fast path: when
 * ordering cannot produce duplicate blocks (non-random or randommap-
 * covered writes, or verify disabled), entries go on a plain list in
 * issue order — reading back in write order is fastest.  Otherwise the
 * entry is sorted into an rb-tree keyed by (file, offset), and an
 * overlapping older entry at the same key is evicted so verify only
 * sees the newest data for a block.
 */
188 * log a successful write, so we can unwind the log for verify
190 void log_io_piece(struct thread_data *td, struct io_u *io_u)
192 struct rb_node **p, *parent;
193 struct io_piece *ipo, *__ipo;
195 ipo = malloc(sizeof(struct io_piece));
197 ipo->file = io_u->file;
198 ipo->offset = io_u->offset;
199 ipo->len = io_u->buflen;
200 ipo->numberio = io_u->numberio;
201 ipo->flags = IP_F_IN_FLIGHT;
/* trims are additionally tracked on the per-job trim list */
205 if (io_u_should_trim(td, io_u)) {
206 flist_add_tail(&ipo->trim_list, &td->trim_list);
211 * We don't need to sort the entries, if:
213 * Sequential writes, or
214 * Random writes that lay out the file as it goes along
216 * For both these cases, just reading back data in the order we
217 * wrote it out is the fastest.
219 * One exception is if we don't have a random map AND we are doing
220 * verifies, in that case we need to check for duplicate blocks and
221 * drop the old one, which we rely on the rb insert/lookup for
224 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
225 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
226 INIT_FLIST_HEAD(&ipo->list);
227 flist_add_tail(&ipo->list, &td->io_hist_list);
228 ipo->flags |= IP_F_ONLIST;
233 RB_CLEAR_NODE(&ipo->rb_node);
236 * Sort the entry into the verification list
239 p = &td->io_hist_tree.rb_node;
244 __ipo = rb_entry(parent, struct io_piece, rb_node);
/* order by file pointer first, then by offset within the file */
245 if (ipo->file < __ipo->file)
247 else if (ipo->file > __ipo->file)
249 else if (ipo->offset < __ipo->offset)
251 else if (ipo->offset > __ipo->offset)
/* same (file, offset): evict the stale entry and retry the insert */
254 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
255 __ipo->offset, __ipo->len,
256 ipo->offset, ipo->len);
258 rb_erase(parent, &td->io_hist_tree);
259 remove_trim_entry(td, __ipo);
265 rb_link_node(&ipo->rb_node, parent, p);
266 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
267 ipo->flags |= IP_F_ONRB;
/*
 * Close out the write-side iolog.  Only the buffer-pointer reset is
 * visible in these lines; flushing/closing td->iolog_f and freeing
 * td->iolog_buf presumably precede it — confirm in the full source.
 */
271 void write_iolog_close(struct thread_data *td)
277 td->iolog_buf = NULL;
/*
 * Parse a version-2 iolog from FILE *f, turning each line into either a
 * file action (add/open/close) or a queued io_piece for replay.  Line
 * format: "<fname> <action> [<offset> <bytes>]"; 2 fields mean a file
 * action, 4 mean an I/O.  Counts reads/writes/waits to derive the job's
 * effective data direction, and rejects writes in read-only mode.
 */
281 * Read version 2 iolog data. It is enhanced to include per-file logging,
284 static int read_iolog2(struct thread_data *td, FILE *f)
286 unsigned long long offset;
288 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
293 free_release_files(td);
296 * Read in the read iolog and store it, reuse the infrastructure
297 * for doing verifications.
300 fname = malloc(256+16);
301 act = malloc(256+16);
303 reads = writes = waits = 0;
304 while ((p = fgets(str, 4096, f)) != NULL) {
305 struct io_piece *ipo;
/* r == 4: I/O line; r == 2 (handled below): file action line */
308 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
314 if (!strcmp(act, "wait"))
316 else if (!strcmp(act, "read"))
318 else if (!strcmp(act, "write"))
320 else if (!strcmp(act, "sync"))
322 else if (!strcmp(act, "datasync"))
324 else if (!strcmp(act, "trim"))
327 log_err("fio: bad iolog file action: %s\n",
331 fileno = get_fileno(td, fname);
/* two-field line: file lifetime action rather than an I/O */
334 if (!strcmp(act, "add")) {
335 fileno = add_file(td, fname, 0, 1);
336 file_action = FIO_LOG_ADD_FILE;
338 } else if (!strcmp(act, "open")) {
339 fileno = get_fileno(td, fname);
340 file_action = FIO_LOG_OPEN_FILE;
341 } else if (!strcmp(act, "close")) {
342 fileno = get_fileno(td, fname);
343 file_action = FIO_LOG_CLOSE_FILE;
345 log_err("fio: bad iolog file action: %s\n",
350 log_err("bad iolog2: %s", p);
356 else if (rw == DDIR_WRITE) {
358 * Don't add a write for ro mode
363 } else if (rw == DDIR_WAIT) {
365 } else if (rw == DDIR_INVAL) {
366 } else if (!ddir_sync(rw)) {
367 log_err("bad ddir: %d\n", rw);
/*
 * valid entry: build an io_piece and queue it for replay
 */
374 ipo = malloc(sizeof(*ipo));
377 if (rw == DDIR_WAIT) {
380 ipo->offset = offset;
/* track the largest block size seen so buffers are sized right */
382 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
383 td->o.max_bs[rw] = bytes;
384 ipo->fileno = fileno;
385 ipo->file_action = file_action;
388 queue_io_piece(td, ipo);
395 if (writes && read_only) {
396 log_err("fio: <%s> skips replay of %d writes due to"
397 " read-only\n", td->o.name, writes);
/* derive the job's data direction from what the log contained */
401 if (!reads && !writes && !waits)
403 else if (reads && !writes)
404 td->o.td_ddir = TD_DDIR_READ;
405 else if (!reads && writes)
406 td->o.td_ddir = TD_DDIR_WRITE;
408 td->o.td_ddir = TD_DDIR_RW;
/*
 * Open the configured read_iolog_file, verify the version-2 signature
 * on its first line, and hand the stream to read_iolog2().  Version 1
 * logs are rejected with an error.
 */
414 * open iolog, check version, and call appropriate parser
416 static int init_iolog_read(struct thread_data *td)
418 char buffer[256], *p;
422 f = fopen(td->o.read_iolog_file, "r");
424 perror("fopen read iolog");
428 p = fgets(buffer, sizeof(buffer), f);
430 td_verror(td, errno, "iolog read");
431 log_err("fio: unable to read iolog\n");
437 * version 2 of the iolog stores a specific string as the
438 * first line, check for that
440 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
441 ret = read_iolog2(td, f);
443 log_err("fio: iolog version 1 is no longer supported\n");
/*
 * Open the write_iolog_file for appending, attach an 8 KB stdio buffer,
 * write the version-2 signature line, and emit an "add" record for
 * every file already known to the job.
 */
452 * Set up a log for storing io patterns.
454 static int init_iolog_write(struct thread_data *td)
460 f = fopen(td->o.write_iolog_file, "a");
462 perror("fopen write iolog");
467 * That's it for writing, setup a log buffer and we're done.
470 td->iolog_buf = malloc(8192);
471 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
474 * write our version line
476 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
477 perror("iolog init\n");
482 * add all known files
484 for_each_file(td, ff, i)
485 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Top-level iolog setup for a job: for replay, auto-detect blktrace
 * vs. native fio iolog format and load accordingly; for recording,
 * open the write-side log.  Failures are reported via td_verror.
 */
490 int init_iolog(struct thread_data *td)
494 if (td->o.read_iolog_file) {
498 * Check if it's a blktrace file and load that if possible.
499 * Otherwise assume it's a normal log file and load that.
501 if (is_blktrace(td->o.read_iolog_file, &need_swap))
502 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
504 ret = init_iolog_read(td);
505 } else if (td->o.write_iolog_file)
506 ret = init_iolog_write(td);
509 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and zero-initialize an io_log, pre-sizing its sample array
 * to max_samples (1024) entries.  avg_msec is the averaging window for
 * samples; log_type tags what this log records.
 */
514 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
516 struct io_log *l = malloc(sizeof(*l));
518 memset(l, 0, sizeof(*l));
520 l->max_samples = 1024;
521 l->log_type = log_type;
522 l->log = malloc(l->max_samples * sizeof(struct io_sample));
523 l->avg_msec = avg_msec;
/*
 * Optional large (1 MB) stdio buffering for log output files.  With
 * CONFIG_SETVBUF the buffer is allocated and installed via setvbuf();
 * the fallback variants' bodies are not visible here — presumably
 * no-ops returning NULL.  clear_file_buffer() releases whatever
 * set_file_buffer() returned.
 */
527 #ifdef CONFIG_SETVBUF
528 static void *set_file_buffer(FILE *f)
530 size_t size = 1048576;
534 setvbuf(f, buf, _IOFBF, size);
538 static void clear_file_buffer(void *buf)
543 static void *set_file_buffer(FILE *f)
548 static void clear_file_buffer(void *buf)
/*
 * Append every sample of an io_log to the named file as CSV text:
 * "time, value, ddir, bs".  Opened in append mode so several logs can
 * share one file; a large stdio buffer is used while writing.
 */
553 void __finish_log(struct io_log *log, const char *name)
559 f = fopen(name, "a");
565 buf = set_file_buffer(f);
567 for (i = 0; i < log->nr_samples; i++) {
568 fprintf(f, "%lu, %lu, %u, %u\n",
569 (unsigned long) log->log[i].time,
570 (unsigned long) log->log[i].val,
571 log->log[i].ddir, log->log[i].bs);
575 clear_file_buffer(buf);
/*
 * Write out one log under "<prefix>_<postfix>.log", serializing writers
 * on a per-file fio lock (non-blocking when trylock is requested — the
 * failed-trylock path returns a "retry later" result to the caller).
 * GUI clients get the log shipped over the network instead of written
 * locally.
 */
580 static int finish_log_named(struct thread_data *td, struct io_log *log,
581 const char *prefix, const char *postfix,
586 snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
589 if (fio_trylock_file(file_name))
592 fio_lock_file(file_name);
594 if (td->client_type == FIO_CLIENT_TYPE_GUI) {
595 fio_send_iolog(td, log, file_name);
599 __finish_log(log, file_name);
601 fio_unlock_file(file_name);
/* Convenience wrapper: finish a log using the job name as the filename
 * prefix. */
605 static int finish_log(struct thread_data *td, struct io_log *log,
606 const char *name, int trylock)
608 return finish_log_named(td, log, td->o.name, name, trylock);
/*
 * Write one log, preferring the user-supplied log filename prefix when
 * set, otherwise falling back to the job-name-derived default.
 */
611 static int write_this_log(struct thread_data *td, struct io_log *log,
612 const char *log_file, const char *name, int try)
620 ret = finish_log_named(td, log, log_file, name, try);
622 ret = finish_log(td, log, name, try);
/* Per-log-type wrappers: each binds the matching io_log, its option
 * filename and the filename postfix, then defers to write_this_log(). */
627 static int write_iops_log(struct thread_data *td, int try)
629 struct thread_options *o = &td->o;
631 return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
634 static int write_slat_log(struct thread_data *td, int try)
636 struct thread_options *o = &td->o;
638 return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
641 static int write_clat_log(struct thread_data *td, int try)
643 struct thread_options *o = &td->o;
645 return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
648 static int write_lat_log(struct thread_data *td, int try)
650 struct thread_options *o = &td->o;
652 return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
655 static int write_bandw_log(struct thread_data *td, int try)
657 struct thread_options *o = &td->o;
659 return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
/* Dispatch table pairing each log's completion-mask bit with its writer
 * function; fio_writeout_logs() iterates it, using the mask to track
 * which logs have been flushed. */
674 int (*fn)(struct thread_data *, int);
677 static struct log_type log_types[] = {
680 .fn = write_bandw_log,
683 .mask = LAT_LOG_MASK,
687 .mask = SLAT_LOG_MASK,
688 .fn = write_slat_log,
691 .mask = CLAT_LOG_MASK,
692 .fn = write_clat_log,
695 .mask = IOPS_LOG_MASK,
696 .fn = write_iops_log,
/*
 * Flush all of a job's enabled logs.  Each writer is tried with a
 * non-blocking file lock while more than one log remains (the
 * "log_left != 1" argument), switching to a blocking lock for the last
 * one; completed logs are marked in log_mask.  The retry loop bails
 * when a full pass makes no progress.  The runstate is bumped to
 * TD_FINISHING for the duration and restored afterwards.
 */
700 void fio_writeout_logs(struct thread_data *td)
702 unsigned int log_mask = 0;
703 unsigned int log_left = ALL_LOG_NR;
706 old_state = td_bump_runstate(td, TD_FINISHING);
711 int prev_log_left = log_left;
713 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
714 struct log_type *lt = &log_types[i];
/* skip logs already flushed on a previous pass */
717 if (!(log_mask & lt->mask)) {
718 ret = lt->fn(td, log_left != 1);
721 log_mask |= lt->mask;
/* no log completed this pass: stop retrying */
726 if (prev_log_left == log_left)
730 td_restore_runstate(td, old_state);