2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic first line identifying a version-2 iolog file (checked on read, written on create). */
15 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Append a parsed replay entry to the tail of the per-thread iolog queue
 * and account its length toward the job's total expected I/O size.
 */
17 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
19 flist_add_tail(&ipo->list, &td->io_log_list);
20 td->total_io_size += ipo->len;
/*
 * Record one io_u to the iolog file as "<filename> <action> <offset> <len>".
 * No-op unless the job was configured with write_iolog_file.
 */
23 void log_io_u(struct thread_data *td, struct io_u *io_u)
25 const char *act[] = { "read", "write", "sync", "datasync",
26 "sync_file_range", "wait", "trim" };
/* act[] has 7 entries; valid ddir indices are 0..6 */
28 assert(io_u->ddir <= 6);
30 if (!td->o.write_iolog_file)
33 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
34 act[io_u->ddir], io_u->offset,
/*
 * Record a file lifecycle event ("add", "open" or "close") to the iolog
 * file. No-op unless write_iolog_file is set; the early-exit below also
 * skips logging for the pre-open/close done before the job starts.
 */
38 void log_file(struct thread_data *td, struct fio_file *f,
39 enum file_log_act what)
41 const char *act[] = { "add", "open", "close" };
45 if (!td->o.write_iolog_file)
50 * this happens on the pre-open/close done before the job starts
55 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Reproduce the recorded inter-I/O delay (in usec) before issuing the
 * next replayed entry. The time already spent since the last issue is
 * measured first (sub-100usec delays are treated as noise per the note
 * below). Sleeping is chunked — at most 500ms per usec_sleep() call —
 * so td->terminate is re-checked regularly and shutdown stays prompt.
 * NOTE(review): the elided lines presumably subtract `usec` from `delay`
 * and clamp this_delay to 500000 — confirm against the full source.
 */
58 static void iolog_delay(struct thread_data *td, unsigned long delay)
60 unsigned long usec = utime_since_now(&td->last_issue);
61 unsigned long this_delay;
69 * less than 100 usec delay, just regard it as noise
74 while (delay && !td->terminate) {
76 if (this_delay > 500000)
79 usec_sleep(td, this_delay);
/*
 * Handle "special" replay entries, i.e. file actions rather than real
 * I/O. Regular entries carry a valid ddir (!= DDIR_INVAL) and are
 * rejected early; otherwise the recorded file action is dispatched:
 * open via td_io_open_file() (with td_verror() on failure), close via
 * td_io_close_file(), unlink via unlink(2). Unknown actions are logged
 * as an error. Return-value semantics (special vs. not vs. error) are
 * set on elided lines — confirm against the full source.
 */
84 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
92 if (ipo->ddir != DDIR_INVAL)
95 f = td->files[ipo->fileno];
97 switch (ipo->file_action) {
98 case FIO_LOG_OPEN_FILE:
99 ret = td_io_open_file(td, f);
102 td_verror(td, ret, "iolog open file");
104 case FIO_LOG_CLOSE_FILE:
105 td_io_close_file(td, f);
107 case FIO_LOG_UNLINK_FILE:
108 unlink(f->file_name);
111 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Fetch the next replay entry and populate io_u from it. Entries are
 * popped from the head of td->io_log_list; file-action entries are
 * consumed via ipo_special() and the loop continues to the next entry.
 * For real I/O the offset/length/file are copied into io_u and the file
 * reference is taken with get_file(). Recorded delays are honored either
 * per-entry via iolog_delay() or, for DDIR_WAIT entries, as an absolute
 * time since job genesis (the mtime_since_genesis() branch). Return
 * value convention is established on elided lines — likely 0 on success,
 * non-zero when the log is exhausted; confirm against the full source.
 */
118 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
120 struct io_piece *ipo;
121 unsigned long elapsed;
123 while (!flist_empty(&td->io_log_list)) {
126 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
127 flist_del(&ipo->list);
128 remove_trim_entry(td, ipo);
130 ret = ipo_special(td, ipo);
134 } else if (ret > 0) {
139 io_u->ddir = ipo->ddir;
140 if (ipo->ddir != DDIR_WAIT) {
141 io_u->offset = ipo->offset;
142 io_u->buflen = ipo->len;
143 io_u->file = td->files[ipo->fileno];
144 get_file(io_u->file);
145 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
146 io_u->buflen, io_u->file->file_name);
148 iolog_delay(td, ipo->delay);
/* DDIR_WAIT: delay is absolute msec since genesis, not relative */
150 elapsed = mtime_since_genesis();
151 if (ipo->delay > elapsed)
152 usec_sleep(td, (ipo->delay - elapsed) * 1000);
157 if (io_u->ddir != DDIR_WAIT)
/*
 * Tear down the verify history: drain every io_piece from both the
 * rb-tree (io_hist_tree) and the flat list (io_hist_list), removing
 * each from the trim bookkeeping as well. Freeing of each ipo happens
 * on elided lines.
 */
165 void prune_io_piece_log(struct thread_data *td)
167 struct io_piece *ipo;
170 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
171 ipo = rb_entry(n, struct io_piece, rb_node);
172 rb_erase(n, &td->io_hist_tree);
173 remove_trim_entry(td, ipo);
178 while (!flist_empty(&td->io_hist_list)) {
179 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
180 flist_del(&ipo->list);
181 remove_trim_entry(td, ipo);
/*
 * log a successful write, so we can unwind the log for verify
 *
 * Builds an io_piece from io_u and records it either on the flat
 * io_hist_list (fast path, order-preserving) or sorted into the
 * io_hist_tree rb-tree keyed on (file, offset). The tree path is needed
 * when later verification must detect overwrites of the same block: an
 * exact (file, offset) match evicts the older entry before inserting the
 * new one. Trim candidates are additionally linked onto td->trim_list.
 */
190 void log_io_piece(struct thread_data *td, struct io_u *io_u)
192 struct rb_node **p, *parent;
193 struct io_piece *ipo, *__ipo;
195 ipo = malloc(sizeof(struct io_piece));
197 ipo->file = io_u->file;
198 ipo->offset = io_u->offset;
199 ipo->len = io_u->buflen;
200 ipo->numberio = io_u->numberio;
201 ipo->flags = IP_F_IN_FLIGHT;
205 if (io_u_should_trim(td, io_u)) {
206 flist_add_tail(&ipo->trim_list, &td->trim_list);
211 * We don't need to sort the entries, if:
213 * Sequential writes, or
214 * Random writes that lay out the file as it goes along
216 * For both these cases, just reading back data in the order we
217 * wrote it out is the fastest.
219 * One exception is if we don't have a random map AND we are doing
220 * verifies, in that case we need to check for duplicate blocks and
221 * drop the old one, which we rely on the rb insert/lookup for
224 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
225 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
226 INIT_FLIST_HEAD(&ipo->list);
227 flist_add_tail(&ipo->list, &td->io_hist_list);
228 ipo->flags |= IP_F_ONLIST;
233 RB_CLEAR_NODE(&ipo->rb_node);
236 * Sort the entry into the verification list
239 p = &td->io_hist_tree.rb_node;
244 __ipo = rb_entry(parent, struct io_piece, rb_node);
/* order by file pointer first, then by offset */
245 if (ipo->file < __ipo->file)
247 else if (ipo->file > __ipo->file)
249 else if (ipo->offset < __ipo->offset)
251 else if (ipo->offset > __ipo->offset)
/* exact (file, offset) match: evict the stale entry, re-insert */
254 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
255 __ipo->offset, __ipo->len,
256 ipo->offset, ipo->len);
258 rb_erase(parent, &td->io_hist_tree);
259 remove_trim_entry(td, __ipo);
265 rb_link_node(&ipo->rb_node, parent, p);
266 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
267 ipo->flags |= IP_F_ONRB;
/*
 * Remove io_u's previously logged io_piece from whichever verify
 * structure holds it — the rb-tree (IP_F_ONRB) or the flat history
 * list (IP_F_ONLIST).
 */
271 void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
273 struct io_piece *ipo = io_u->ipo;
278 if (ipo->flags & IP_F_ONRB)
279 rb_erase(&ipo->rb_node, &td->io_hist_tree);
280 else if (ipo->flags & IP_F_ONLIST)
281 flist_del(&ipo->list);
/*
 * Shrink the logged io_piece to the bytes actually transferred
 * (xfer_buflen minus the residual), e.g. after a short completion.
 */
288 void trim_io_piece(struct thread_data *td, struct io_u *io_u)
290 struct io_piece *ipo = io_u->ipo;
295 ipo->len = io_u->xfer_buflen - io_u->resid;
/*
 * Close out the iolog being written: the stream flush/close and buffer
 * free happen on elided lines; the buffer pointer is cleared here.
 */
298 void write_iolog_close(struct thread_data *td)
304 td->iolog_buf = NULL;
/*
 * Read version 2 iolog data. It is enhanced to include per-file logging,
 *
 * Parses each line as "<fname> <action> [<offset> <len>]" via sscanf
 * (%256s bounds fit the 256+16 byte fname/act buffers). 4-field lines
 * are I/O entries (read/write/sync/datasync/trim/wait); shorter lines
 * are file actions (add/open/close). Each entry becomes an io_piece
 * queued with queue_io_piece(). On completion td_ddir is derived from
 * which operation kinds were seen, and writes are skipped (with a
 * warning) when running read-only. Returns non-zero on a bad log —
 * exact return paths are on elided lines.
 */
311 static int read_iolog2(struct thread_data *td, FILE *f)
313 unsigned long long offset;
315 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
320 free_release_files(td);
323 * Read in the read iolog and store it, reuse the infrastructure
324 * for doing verifications.
327 fname = malloc(256+16);
328 act = malloc(256+16);
330 reads = writes = waits = 0;
331 while ((p = fgets(str, 4096, f)) != NULL) {
332 struct io_piece *ipo;
335 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
/* 4 fields: a real I/O entry — map action string to ddir */
341 if (!strcmp(act, "wait"))
343 else if (!strcmp(act, "read"))
345 else if (!strcmp(act, "write"))
347 else if (!strcmp(act, "sync"))
349 else if (!strcmp(act, "datasync"))
351 else if (!strcmp(act, "trim"))
354 log_err("fio: bad iolog file action: %s\n",
358 fileno = get_fileno(td, fname);
/* fewer fields: a file-action entry */
361 if (!strcmp(act, "add")) {
362 fileno = add_file(td, fname, 0, 1);
363 file_action = FIO_LOG_ADD_FILE;
365 } else if (!strcmp(act, "open")) {
366 fileno = get_fileno(td, fname);
367 file_action = FIO_LOG_OPEN_FILE;
368 } else if (!strcmp(act, "close")) {
369 fileno = get_fileno(td, fname);
370 file_action = FIO_LOG_CLOSE_FILE;
372 log_err("fio: bad iolog file action: %s\n",
377 log_err("bad iolog2: %s", p);
383 else if (rw == DDIR_WRITE) {
385 * Don't add a write for ro mode
390 } else if (rw == DDIR_WAIT) {
392 } else if (rw == DDIR_INVAL) {
393 } else if (!ddir_sync(rw)) {
394 log_err("bad ddir: %d\n", rw);
/*
401 * store the entry for replay
 */
401 ipo = malloc(sizeof(*ipo));
404 if (rw == DDIR_WAIT) {
407 ipo->offset = offset;
/* track the largest block size seen, for buffer sizing */
409 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
410 td->o.max_bs[rw] = bytes;
411 ipo->fileno = fileno;
412 ipo->file_action = file_action;
415 queue_io_piece(td, ipo);
422 if (writes && read_only) {
423 log_err("fio: <%s> skips replay of %d writes due to"
424 " read-only\n", td->o.name, writes);
/* derive the job's data direction from what the log contained */
428 if (!reads && !writes && !waits)
430 else if (reads && !writes)
431 td->o.td_ddir = TD_DDIR_READ;
432 else if (!reads && writes)
433 td->o.td_ddir = TD_DDIR_WRITE;
435 td->o.td_ddir = TD_DDIR_RW;
/*
 * open iolog, check version, and call appropriate parser
 *
 * Opens td->o.read_iolog_file, reads the first line and dispatches to
 * read_iolog2() when it carries the version-2 magic string; version 1
 * logs are rejected with an error.
 */
443 static int init_iolog_read(struct thread_data *td)
445 char buffer[256], *p;
449 f = fopen(td->o.read_iolog_file, "r");
451 perror("fopen read iolog");
455 p = fgets(buffer, sizeof(buffer), f);
457 td_verror(td, errno, "iolog read");
458 log_err("fio: unable to read iolog\n");
464 * version 2 of the iolog stores a specific string as the
465 * first line, check for that
467 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
468 ret = read_iolog2(td, f);
470 log_err("fio: iolog version 1 is no longer supported\n");
/*
 * Set up a log for storing io patterns.
 *
 * Opens td->o.write_iolog_file for append, attaches an 8KB fully
 * buffered stream buffer (kept in td->iolog_buf so write_iolog_close()
 * can free it), writes the version-2 header line, then records an "add"
 * event for every file the job already knows about.
 */
481 static int init_iolog_write(struct thread_data *td)
487 f = fopen(td->o.write_iolog_file, "a");
489 perror("fopen write iolog");
494 * That's it for writing, setup a log buffer and we're done.
497 td->iolog_buf = malloc(8192);
498 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
501 * write our version line
503 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
504 perror("iolog init\n");
509 * add all known files
511 for_each_file(td, ff, i)
512 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Top-level iolog setup for a thread: when replaying, load either a
 * blktrace file (auto-detected, with byte-swap handling) or a normal
 * fio iolog; when recording, open the write log. A failed init is
 * reported via td_verror(EINVAL).
 */
517 int init_iolog(struct thread_data *td)
521 if (td->o.read_iolog_file) {
525 * Check if it's a blktrace file and load that if possible.
526 * Otherwise assume it's a normal log file and load that.
528 if (is_blktrace(td->o.read_iolog_file, &need_swap))
529 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
531 ret = init_iolog_read(td);
532 } else if (td->o.write_iolog_file)
533 ret = init_iolog_write(td);
536 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and zero-initialize an io_log with an initial capacity of
 * 1024 samples (presumably grown elsewhere when full — confirm against
 * the full source), recording the log type and averaging interval.
 */
541 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
543 struct io_log *l = malloc(sizeof(*l));
545 memset(l, 0, sizeof(*l));
547 l->max_samples = 1024;
548 l->log_type = log_type;
549 l->log = malloc(l->max_samples * sizeof(struct io_sample));
550 l->avg_msec = avg_msec;
/*
 * Helpers to attach/detach a large (1MB) stdio buffer for log output.
 * When CONFIG_SETVBUF is available the buffer is heap-allocated and
 * installed with setvbuf(); otherwise the stubs below (bodies elided
 * in this excerpt) do nothing and return NULL.
 */
#ifdef CONFIG_SETVBUF
555 static void *set_file_buffer(FILE *f)
557 size_t size = 1048576;
561 setvbuf(f, buf, _IOFBF, size);
565 static void clear_file_buffer(void *buf)
/* no-setvbuf fallbacks */
570 static void *set_file_buffer(FILE *f)
575 static void clear_file_buffer(void *buf)
/*
 * Flush an in-memory io_log to the named file (opened for append),
 * one "time, value, ddir, bs" CSV line per sample, using a large stdio
 * buffer for throughput. Log/buffer teardown happens on elided lines.
 */
580 void __finish_log(struct io_log *log, const char *name)
586 f = fopen(name, "a");
592 buf = set_file_buffer(f);
594 for (i = 0; i < log->nr_samples; i++) {
595 fprintf(f, "%lu, %lu, %u, %u\n",
596 (unsigned long) log->log[i].time,
597 (unsigned long) log->log[i].val,
598 log->log[i].ddir, log->log[i].bs);
602 clear_file_buffer(buf);
/*
 * Write out one log to "<prefix>_<postfix>.log" under the cross-process
 * file lock. With trylock set, a busy lock means "try again later"
 * (non-zero return, per the callers' retry loop in fio_writeout_logs());
 * otherwise block on fio_lock_file(). GUI clients ship the log over the
 * network via fio_send_iolog() instead of writing it locally.
 */
607 static int finish_log_named(struct thread_data *td, struct io_log *log,
608 const char *prefix, const char *postfix,
613 snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
616 if (fio_trylock_file(file_name))
619 fio_lock_file(file_name);
621 if (td->client_type == FIO_CLIENT_TYPE_GUI) {
622 fio_send_iolog(td, log, file_name);
626 __finish_log(log, file_name);
628 fio_unlock_file(file_name);
/* Convenience wrapper: write a log using the job name as the file prefix. */
632 static int finish_log(struct thread_data *td, struct io_log *log,
633 const char *name, int trylock)
635 return finish_log_named(td, log, td->o.name, name, trylock);
/*
 * Write one log, preferring a user-supplied filename prefix (log_file)
 * and falling back to the job-name-based default when none was given.
 */
638 static int write_this_log(struct thread_data *td, struct io_log *log,
639 const char *log_file, const char *name, int try)
647 ret = finish_log_named(td, log, log_file, name, try);
649 ret = finish_log(td, log, name, try);
/* Write the IOPS log ("iops" postfix). */
654 static int write_iops_log(struct thread_data *td, int try)
656 struct thread_options *o = &td->o;
658 return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
/* Write the submission-latency log ("slat" postfix). */
661 static int write_slat_log(struct thread_data *td, int try)
663 struct thread_options *o = &td->o;
665 return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
/* Write the completion-latency log ("clat" postfix). */
668 static int write_clat_log(struct thread_data *td, int try)
670 struct thread_options *o = &td->o;
672 return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
/* Write the total-latency log ("lat" postfix). */
675 static int write_lat_log(struct thread_data *td, int try)
677 struct thread_options *o = &td->o;
679 return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
/* Write the bandwidth log ("bw" postfix). */
682 static int write_bandw_log(struct thread_data *td, int try)
684 struct thread_options *o = &td->o;
686 return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
/*
 * Dispatch table mapping each per-log bitmask to its writer function;
 * fio_writeout_logs() iterates this to flush every enabled log.
 * (Enclosing struct definition and the bw entry's mask line are elided
 * in this excerpt.)
 */
701 int (*fn)(struct thread_data *, int);
704 static struct log_type log_types[] = {
707 .fn = write_bandw_log,
710 .mask = LAT_LOG_MASK,
714 .mask = SLAT_LOG_MASK,
715 .fn = write_slat_log,
718 .mask = CLAT_LOG_MASK,
719 .fn = write_clat_log,
722 .mask = IOPS_LOG_MASK,
723 .fn = write_iops_log,
/*
 * Flush all of the thread's enabled logs to disk. Runs in TD_FINISHING
 * state (restored on exit). Each writer is first invoked in trylock
 * mode (try != 0 while more than one log remains) so a contended file
 * lock doesn't stall the others; successfully written logs are marked
 * in log_mask and skipped on the next pass. The loop bails out when a
 * full pass makes no progress (prev_log_left == log_left), presumably
 * falling back to blocking writes on elided lines — confirm against
 * the full source.
 */
727 void fio_writeout_logs(struct thread_data *td)
729 unsigned int log_mask = 0;
730 unsigned int log_left = ALL_LOG_NR;
733 old_state = td_bump_runstate(td, TD_FINISHING);
738 int prev_log_left = log_left;
740 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
741 struct log_type *lt = &log_types[i];
744 if (!(log_mask & lt->mask)) {
745 ret = lt->fn(td, log_left != 1);
748 log_mask |= lt->mask;
753 if (prev_log_left == log_left)
757 td_restore_runstate(td, old_state);