2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic string written as (and matched against) the first line of a
 * version-2 iolog file; see init_iolog_read()/init_iolog_write(). */
15 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Append a parsed iolog entry to the thread's replay queue and account
 * its length toward the total I/O size the job will submit.
 */
17 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
19 flist_add_tail(&ipo->list, &td->io_log_list);
20 td->total_io_size += ipo->len;
/*
 * Record one issued io_u in the output iolog as
 * "<filename> <action> <offset> <length>".  No-op unless the job was
 * started with write_iolog_file.
 */
23 void log_io_u(struct thread_data *td, struct io_u *io_u)
/* Textual names indexed by io_u->ddir; must stay in sync with the
 * ddir enum (7 entries, indices 0..6). */
25 const char *act[] = { "read", "write", "sync", "datasync",
26 "sync_file_range", "wait", "trim" };
/* Guard: any ddir beyond index 6 would read past act[]. */
28 assert(io_u->ddir <= 6);
30 if (!td->o.write_iolog_file)
33 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
34 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level event (add/open/close) in the output iolog as
 * "<filename> <action>".  Skipped when no iolog is being written and
 * (per the comment below) for the pre-open/close performed before the
 * job actually starts.
 */
38 void log_file(struct thread_data *td, struct fio_file *f,
39 enum file_log_act what)
/* Indexed by 'what' (enum file_log_act): add/open/close. */
41 const char *act[] = { "add", "open", "close" };
45 if (!td->o.write_iolog_file)
50 * this happens on the pre-open/close done before the job starts
55 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Sleep to reproduce the recorded inter-I/O gap during replay.
 * 'delay' is in usec.  The elapsed time since the last issue is read
 * here; presumably it is subtracted from 'delay' in lines not visible
 * in this fragment — TODO confirm.  Sleeping is chunked (<= 500 ms per
 * usec_sleep call) so td->terminate can cut the wait short.
 */
58 static void iolog_delay(struct thread_data *td, unsigned long delay)
60 unsigned long usec = utime_since_now(&td->last_issue);
61 unsigned long this_delay;
/* Per the original comment: delays under 100 usec are treated as noise. */
69 * less than 100 usec delay, just regard it as noise
74 while (delay && !td->terminate) {
76 if (this_delay > 500000)
79 usec_sleep(td, this_delay);
/*
 * Handle "special" replay entries that describe a file action rather
 * than real I/O (their ddir is DDIR_INVAL).  Opens, closes, or unlinks
 * the referenced file.  The return value is consumed by
 * read_iolog_get(); the exact convention (0 vs >0 vs <0) is not fully
 * visible in this fragment — NOTE(review): confirm against caller.
 */
84 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
/* Real I/O entries are not special — bail out early. */
92 if (ipo->ddir != DDIR_INVAL)
95 f = td->files[ipo->fileno];
97 switch (ipo->file_action) {
98 case FIO_LOG_OPEN_FILE:
99 ret = td_io_open_file(td, f);
102 td_verror(td, ret, "iolog open file");
104 case FIO_LOG_CLOSE_FILE:
105 td_io_close_file(td, f);
107 case FIO_LOG_UNLINK_FILE:
108 unlink(f->file_name);
111 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next replay entry off td->io_log_list and populate *io_u
 * from it.  File-action entries are dispatched through ipo_special();
 * DDIR_WAIT entries sleep instead of issuing I/O.  The function's tail
 * (return values for success/exhaustion) is not visible in this
 * fragment — NOTE(review): confirm return convention at call sites.
 */
118 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
120 struct io_piece *ipo;
121 unsigned long elapsed;
123 while (!flist_empty(&td->io_log_list)) {
/* Take the oldest entry and detach it from both the replay list
 * and the trim bookkeeping. */
126 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
127 flist_del(&ipo->list);
128 remove_trim_entry(td, ipo);
130 ret = ipo_special(td, ipo);
134 } else if (ret > 0) {
139 io_u->ddir = ipo->ddir;
140 if (ipo->ddir != DDIR_WAIT) {
/* Normal I/O: copy offset/length/file and take a file reference. */
141 io_u->offset = ipo->offset;
142 io_u->buflen = ipo->len;
143 io_u->file = td->files[ipo->fileno];
144 get_file(io_u->file);
145 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
146 io_u->buflen, io_u->file->file_name);
148 iolog_delay(td, ipo->delay);
/* DDIR_WAIT path: ipo->delay here is an absolute time since job
 * genesis (msec); sleep only for the remainder. */
150 elapsed = mtime_since_genesis();
151 if (ipo->delay > elapsed)
152 usec_sleep(td, (ipo->delay - elapsed) * 1000);
157 if (io_u->ddir != DDIR_WAIT)
/*
 * Drop every pending verify entry for this thread: first all nodes of
 * the sorted rb-tree, then all entries on the plain history list.
 * Each entry is also removed from the trim bookkeeping.
 */
165 void prune_io_piece_log(struct thread_data *td)
167 struct io_piece *ipo;
/* Drain the rb-tree used when entries must be kept sorted. */
170 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
171 ipo = rb_entry(n, struct io_piece, rb_node);
172 rb_erase(n, &td->io_hist_tree);
173 remove_trim_entry(td, ipo);
/* Drain the unsorted history list used for the fast path. */
178 while (!flist_empty(&td->io_hist_list)) {
179 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
180 flist_del(&ipo->list);
181 remove_trim_entry(td, ipo);
188 * log a successful write, so we can unwind the log for verify
/*
 * Record a completed write in the verify history.  Entries go either
 * on the plain io_hist_list (fast path, insertion order == read-back
 * order) or into the io_hist_tree rb-tree sorted by (file, offset)
 * when duplicate blocks must be detected and superseded.
 */
190 void log_io_piece(struct thread_data *td, struct io_u *io_u)
192 struct rb_node **p, *parent;
193 struct io_piece *ipo, *__ipo;
/* NOTE(review): malloc result is used unchecked in the visible lines;
 * fio's convention elsewhere appears to tolerate this — confirm. */
195 ipo = malloc(sizeof(struct io_piece));
197 ipo->file = io_u->file;
198 ipo->offset = io_u->offset;
199 ipo->len = io_u->buflen;
200 ipo->numberio = io_u->numberio;
201 ipo->flags = IP_F_IN_FLIGHT;
/* Track writes that will later be trimmed. */
205 if (io_u_should_trim(td, io_u)) {
206 flist_add_tail(&ipo->trim_list, &td->trim_list);
211 * We don't need to sort the entries, if:
213 * Sequential writes, or
214 * Random writes that lay out the file as it goes along
216 * For both these cases, just reading back data in the order we
217 * wrote it out is the fastest.
219 * One exception is if we don't have a random map AND we are doing
220 * verifies, in that case we need to check for duplicate blocks and
221 * drop the old one, which we rely on the rb insert/lookup for
/* Fast path: append to the list when sorting is unnecessary (see the
 * rationale in the comment above). */
224 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
225 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
226 INIT_FLIST_HEAD(&ipo->list);
227 flist_add_tail(&ipo->list, &td->io_hist_list);
228 ipo->flags |= IP_F_ONLIST;
233 RB_CLEAR_NODE(&ipo->rb_node);
236 * Sort the entry into the verification list
/* Standard rb-tree descent keyed by (file pointer, offset). */
239 p = &td->io_hist_tree.rb_node;
244 __ipo = rb_entry(parent, struct io_piece, rb_node);
245 if (ipo->file < __ipo->file)
247 else if (ipo->file > __ipo->file)
249 else if (ipo->offset < __ipo->offset)
251 else if (ipo->offset > __ipo->offset)
/* Exact (file, offset) match: the old entry is stale — evict it
 * so verify sees only the newest write of this block. */
254 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
255 __ipo->offset, __ipo->len,
256 ipo->offset, ipo->len);
258 rb_erase(parent, &td->io_hist_tree);
259 remove_trim_entry(td, __ipo);
265 rb_link_node(&ipo->rb_node, parent, p);
266 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
267 ipo->flags |= IP_F_ONRB;
/*
 * Undo log_io_piece() for an io_u (e.g. when the write did not
 * complete as logged): remove the entry from whichever container it
 * was placed on, as recorded by its IP_F_ONRB / IP_F_ONLIST flags.
 */
271 void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
273 struct io_piece *ipo = io_u->ipo;
278 if (ipo->flags & IP_F_ONRB)
279 rb_erase(&ipo->rb_node, &td->io_hist_tree);
280 else if (ipo->flags & IP_F_ONLIST)
281 flist_del(&ipo->list);
/*
 * Shrink the logged length of a piece to the number of bytes actually
 * transferred (requested length minus residual), so verify only checks
 * what really hit the media.
 */
288 void trim_io_piece(struct thread_data *td, struct io_u *io_u)
290 struct io_piece *ipo = io_u->ipo;
295 ipo->len = io_u->xfer_buflen - io_u->resid;
/*
 * Tear down the iolog writer.  Presumably flushes/closes td->iolog_f
 * and frees the stdio buffer in lines not visible here — the fragment
 * only shows the buffer pointer being cleared.
 */
298 void write_iolog_close(struct thread_data *td)
304 td->iolog_buf = NULL;
308 * Read version 2 iolog data. It is enhanced to include per-file logging,
/*
 * Parse a version-2 iolog from stream 'f' into td's replay queue.
 * Each line is either an I/O record ("<file> <act> <offset> <len>")
 * or a file action ("<file> add|open|close").  Also derives the job's
 * effective data direction from the mix of records seen.
 */
311 static int read_iolog2(struct thread_data *td, FILE *f)
313 unsigned long long offset;
315 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
/* Start from a clean file table; "add" actions repopulate it. */
320 free_release_files(td);
323 * Read in the read iolog and store it, reuse the infrastructure
324 * for doing verifications.
/* 256 matches the %256s scanf width; +16 is slack for the NUL etc. */
327 fname = malloc(256+16);
328 act = malloc(256+16);
330 reads = writes = waits = 0;
331 while ((p = fgets(str, 4096, f)) != NULL) {
332 struct io_piece *ipo;
/* r is the number of converted fields: 4 => I/O record,
 * fewer => file-action record (handled below). */
335 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
/* Map the action word to a ddir for full I/O records. */
341 if (!strcmp(act, "wait"))
343 else if (!strcmp(act, "read"))
345 else if (!strcmp(act, "write"))
347 else if (!strcmp(act, "sync"))
349 else if (!strcmp(act, "datasync"))
351 else if (!strcmp(act, "trim"))
354 log_err("fio: bad iolog file action: %s\n",
358 fileno = get_fileno(td, fname);
/* Two-field records are file actions. */
361 if (!strcmp(act, "add")) {
362 fileno = add_file(td, fname, 0, 1);
363 file_action = FIO_LOG_ADD_FILE;
365 } else if (!strcmp(act, "open")) {
366 fileno = get_fileno(td, fname);
367 file_action = FIO_LOG_OPEN_FILE;
368 } else if (!strcmp(act, "close")) {
369 fileno = get_fileno(td, fname);
370 file_action = FIO_LOG_CLOSE_FILE;
372 log_err("fio: bad iolog file action: %s\n",
377 log_err("bad iolog2: %s", p);
383 else if (rw == DDIR_WRITE) {
385 * Don't add a write for ro mode
390 } else if (rw == DDIR_WAIT) {
392 } else if (rw == DDIR_INVAL) {
393 } else if (!ddir_sync(rw)) {
394 log_err("bad ddir: %d\n", rw);
/* Record accepted — build the replay entry and queue it. */
401 ipo = malloc(sizeof(*ipo));
404 if (rw == DDIR_WAIT) {
407 ipo->offset = offset;
/* Track the largest block size seen so buffers are big enough. */
409 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
410 td->o.max_bs[rw] = bytes;
411 ipo->fileno = fileno;
412 ipo->file_action = file_action;
416 queue_io_piece(td, ipo);
/* Writes cannot be replayed read-only; warn and (presumably) drop. */
423 if (writes && read_only) {
424 log_err("fio: <%s> skips replay of %d writes due to"
425 " read-only\n", td->o.name, writes);
/* Derive td_ddir from what the log actually contained. */
429 if (!reads && !writes && !waits)
431 else if (reads && !writes)
432 td->o.td_ddir = TD_DDIR_READ;
433 else if (!reads && writes)
434 td->o.td_ddir = TD_DDIR_WRITE;
436 td->o.td_ddir = TD_DDIR_RW;
442 * open iolog, check version, and call appropriate parser
/*
 * Open td->o.read_iolog_file, read its first line, and dispatch to the
 * matching parser.  Only the version-2 format is supported; a missing
 * or unrecognized header is an error.
 */
444 static int init_iolog_read(struct thread_data *td)
446 char buffer[256], *p;
450 f = fopen(td->o.read_iolog_file, "r");
452 perror("fopen read iolog");
456 p = fgets(buffer, sizeof(buffer), f);
458 td_verror(td, errno, "iolog read");
459 log_err("fio: unable to read iolog\n");
465 * version 2 of the iolog stores a specific string as the
466 * first line, check for that
468 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
469 ret = read_iolog2(td, f);
471 log_err("fio: iolog version 1 is no longer supported\n");
480 * Set up a log for storing io patterns.
/*
 * Open td->o.write_iolog_file for append, give it an 8 KiB stdio
 * buffer, emit the version-2 header line, then log an "add" entry for
 * every file the job already knows about.
 */
482 static int init_iolog_write(struct thread_data *td)
488 f = fopen(td->o.write_iolog_file, "a");
490 perror("fopen write iolog");
495 * That's it for writing, setup a log buffer and we're done.
498 td->iolog_buf = malloc(8192);
499 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
502 * write our version line
504 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
505 perror("iolog init\n");
510 * add all known files
512 for_each_file(td, ff, i)
513 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Top-level iolog setup for a thread: replay mode loads either a
 * blktrace file or a fio iolog; write mode opens an output log.
 * Reports EINVAL on the thread when initialization fails.
 */
518 int init_iolog(struct thread_data *td)
522 if (td->o.read_iolog_file) {
526 * Check if it's a blktrace file and load that if possible.
527 * Otherwise assume it's a normal log file and load that.
529 if (is_blktrace(td->o.read_iolog_file, &need_swap))
530 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
532 ret = init_iolog_read(td);
533 } else if (td->o.write_iolog_file)
534 ret = init_iolog_write(td);
537 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and zero-initialize an io_log with an initial capacity of
 * 1024 samples, storing the averaging interval and log type.  The new
 * log is presumably stored through *log in a line not visible here.
 */
542 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
544 struct io_log *l = malloc(sizeof(*l));
546 memset(l, 0, sizeof(*l));
548 l->max_samples = 1024;
549 l->log_type = log_type;
550 l->log = malloc(l->max_samples * sizeof(struct io_sample));
551 l->avg_msec = avg_msec;
/*
 * Optional large (1 MiB) stdio buffering for log output.  When
 * CONFIG_SETVBUF is available the buffer is heap-allocated and
 * installed with setvbuf(); the fallback versions below are no-ops.
 */
555 #ifdef CONFIG_SETVBUF
556 static void *set_file_buffer(FILE *f)
558 size_t size = 1048576;
562 setvbuf(f, buf, _IOFBF, size);
/* Releases the buffer returned by set_file_buffer(). */
566 static void clear_file_buffer(void *buf)
/* Fallback (no setvbuf support): no buffer is installed or freed. */
571 static void *set_file_buffer(FILE *f)
576 static void clear_file_buffer(void *buf)
/*
 * Append every sample of 'log' to the file 'name' as CSV lines of
 * "time, value, ddir, bs", using a large stdio buffer where available.
 */
581 void __finish_log(struct io_log *log, const char *name)
587 f = fopen(name, "a");
593 buf = set_file_buffer(f);
595 for (i = 0; i < log->nr_samples; i++) {
596 fprintf(f, "%lu, %lu, %u, %u\n",
597 (unsigned long) log->log[i].time,
598 (unsigned long) log->log[i].val,
599 log->log[i].ddir, log->log[i].bs);
603 clear_file_buffer(buf);
/*
 * Write out a log to "<prefix>_<postfix>.log" under a file lock.
 * With trylock set, failure to acquire the lock returns without
 * writing (caller retries); GUI clients receive the log over the
 * network instead of a local file.
 */
608 static int finish_log_named(struct thread_data *td, struct io_log *log,
609 const char *prefix, const char *postfix,
614 snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
/* Non-blocking attempt first; fall back to a blocking lock when the
 * caller did not request trylock. */
617 if (fio_trylock_file(file_name))
620 fio_lock_file(file_name);
622 if (td->client_type == FIO_CLIENT_TYPE_GUI) {
623 fio_send_iolog(td, log, file_name);
627 __finish_log(log, file_name);
629 fio_unlock_file(file_name);
/*
 * Convenience wrapper: write the log using the job name as the
 * filename prefix.
 */
633 static int finish_log(struct thread_data *td, struct io_log *log,
634 const char *name, int trylock)
636 return finish_log_named(td, log, td->o.name, name, trylock);
/*
 * Write one log, using the user-supplied log_file as the filename
 * prefix when set, otherwise falling back to the job-name prefix.
 */
639 static int write_this_log(struct thread_data *td, struct io_log *log,
640 const char *log_file, const char *name, int try)
648 ret = finish_log_named(td, log, log_file, name, try);
650 ret = finish_log(td, log, name, try);
/* Write the IOPS log ("iops" suffix). */
655 static int write_iops_log(struct thread_data *td, int try)
657 struct thread_options *o = &td->o;
659 return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
/* Write the submission-latency log ("slat" suffix). */
662 static int write_slat_log(struct thread_data *td, int try)
664 struct thread_options *o = &td->o;
666 return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
/* Write the completion-latency log ("clat" suffix). */
669 static int write_clat_log(struct thread_data *td, int try)
671 struct thread_options *o = &td->o;
673 return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
/* Write the total-latency log ("lat" suffix). */
676 static int write_lat_log(struct thread_data *td, int try)
678 struct thread_options *o = &td->o;
680 return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
/* Write the bandwidth log ("bw" suffix). */
683 static int write_bandw_log(struct thread_data *td, int try)
685 struct thread_options *o = &td->o;
687 return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
/*
 * Dispatch table for fio_writeout_logs(): each entry pairs a bitmask
 * (used to mark a log as written) with its writer function.  The
 * enclosing struct definition's opening lines are not visible in this
 * fragment.
 */
702 int (*fn)(struct thread_data *, int);
705 static struct log_type log_types[] = {
708 .fn = write_bandw_log,
711 .mask = LAT_LOG_MASK,
715 .mask = SLAT_LOG_MASK,
716 .fn = write_slat_log,
719 .mask = CLAT_LOG_MASK,
720 .fn = write_clat_log,
723 .mask = IOPS_LOG_MASK,
724 .fn = write_iops_log,
/*
 * Flush all per-thread logs to disk.  Iterates the log_types table,
 * retrying writers that could not get their file lock; each writer is
 * called with trylock semantics until it is the only one left
 * (log_left == 1), at which point it blocks.  The runstate is bumped
 * to TD_FINISHING for the duration and restored afterwards.
 */
728 void fio_writeout_logs(struct thread_data *td)
730 unsigned int log_mask = 0;
731 unsigned int log_left = ALL_LOG_NR;
734 old_state = td_bump_runstate(td, TD_FINISHING);
/* Track progress per pass so a wholly-stuck pass can break the loop. */
739 int prev_log_left = log_left;
741 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
742 struct log_type *lt = &log_types[i];
/* Skip logs already written (their mask bit is set). */
745 if (!(log_mask & lt->mask)) {
746 ret = lt->fn(td, log_left != 1);
749 log_mask |= lt->mask;
/* No writer made progress this pass — presumably give up to
 * avoid spinning; exact handling not visible here. */
754 if (prev_log_left == log_left)
758 td_restore_runstate(td, old_state);