2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic first line written to, and expected at the head of, a v2 iolog file. */
14 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * Append a parsed io_piece to the thread's replay queue (io_log_list) and
 * account its length toward the total I/O size to be replayed.
 * NOTE(review): the function's surrounding braces are elided in this view.
 */
16 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 flist_add_tail(&ipo->list, &td->io_log_list);
19 td->total_io_size += ipo->len;
/*
 * Record one I/O unit in the write-iolog as a text line:
 * "<filename> <action> <offset> <len>".  No-op unless a write_iolog_file
 * was configured (the early-return body is elided from this view).
 */
22 void log_io_u(struct thread_data *td, struct io_u *io_u)
/* act[] has 7 entries, indexed directly by io_u->ddir (0..6). */
24 const char *act[] = { "read", "write", "sync", "datasync",
25 "sync_file_range", "wait", "trim" };
/* Guard the act[] index: any ddir above 6 would read out of bounds. */
27 assert(io_u->ddir <= 6);
29 if (!td->o.write_iolog_file)
32 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level event (add/open/close) in the write-iolog as
 * "<filename> <action>".  No-op unless a write_iolog_file was configured.
 * NOTE(review): an elided branch between lines 44 and 54 appears to skip
 * logging in some case -- see the retained comment about pre-open/close.
 */
37 void log_file(struct thread_data *td, struct fio_file *f,
38 enum file_log_act what)
/* act[] is indexed by the file_log_act enum value 'what' (add=0, open=1, close=2). */
40 const char *act[] = { "add", "open", "close" };
44 if (!td->o.write_iolog_file)
49 * this happens on the pre-open/close done before the job starts
54 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Reproduce a recorded inter-I/O delay during replay.  Measures how long
 * has already elapsed since the last issued I/O; the elided middle
 * presumably subtracts that from 'delay' and treats sub-100usec remainders
 * as noise (per the retained comment) before sleeping -- TODO confirm.
 */
57 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 unsigned long usec = utime_since_now(&td->last_issue);
67 * less than 100 usec delay, just regard it as noise
72 usec_sleep(td, delay);
/*
 * Handle a "special" replay entry: entries with ddir == DDIR_INVAL carry a
 * file action (open/close/unlink) rather than real I/O.  Returns an int;
 * the exact return-value convention (consumed vs. error vs. normal I/O)
 * cannot be confirmed from this view since the return statements are elided.
 */
75 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
/* Not a file-action entry: nothing special to do for real I/O directions. */
83 if (ipo->ddir != DDIR_INVAL)
86 f = td->files[ipo->fileno];
88 switch (ipo->file_action) {
89 case FIO_LOG_OPEN_FILE:
90 ret = td_io_open_file(td, f);
/* Open failure path (condition elided): report via td_verror. */
93 td_verror(td, ret, "iolog open file");
95 case FIO_LOG_CLOSE_FILE:
96 td_io_close_file(td, f);
98 case FIO_LOG_UNLINK_FILE:
/* Unknown action value: log and (presumably) error out -- body elided. */
102 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next entry off the replay queue and populate 'io_u' from it.
 * File-action entries are dispatched to ipo_special(); DDIR_WAIT entries
 * only carry a delay.  Return convention (e.g. 0 = got one, non-zero =
 * exhausted/error) cannot be confirmed here -- the returns are elided.
 */
109 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 struct io_piece *ipo;
112 unsigned long elapsed;
114 while (!flist_empty(&td->io_log_list)) {
/* Take the oldest queued entry and detach it from both tracking lists. */
117 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118 flist_del(&ipo->list);
119 remove_trim_entry(td, ipo);
121 ret = ipo_special(td, ipo);
/* NOTE(review): the ret < 0/== 0 branches between 121 and 125 are elided. */
125 } else if (ret > 0) {
130 io_u->ddir = ipo->ddir;
131 if (ipo->ddir != DDIR_WAIT) {
/* Normal I/O: copy offset/len/file and take a reference on the file. */
132 io_u->offset = ipo->offset;
133 io_u->buflen = ipo->len;
134 io_u->file = td->files[ipo->fileno];
135 get_file(io_u->file);
136 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137 io_u->buflen, io_u->file->file_name);
139 iolog_delay(td, ipo->delay);
/*
 * WAIT entry: ipo->delay here appears to be an absolute msec-since-genesis
 * target (compared against mtime_since_genesis), unlike the relative delay
 * above -- TODO confirm against the elided parsing code.
 */
141 elapsed = mtime_since_genesis();
142 if (ipo->delay > elapsed)
143 usec_sleep(td, (ipo->delay - elapsed) * 1000);
149 if (io_u->ddir != DDIR_WAIT)
/*
 * Drain and discard the entire I/O history: first every node of the
 * rb-tree (sorted verify history), then every entry of the flat history
 * list.  Each entry is also removed from the trim tracking list; the
 * elided lines presumably free each ipo -- TODO confirm.
 */
157 void prune_io_piece_log(struct thread_data *td)
159 struct io_piece *ipo;
162 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163 ipo = rb_entry(n, struct io_piece, rb_node);
164 rb_erase(n, &td->io_hist_tree);
165 remove_trim_entry(td, ipo);
170 while (!flist_empty(&td->io_hist_list)) {
171 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172 flist_del(&ipo->list);
173 remove_trim_entry(td, ipo);
180 * log a successful write, so we can unwind the log for verify
/*
 * Record a completed write in the history so verify can later read it
 * back.  Entries go on the flat io_hist_list when replay order is fine,
 * or into the sorted io_hist_tree when duplicate-block detection is
 * needed (see the retained comment below for the exact conditions).
 * NOTE(review): malloc() result is used unchecked here (allocation-failure
 * policy not visible in this view).
 */
182 void log_io_piece(struct thread_data *td, struct io_u *io_u)
184 struct rb_node **p, *parent;
185 struct io_piece *ipo, *__ipo;
187 ipo = malloc(sizeof(struct io_piece));
189 ipo->file = io_u->file;
190 ipo->offset = io_u->offset;
191 ipo->len = io_u->buflen;
/* Trim candidates are additionally tracked on the per-thread trim list. */
193 if (io_u_should_trim(td, io_u)) {
194 flist_add_tail(&ipo->trim_list, &td->trim_list);
199 * We don't need to sort the entries, if:
201 * Sequential writes, or
202 * Random writes that lay out the file as it goes along
204 * For both these cases, just reading back data in the order we
205 * wrote it out is the fastest.
207 * One exception is if we don't have a random map AND we are doing
208 * verifies, in that case we need to check for duplicate blocks and
209 * drop the old one, which we rely on the rb insert/lookup for
212 if ((!td_random(td) || !td->o.overwrite) &&
213 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
/* Fast path: FIFO list entry, flagged so teardown knows where it lives. */
214 INIT_FLIST_HEAD(&ipo->list);
215 flist_add_tail(&ipo->list, &td->io_hist_list);
216 ipo->flags |= IP_F_ONLIST;
221 RB_CLEAR_NODE(&ipo->rb_node);
224 * Sort the entry into the verification list
/* Standard rb-tree insert walk, keyed by (file pointer, offset). */
227 p = &td->io_hist_tree.rb_node;
232 __ipo = rb_entry(parent, struct io_piece, rb_node);
233 if (ipo->file < __ipo->file)
235 else if (ipo->file > __ipo->file)
237 else if (ipo->offset < __ipo->offset)
239 else if (ipo->offset > __ipo->offset)
/*
 * Same file+offset: an overwrite of a previously logged block.  Drop the
 * old entry so verify checks the newest data (elided lines presumably
 * free __ipo and restart/continue the walk -- TODO confirm).
 */
242 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
243 __ipo->offset, __ipo->len,
244 ipo->offset, ipo->len);
246 rb_erase(parent, &td->io_hist_tree);
247 remove_trim_entry(td, __ipo);
253 rb_link_node(&ipo->rb_node, parent, p);
254 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
255 ipo->flags |= IP_F_ONRB;
/*
 * Tear down the write-iolog.  The elided lines presumably flush/close
 * td->iolog_f and free the stream buffer; the visible line clears the
 * buffer pointer so it is not reused after release.
 */
259 void write_iolog_close(struct thread_data *td)
265 td->iolog_buf = NULL;
269 * Read version 2 iolog data. It is enhanced to include per-file logging,
/*
 * Parse a v2 iolog from stream 'f' (version line already consumed by the
 * caller) and queue an io_piece per entry for replay.  Each line is either
 * "<file> <action>" (add/open/close) or "<file> <ddir> <offset> <len>".
 * Also derives td->o.td_ddir from the mix of entries seen.  Return value
 * convention is elided from this view.
 */
272 static int read_iolog2(struct thread_data *td, FILE *f)
274 unsigned long long offset;
276 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
/* Start from a clean slate: the iolog dictates the file set. */
281 free_release_files(td);
284 * Read in the read iolog and store it, reuse the infrastructure
285 * for doing verifications.
/* %256s can write up to 257 bytes; buffers are padded (256+16) to cover it. */
288 fname = malloc(256+16);
289 act = malloc(256+16);
291 reads = writes = waits = 0;
292 while ((p = fgets(str, 4096, f)) != NULL) {
293 struct io_piece *ipo;
/* r tells us which line form we got: 4 fields = I/O, fewer = file action. */
296 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
/* Map the action word to a ddir (assignments elided between branches). */
302 if (!strcmp(act, "wait"))
304 else if (!strcmp(act, "read"))
306 else if (!strcmp(act, "write"))
308 else if (!strcmp(act, "sync"))
310 else if (!strcmp(act, "datasync"))
312 else if (!strcmp(act, "trim"))
315 log_err("fio: bad iolog file action: %s\n",
/* File-action form: resolve the fileno and remember which action. */
321 if (!strcmp(act, "add")) {
323 fileno = add_file(td, fname);
324 file_action = FIO_LOG_ADD_FILE;
326 } else if (!strcmp(act, "open")) {
327 fileno = get_fileno(td, fname);
328 file_action = FIO_LOG_OPEN_FILE;
329 } else if (!strcmp(act, "close")) {
330 fileno = get_fileno(td, fname);
331 file_action = FIO_LOG_CLOSE_FILE;
333 log_err("fio: bad iolog file action: %s\n",
338 log_err("bad iolog2: %s", p);
/* Count entry types; writes are skipped entirely in read-only mode. */
344 else if (rw == DDIR_WRITE) {
346 * Don't add a write for ro mode
351 } else if (rw == DDIR_WAIT) {
353 } else if (rw == DDIR_INVAL) {
354 } else if (!ddir_sync(rw)) {
355 log_err("bad ddir: %d\n", rw);
/*
 * Build the replay entry.  NOTE(review): malloc() unchecked here, matching
 * the rest of this file's allocation style.
 */
362 ipo = malloc(sizeof(*ipo));
365 if (rw == DDIR_WAIT) {
368 ipo->offset = offset;
/* Track the largest block seen so buffers are sized big enough for replay. */
370 if (bytes > td->o.max_bs[rw])
371 td->o.max_bs[rw] = bytes;
372 ipo->fileno = fileno;
373 ipo->file_action = file_action;
376 queue_io_piece(td, ipo);
383 if (writes && read_only) {
384 log_err("fio: <%s> skips replay of %d writes due to"
385 " read-only\n", td->o.name, writes);
/* Derive the job's data direction from what the log actually contained. */
389 if (!reads && !writes && !waits)
391 else if (reads && !writes)
392 td->o.td_ddir = TD_DDIR_READ;
393 else if (!reads && writes)
394 td->o.td_ddir = TD_DDIR_WRITE;
396 td->o.td_ddir = TD_DDIR_RW;
402 * open iolog, check version, and call appropriate parser
/*
 * Open the configured read-iolog, verify the version line, and hand the
 * rest of the stream to read_iolog2().  v1 logs are rejected.  Cleanup
 * (fclose) and the return paths are elided from this view.
 */
404 static int init_iolog_read(struct thread_data *td)
406 char buffer[256], *p;
410 f = fopen(td->o.read_iolog_file, "r");
412 perror("fopen read iolog");
/* First line must be the version banner. */
416 p = fgets(buffer, sizeof(buffer), f);
418 td_verror(td, errno, "iolog read");
419 log_err("fio: unable to read iolog\n");
425 * version 2 of the iolog stores a specific string as the
426 * first line, check for that
428 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
429 ret = read_iolog2(td, f);
431 log_err("fio: iolog version 1 is no longer supported\n");
440 * Set up a log for storing io patterns.
/*
 * Open the configured write-iolog for append, give it a private 8k
 * full-buffering buffer, write the version banner, and log an "add" entry
 * for every file already known to the job.
 * NOTE(review): perror("iolog init\n") -- the trailing \n inside a perror
 * argument is unusual (perror appends ": <errstr>\n" itself); left as-is
 * since only comments may change here.
 */
442 static int init_iolog_write(struct thread_data *td)
448 f = fopen(td->o.write_iolog_file, "a");
450 perror("fopen write iolog");
455 * That's it for writing, setup a log buffer and we're done.
/* Buffer is freed in write_iolog_close(); setvbuf size must match. */
458 td->iolog_buf = malloc(8192);
459 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
462 * write our version line
464 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
465 perror("iolog init\n");
470 * add all known files
472 for_each_file(td, ff, i)
473 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Top-level iolog setup: if a read-iolog is configured, load it (as a
 * blktrace if the file looks like one, else as a fio iolog); otherwise,
 * if a write-iolog is configured, open it for recording.  Return path
 * is elided from this view.
 */
478 int init_iolog(struct thread_data *td)
482 if (td->o.read_iolog_file) {
484 * Check if it's a blktrace file and load that if possible.
485 * Otherwise assume it's a normal log file and load that.
487 if (is_blktrace(td->o.read_iolog_file))
488 ret = load_blktrace(td, td->o.read_iolog_file);
490 ret = init_iolog_read(td);
491 } else if (td->o.write_iolog_file)
492 ret = init_iolog_write(td);
/*
 * Allocate and zero-initialize an io_log with an initial capacity of 1024
 * samples, storing it through 'log'.  avg_msec is the averaging interval
 * carried on the log for later use.
 * NOTE(review): malloc() results are unchecked, matching file style.
 */
497 void setup_log(struct io_log **log, unsigned long avg_msec)
499 struct io_log *l = malloc(sizeof(*l));
501 memset(l, 0, sizeof(*l));
503 l->max_samples = 1024;
504 l->log = malloc(l->max_samples * sizeof(struct io_sample));
505 l->avg_msec = avg_msec;
/*
 * Append every buffered sample of 'log' to the file 'name' as CSV-ish
 * "time, value, ddir?, bsz?" lines (4th field elided from this view),
 * opened in append mode.  Cleanup/free lines are elided.
 */
509 void __finish_log(struct io_log *log, const char *name)
514 f = fopen(name, "a");
520 for (i = 0; i < log->nr_samples; i++) {
521 fprintf(f, "%lu, %lu, %u, %u\n", log->log[i].time,
/*
 * Flush a log to "<prefix>_<postfix>.log" (basename only, so logs land in
 * the current directory) via __finish_log().
 * NOTE(review): snprintf bound is 200 while file_name is 256 bytes --
 * harmless but inconsistent; sizeof file_name would be the obvious bound.
 * Also note basename() may modify its argument on some platforms, which is
 * fine here since file_name is a local scratch buffer.
 */
532 void finish_log_named(struct thread_data *td, struct io_log *log,
533 const char *prefix, const char *postfix)
535 char file_name[256], *p;
537 snprintf(file_name, 200, "%s_%s.log", prefix, postfix);
538 p = basename(file_name);
539 __finish_log(log, p);
/* Convenience wrapper: flush 'log' using the job name as the file prefix. */
542 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
544 finish_log_named(td, log, td->o.name, name);