/*
 * Code related to writing an iolog of what a thread is doing, and to
 * later read that back and replay
 */
/* Magic first line identifying a version 2 iolog file. */
static const char iolog_ver2[] = "fio version 2 iolog";
16 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 flist_add_tail(&ipo->list, &td->io_log_list);
19 td->total_io_size += ipo->len;
22 void log_io_u(struct thread_data *td, struct io_u *io_u)
24 const char *act[] = { "read", "write", "sync", "datasync",
25 "sync_file_range", "wait", "trim" };
27 assert(io_u->ddir <= 6);
29 if (!td->o.write_iolog_file)
32 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33 act[io_u->ddir], io_u->offset,
37 void log_file(struct thread_data *td, struct fio_file *f,
38 enum file_log_act what)
40 const char *act[] = { "add", "open", "close" };
44 if (!td->o.write_iolog_file)
49 * this happens on the pre-open/close done before the job starts
54 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
57 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 unsigned long usec = utime_since_now(&td->last_issue);
67 * less than 100 usec delay, just regard it as noise
72 usec_sleep(td, delay);
75 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
83 if (ipo->ddir != DDIR_INVAL)
86 f = td->files[ipo->fileno];
88 switch (ipo->file_action) {
89 case FIO_LOG_OPEN_FILE:
90 ret = td_io_open_file(td, f);
93 td_verror(td, ret, "iolog open file");
95 case FIO_LOG_CLOSE_FILE:
96 td_io_close_file(td, f);
98 case FIO_LOG_UNLINK_FILE:
102 log_err("fio: bad file action %d\n", ipo->file_action);
109 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 struct io_piece *ipo;
112 unsigned long elapsed;
114 while (!flist_empty(&td->io_log_list)) {
117 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118 flist_del(&ipo->list);
119 remove_trim_entry(td, ipo);
121 ret = ipo_special(td, ipo);
125 } else if (ret > 0) {
130 io_u->ddir = ipo->ddir;
131 if (ipo->ddir != DDIR_WAIT) {
132 io_u->offset = ipo->offset;
133 io_u->buflen = ipo->len;
134 io_u->file = td->files[ipo->fileno];
135 get_file(io_u->file);
136 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137 io_u->buflen, io_u->file->file_name);
139 iolog_delay(td, ipo->delay);
141 elapsed = mtime_since_genesis();
142 if (ipo->delay > elapsed)
143 usec_sleep(td, (ipo->delay - elapsed) * 1000);
149 if (io_u->ddir != DDIR_WAIT)
157 void prune_io_piece_log(struct thread_data *td)
159 struct io_piece *ipo;
162 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163 ipo = rb_entry(n, struct io_piece, rb_node);
164 rb_erase(n, &td->io_hist_tree);
165 remove_trim_entry(td, ipo);
170 while (!flist_empty(&td->io_hist_list)) {
171 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172 flist_del(&ipo->list);
173 remove_trim_entry(td, ipo);
180 * log a successful write, so we can unwind the log for verify
182 void log_io_piece(struct thread_data *td, struct io_u *io_u)
184 struct rb_node **p, *parent;
185 struct io_piece *ipo, *__ipo;
187 ipo = malloc(sizeof(struct io_piece));
189 ipo->file = io_u->file;
190 ipo->offset = io_u->offset;
191 ipo->len = io_u->buflen;
193 if (io_u_should_trim(td, io_u)) {
194 flist_add_tail(&ipo->trim_list, &td->trim_list);
199 * We don't need to sort the entries, if:
201 * Sequential writes, or
202 * Random writes that lay out the file as it goes along
204 * For both these cases, just reading back data in the order we
205 * wrote it out is the fastest.
207 * One exception is if we don't have a random map AND we are doing
208 * verifies, in that case we need to check for duplicate blocks and
209 * drop the old one, which we rely on the rb insert/lookup for
212 if ((!td_random(td) || !td->o.overwrite) &&
213 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
214 INIT_FLIST_HEAD(&ipo->list);
215 flist_add_tail(&ipo->list, &td->io_hist_list);
216 ipo->flags |= IP_F_ONLIST;
221 RB_CLEAR_NODE(&ipo->rb_node);
224 * Sort the entry into the verification list
227 p = &td->io_hist_tree.rb_node;
232 __ipo = rb_entry(parent, struct io_piece, rb_node);
233 if (ipo->file < __ipo->file)
235 else if (ipo->file > __ipo->file)
237 else if (ipo->offset < __ipo->offset)
239 else if (ipo->offset > __ipo->offset)
242 assert(ipo->len == __ipo->len);
244 rb_erase(parent, &td->io_hist_tree);
245 remove_trim_entry(td, __ipo);
251 rb_link_node(&ipo->rb_node, parent, p);
252 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
253 ipo->flags |= IP_F_ONRB;
257 void write_iolog_close(struct thread_data *td)
263 td->iolog_buf = NULL;
267 * Read version 2 iolog data. It is enhanced to include per-file logging,
270 static int read_iolog2(struct thread_data *td, FILE *f)
272 unsigned long long offset;
274 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
279 free_release_files(td);
282 * Read in the read iolog and store it, reuse the infrastructure
283 * for doing verifications.
286 fname = malloc(256+16);
287 act = malloc(256+16);
289 reads = writes = waits = 0;
290 while ((p = fgets(str, 4096, f)) != NULL) {
291 struct io_piece *ipo;
294 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
300 if (!strcmp(act, "wait"))
302 else if (!strcmp(act, "read"))
304 else if (!strcmp(act, "write"))
306 else if (!strcmp(act, "sync"))
308 else if (!strcmp(act, "datasync"))
310 else if (!strcmp(act, "trim"))
313 log_err("fio: bad iolog file action: %s\n",
319 if (!strcmp(act, "add")) {
321 fileno = add_file(td, fname);
322 file_action = FIO_LOG_ADD_FILE;
324 } else if (!strcmp(act, "open")) {
325 fileno = get_fileno(td, fname);
326 file_action = FIO_LOG_OPEN_FILE;
327 } else if (!strcmp(act, "close")) {
328 fileno = get_fileno(td, fname);
329 file_action = FIO_LOG_CLOSE_FILE;
331 log_err("fio: bad iolog file action: %s\n",
336 log_err("bad iolog2: %s", p);
342 else if (rw == DDIR_WRITE) {
344 * Don't add a write for ro mode
349 } else if (rw == DDIR_WAIT) {
351 } else if (rw == DDIR_INVAL) {
352 } else if (!ddir_sync(rw)) {
353 log_err("bad ddir: %d\n", rw);
360 ipo = malloc(sizeof(*ipo));
363 if (rw == DDIR_WAIT) {
366 ipo->offset = offset;
368 if (bytes > td->o.max_bs[rw])
369 td->o.max_bs[rw] = bytes;
370 ipo->fileno = fileno;
371 ipo->file_action = file_action;
374 queue_io_piece(td, ipo);
381 if (writes && read_only) {
382 log_err("fio: <%s> skips replay of %d writes due to"
383 " read-only\n", td->o.name, writes);
387 if (!reads && !writes && !waits)
389 else if (reads && !writes)
390 td->o.td_ddir = TD_DDIR_READ;
391 else if (!reads && writes)
392 td->o.td_ddir = TD_DDIR_WRITE;
394 td->o.td_ddir = TD_DDIR_RW;
400 * open iolog, check version, and call appropriate parser
402 static int init_iolog_read(struct thread_data *td)
404 char buffer[256], *p;
408 f = fopen(td->o.read_iolog_file, "r");
410 perror("fopen read iolog");
414 p = fgets(buffer, sizeof(buffer), f);
416 td_verror(td, errno, "iolog read");
417 log_err("fio: unable to read iolog\n");
423 * version 2 of the iolog stores a specific string as the
424 * first line, check for that
426 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
427 ret = read_iolog2(td, f);
429 log_err("fio: iolog version 1 is no longer supported\n");
438 * Set up a log for storing io patterns.
440 static int init_iolog_write(struct thread_data *td)
446 f = fopen(td->o.write_iolog_file, "a");
448 perror("fopen write iolog");
453 * That's it for writing, setup a log buffer and we're done.
456 td->iolog_buf = malloc(8192);
457 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
460 * write our version line
462 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
463 perror("iolog init\n");
468 * add all known files
470 for_each_file(td, ff, i)
471 log_file(td, ff, FIO_LOG_ADD_FILE);
476 int init_iolog(struct thread_data *td)
480 if (td->o.read_iolog_file) {
482 * Check if it's a blktrace file and load that if possible.
483 * Otherwise assume it's a normal log file and load that.
485 if (is_blktrace(td->o.read_iolog_file))
486 ret = load_blktrace(td, td->o.read_iolog_file);
488 ret = init_iolog_read(td);
489 } else if (td->o.write_iolog_file)
490 ret = init_iolog_write(td);
495 void setup_log(struct io_log **log, unsigned long avg_msec)
497 struct io_log *l = malloc(sizeof(*l));
499 memset(l, 0, sizeof(*l));
501 l->max_samples = 1024;
502 l->log = malloc(l->max_samples * sizeof(struct io_sample));
503 l->avg_msec = avg_msec;
507 void __finish_log(struct io_log *log, const char *name)
512 f = fopen(name, "a");
518 for (i = 0; i < log->nr_samples; i++) {
519 fprintf(f, "%lu, %lu, %u, %u\n", log->log[i].time,
/*
 * Build "<prefix>_<postfix>.log", strip any directory component, and
 * flush the log to that file.
 */
void finish_log_named(struct thread_data *td, struct io_log *log,
		      const char *prefix, const char *postfix)
{
	char file_name[256], *p;

	/* bound by the actual buffer size, not a magic 200 */
	snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
	p = basename(file_name);
	__finish_log(log, p);
}
540 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
542 finish_log_named(td, log, td->o.name, name);