2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
14 static const char iolog_ver2[] = "fio version 2 iolog";
16 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 flist_add_tail(&ipo->list, &td->io_log_list);
19 td->total_io_size += ipo->len;
22 void log_io_u(struct thread_data *td, struct io_u *io_u)
24 const char *act[] = { "read", "write", "sync", "datasync",
25 "sync_file_range", "wait", "trim" };
27 assert(io_u->ddir <= 6);
29 if (!td->o.write_iolog_file)
32 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33 act[io_u->ddir], io_u->offset,
37 void log_file(struct thread_data *td, struct fio_file *f,
38 enum file_log_act what)
40 const char *act[] = { "add", "open", "close" };
44 if (!td->o.write_iolog_file)
49 * this happens on the pre-open/close done before the job starts
54 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
57 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 unsigned long usec = utime_since_now(&td->last_issue);
67 * less than 100 usec delay, just regard it as noise
72 usec_sleep(td, delay);
75 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
83 if (ipo->ddir != DDIR_INVAL)
86 f = td->files[ipo->fileno];
88 switch (ipo->file_action) {
89 case FIO_LOG_OPEN_FILE:
90 ret = td_io_open_file(td, f);
93 td_verror(td, ret, "iolog open file");
95 case FIO_LOG_CLOSE_FILE:
96 td_io_close_file(td, f);
98 case FIO_LOG_UNLINK_FILE:
102 log_err("fio: bad file action %d\n", ipo->file_action);
109 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 struct io_piece *ipo;
112 unsigned long elapsed;
114 while (!flist_empty(&td->io_log_list)) {
117 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118 flist_del(&ipo->list);
119 remove_trim_entry(td, ipo);
121 ret = ipo_special(td, ipo);
125 } else if (ret > 0) {
130 io_u->ddir = ipo->ddir;
131 if (ipo->ddir != DDIR_WAIT) {
132 io_u->offset = ipo->offset;
133 io_u->buflen = ipo->len;
134 io_u->file = td->files[ipo->fileno];
135 get_file(io_u->file);
136 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137 io_u->buflen, io_u->file->file_name);
139 iolog_delay(td, ipo->delay);
141 elapsed = mtime_since_genesis();
142 if (ipo->delay > elapsed)
143 usec_sleep(td, (ipo->delay - elapsed) * 1000);
148 if (io_u->ddir != DDIR_WAIT)
156 void prune_io_piece_log(struct thread_data *td)
158 struct io_piece *ipo;
161 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
162 ipo = rb_entry(n, struct io_piece, rb_node);
163 rb_erase(n, &td->io_hist_tree);
164 remove_trim_entry(td, ipo);
169 while (!flist_empty(&td->io_hist_list)) {
170 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
171 flist_del(&ipo->list);
172 remove_trim_entry(td, ipo);
179 * log a successful write, so we can unwind the log for verify
181 void log_io_piece(struct thread_data *td, struct io_u *io_u)
183 struct rb_node **p, *parent;
184 struct io_piece *ipo, *__ipo;
186 ipo = malloc(sizeof(struct io_piece));
188 ipo->file = io_u->file;
189 ipo->offset = io_u->offset;
190 ipo->len = io_u->buflen;
191 ipo->numberio = io_u->numberio;
193 if (io_u_should_trim(td, io_u)) {
194 flist_add_tail(&ipo->trim_list, &td->trim_list);
199 * We don't need to sort the entries, if:
201 * Sequential writes, or
202 * Random writes that lay out the file as it goes along
204 * For both these cases, just reading back data in the order we
205 * wrote it out is the fastest.
207 * One exception is if we don't have a random map AND we are doing
208 * verifies, in that case we need to check for duplicate blocks and
209 * drop the old one, which we rely on the rb insert/lookup for
212 if ((!td_random(td) || !td->o.overwrite) &&
213 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
214 INIT_FLIST_HEAD(&ipo->list);
215 flist_add_tail(&ipo->list, &td->io_hist_list);
216 ipo->flags |= IP_F_ONLIST;
221 RB_CLEAR_NODE(&ipo->rb_node);
224 * Sort the entry into the verification list
227 p = &td->io_hist_tree.rb_node;
232 __ipo = rb_entry(parent, struct io_piece, rb_node);
233 if (ipo->file < __ipo->file)
235 else if (ipo->file > __ipo->file)
237 else if (ipo->offset < __ipo->offset)
239 else if (ipo->offset > __ipo->offset)
242 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
243 __ipo->offset, __ipo->len,
244 ipo->offset, ipo->len);
246 rb_erase(parent, &td->io_hist_tree);
247 remove_trim_entry(td, __ipo);
253 rb_link_node(&ipo->rb_node, parent, p);
254 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
255 ipo->flags |= IP_F_ONRB;
259 void write_iolog_close(struct thread_data *td)
265 td->iolog_buf = NULL;
269 * Read version 2 iolog data. It is enhanced to include per-file logging,
272 static int read_iolog2(struct thread_data *td, FILE *f)
274 unsigned long long offset;
276 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
281 free_release_files(td);
284 * Read in the read iolog and store it, reuse the infrastructure
285 * for doing verifications.
288 fname = malloc(256+16);
289 act = malloc(256+16);
291 reads = writes = waits = 0;
292 while ((p = fgets(str, 4096, f)) != NULL) {
293 struct io_piece *ipo;
296 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
302 if (!strcmp(act, "wait"))
304 else if (!strcmp(act, "read"))
306 else if (!strcmp(act, "write"))
308 else if (!strcmp(act, "sync"))
310 else if (!strcmp(act, "datasync"))
312 else if (!strcmp(act, "trim"))
315 log_err("fio: bad iolog file action: %s\n",
319 fileno = get_fileno(td, fname);
322 if (!strcmp(act, "add")) {
324 fileno = add_file(td, fname);
325 file_action = FIO_LOG_ADD_FILE;
327 } else if (!strcmp(act, "open")) {
328 fileno = get_fileno(td, fname);
329 file_action = FIO_LOG_OPEN_FILE;
330 } else if (!strcmp(act, "close")) {
331 fileno = get_fileno(td, fname);
332 file_action = FIO_LOG_CLOSE_FILE;
334 log_err("fio: bad iolog file action: %s\n",
339 log_err("bad iolog2: %s", p);
345 else if (rw == DDIR_WRITE) {
347 * Don't add a write for ro mode
352 } else if (rw == DDIR_WAIT) {
354 } else if (rw == DDIR_INVAL) {
355 } else if (!ddir_sync(rw)) {
356 log_err("bad ddir: %d\n", rw);
363 ipo = malloc(sizeof(*ipo));
366 if (rw == DDIR_WAIT) {
369 ipo->offset = offset;
371 if (bytes > td->o.max_bs[rw])
372 td->o.max_bs[rw] = bytes;
373 ipo->fileno = fileno;
374 ipo->file_action = file_action;
377 queue_io_piece(td, ipo);
384 if (writes && read_only) {
385 log_err("fio: <%s> skips replay of %d writes due to"
386 " read-only\n", td->o.name, writes);
390 if (!reads && !writes && !waits)
392 else if (reads && !writes)
393 td->o.td_ddir = TD_DDIR_READ;
394 else if (!reads && writes)
395 td->o.td_ddir = TD_DDIR_WRITE;
397 td->o.td_ddir = TD_DDIR_RW;
403 * open iolog, check version, and call appropriate parser
405 static int init_iolog_read(struct thread_data *td)
407 char buffer[256], *p;
411 f = fopen(td->o.read_iolog_file, "r");
413 perror("fopen read iolog");
417 p = fgets(buffer, sizeof(buffer), f);
419 td_verror(td, errno, "iolog read");
420 log_err("fio: unable to read iolog\n");
426 * version 2 of the iolog stores a specific string as the
427 * first line, check for that
429 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
430 ret = read_iolog2(td, f);
432 log_err("fio: iolog version 1 is no longer supported\n");
441 * Set up a log for storing io patterns.
443 static int init_iolog_write(struct thread_data *td)
449 f = fopen(td->o.write_iolog_file, "a");
451 perror("fopen write iolog");
456 * That's it for writing, setup a log buffer and we're done.
459 td->iolog_buf = malloc(8192);
460 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
463 * write our version line
465 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
466 perror("iolog init\n");
471 * add all known files
473 for_each_file(td, ff, i)
474 log_file(td, ff, FIO_LOG_ADD_FILE);
479 int init_iolog(struct thread_data *td)
483 if (td->o.read_iolog_file) {
487 * Check if it's a blktrace file and load that if possible.
488 * Otherwise assume it's a normal log file and load that.
490 if (is_blktrace(td->o.read_iolog_file, &need_swap))
491 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
493 ret = init_iolog_read(td);
494 } else if (td->o.write_iolog_file)
495 ret = init_iolog_write(td);
498 td_verror(td, EINVAL, "failed initializing iolog");
503 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
505 struct io_log *l = malloc(sizeof(*l));
507 memset(l, 0, sizeof(*l));
509 l->max_samples = 1024;
510 l->log_type = log_type;
511 l->log = malloc(l->max_samples * sizeof(struct io_sample));
512 l->avg_msec = avg_msec;
516 void __finish_log(struct io_log *log, const char *name)
521 f = fopen(name, "a");
527 for (i = 0; i < log->nr_samples; i++) {
528 fprintf(f, "%lu, %lu, %u, %u\n",
529 (unsigned long) log->log[i].time,
530 (unsigned long) log->log[i].val,
531 log->log[i].ddir, log->log[i].bs);
539 void finish_log_named(struct thread_data *td, struct io_log *log,
540 const char *prefix, const char *postfix)
542 char file_name[256], *p;
544 snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
545 p = basename(file_name);
547 if (td->client_type == FIO_CLIENT_TYPE_GUI) {
548 fio_send_iolog(td, log, p);
552 __finish_log(log, p);
555 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
557 finish_log_named(td, log, td->o.name, name);