2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
13 static const char iolog_ver2[] = "fio version 2 iolog";
15 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
17 flist_add_tail(&ipo->list, &td->io_log_list);
18 td->total_io_size += ipo->len;
21 void log_io_u(struct thread_data *td, struct io_u *io_u)
23 const char *act[] = { "read", "write", "sync", "datasync",
24 "sync_file_range", "wait", "trim" };
26 assert(io_u->ddir <= 6);
28 if (!td->o.write_iolog_file)
31 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
32 act[io_u->ddir], io_u->offset,
36 void log_file(struct thread_data *td, struct fio_file *f,
37 enum file_log_act what)
39 const char *act[] = { "add", "open", "close" };
43 if (!td->o.write_iolog_file)
48 * this happens on the pre-open/close done before the job starts
53 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
56 static void iolog_delay(struct thread_data *td, unsigned long delay)
58 unsigned long usec = utime_since_now(&td->last_issue);
66 * less than 100 usec delay, just regard it as noise
71 usec_sleep(td, delay);
74 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
82 if (ipo->ddir != DDIR_INVAL)
85 f = td->files[ipo->fileno];
87 switch (ipo->file_action) {
88 case FIO_LOG_OPEN_FILE:
89 ret = td_io_open_file(td, f);
92 td_verror(td, ret, "iolog open file");
94 case FIO_LOG_CLOSE_FILE:
95 td_io_close_file(td, f);
97 case FIO_LOG_UNLINK_FILE:
101 log_err("fio: bad file action %d\n", ipo->file_action);
108 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
110 struct io_piece *ipo;
111 unsigned long elapsed;
113 while (!flist_empty(&td->io_log_list)) {
116 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
117 flist_del(&ipo->list);
119 ret = ipo_special(td, ipo);
123 } else if (ret > 0) {
128 io_u->ddir = ipo->ddir;
129 if (ipo->ddir != DDIR_WAIT) {
130 io_u->offset = ipo->offset;
131 io_u->buflen = ipo->len;
132 io_u->file = td->files[ipo->fileno];
133 get_file(io_u->file);
134 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
135 io_u->buflen, io_u->file->file_name);
137 iolog_delay(td, ipo->delay);
139 elapsed = mtime_since_genesis();
140 if (ipo->delay > elapsed)
141 usec_sleep(td, (ipo->delay - elapsed) * 1000);
147 if (io_u->ddir != DDIR_WAIT)
155 void prune_io_piece_log(struct thread_data *td)
157 struct io_piece *ipo;
160 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
161 ipo = rb_entry(n, struct io_piece, rb_node);
162 rb_erase(n, &td->io_hist_tree);
167 while (!flist_empty(&td->io_hist_list)) {
168 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
169 flist_del(&ipo->list);
176 * log a successful write, so we can unwind the log for verify
178 void log_io_piece(struct thread_data *td, struct io_u *io_u)
180 struct rb_node **p, *parent;
181 struct io_piece *ipo, *__ipo;
183 ipo = malloc(sizeof(struct io_piece));
184 ipo->file = io_u->file;
185 ipo->offset = io_u->offset;
186 ipo->len = io_u->buflen;
189 * We don't need to sort the entries, if:
191 * Sequential writes, or
192 * Random writes that lay out the file as it goes along
194 * For both these cases, just reading back data in the order we
195 * wrote it out is the fastest.
197 * One exception is if we don't have a random map AND we are doing
198 * verifies, in that case we need to check for duplicate blocks and
199 * drop the old one, which we rely on the rb insert/lookup for
202 if ((!td_random(td) || !td->o.overwrite) &&
203 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
204 INIT_FLIST_HEAD(&ipo->list);
205 flist_add_tail(&ipo->list, &td->io_hist_list);
210 RB_CLEAR_NODE(&ipo->rb_node);
213 * Sort the entry into the verification list
216 p = &td->io_hist_tree.rb_node;
221 __ipo = rb_entry(parent, struct io_piece, rb_node);
222 if (ipo->file < __ipo->file)
224 else if (ipo->file > __ipo->file)
226 else if (ipo->offset < __ipo->offset)
228 else if (ipo->offset > __ipo->offset)
231 assert(ipo->len == __ipo->len);
233 rb_erase(parent, &td->io_hist_tree);
239 rb_link_node(&ipo->rb_node, parent, p);
240 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
244 void write_iolog_close(struct thread_data *td)
250 td->iolog_buf = NULL;
254 * Read version 2 iolog data. It is enhanced to include per-file logging,
257 static int read_iolog2(struct thread_data *td, FILE *f)
259 unsigned long long offset;
261 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
266 free_release_files(td);
269 * Read in the read iolog and store it, reuse the infrastructure
270 * for doing verifications.
273 fname = malloc(256+16);
274 act = malloc(256+16);
276 reads = writes = waits = 0;
277 while ((p = fgets(str, 4096, f)) != NULL) {
278 struct io_piece *ipo;
281 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
287 if (!strcmp(act, "wait"))
289 else if (!strcmp(act, "read"))
291 else if (!strcmp(act, "write"))
293 else if (!strcmp(act, "sync"))
295 else if (!strcmp(act, "datasync"))
297 else if (!strcmp(act, "trim"))
300 log_err("fio: bad iolog file action: %s\n",
306 if (!strcmp(act, "add")) {
308 fileno = add_file(td, fname);
309 file_action = FIO_LOG_ADD_FILE;
311 } else if (!strcmp(act, "open")) {
312 fileno = get_fileno(td, fname);
313 file_action = FIO_LOG_OPEN_FILE;
314 } else if (!strcmp(act, "close")) {
315 fileno = get_fileno(td, fname);
316 file_action = FIO_LOG_CLOSE_FILE;
318 log_err("fio: bad iolog file action: %s\n",
323 log_err("bad iolog2: %s", p);
329 else if (rw == DDIR_WRITE) {
331 * Don't add a write for ro mode
336 } else if (rw == DDIR_WAIT) {
338 } else if (rw == DDIR_INVAL) {
339 } else if (!ddir_sync(rw)) {
340 log_err("bad ddir: %d\n", rw);
347 ipo = malloc(sizeof(*ipo));
348 memset(ipo, 0, sizeof(*ipo));
349 INIT_FLIST_HEAD(&ipo->list);
351 if (rw == DDIR_WAIT) {
354 ipo->offset = offset;
356 if (bytes > td->o.max_bs[rw])
357 td->o.max_bs[rw] = bytes;
358 ipo->fileno = fileno;
359 ipo->file_action = file_action;
362 queue_io_piece(td, ipo);
369 if (writes && read_only) {
370 log_err("fio: <%s> skips replay of %d writes due to"
371 " read-only\n", td->o.name, writes);
375 if (!reads && !writes && !waits)
377 else if (reads && !writes)
378 td->o.td_ddir = TD_DDIR_READ;
379 else if (!reads && writes)
380 td->o.td_ddir = TD_DDIR_WRITE;
382 td->o.td_ddir = TD_DDIR_RW;
388 * open iolog, check version, and call appropriate parser
390 static int init_iolog_read(struct thread_data *td)
392 char buffer[256], *p;
396 f = fopen(td->o.read_iolog_file, "r");
398 perror("fopen read iolog");
402 p = fgets(buffer, sizeof(buffer), f);
404 td_verror(td, errno, "iolog read");
405 log_err("fio: unable to read iolog\n");
410 * version 2 of the iolog stores a specific string as the
411 * first line, check for that
413 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
414 ret = read_iolog2(td, f);
416 log_err("fio: iolog version 1 is no longer supported\n");
425 * Setup a log for storing io patterns.
427 static int init_iolog_write(struct thread_data *td)
433 f = fopen(td->o.write_iolog_file, "a");
435 perror("fopen write iolog");
440 * That's it for writing, setup a log buffer and we're done.
443 td->iolog_buf = malloc(8192);
444 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
447 * write our version line
449 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
450 perror("iolog init\n");
455 * add all known files
457 for_each_file(td, ff, i)
458 log_file(td, ff, FIO_LOG_ADD_FILE);
463 int init_iolog(struct thread_data *td)
467 if (td->o.read_iolog_file) {
469 * Check if it's a blktrace file and load that if possible.
470 * Otherwise assume it's a normal log file and load that.
472 if (is_blktrace(td->o.read_iolog_file))
473 ret = load_blktrace(td, td->o.read_iolog_file);
475 ret = init_iolog_read(td);
476 } else if (td->o.write_iolog_file)
477 ret = init_iolog_write(td);
482 void setup_log(struct io_log **log)
484 struct io_log *l = malloc(sizeof(*l));
487 l->max_samples = 1024;
488 l->log = malloc(l->max_samples * sizeof(struct io_sample));
492 void __finish_log(struct io_log *log, const char *name)
497 f = fopen(name, "a");
503 for (i = 0; i < log->nr_samples; i++) {
504 fprintf(f, "%lu, %lu, %u, %u\n", log->log[i].time,
515 void finish_log_named(struct thread_data *td, struct io_log *log,
516 const char *prefix, const char *postfix)
518 char file_name[256], *p;
520 snprintf(file_name, 200, "%s_%s.log", prefix, postfix);
521 p = basename(file_name);
522 __finish_log(log, p);
525 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
527 finish_log_named(td, log, td->o.name, name);