2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
14 static const char iolog_ver2[] = "fio version 2 iolog";
16 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
18 flist_add_tail(&ipo->list, &td->io_log_list);
19 td->total_io_size += ipo->len;
22 void log_io_u(struct thread_data *td, struct io_u *io_u)
24 const char *act[] = { "read", "write", "sync", "datasync",
25 "sync_file_range", "wait", "trim" };
27 assert(io_u->ddir <= 6);
29 if (!td->o.write_iolog_file)
32 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33 act[io_u->ddir], io_u->offset,
37 void log_file(struct thread_data *td, struct fio_file *f,
38 enum file_log_act what)
40 const char *act[] = { "add", "open", "close" };
44 if (!td->o.write_iolog_file)
49 * this happens on the pre-open/close done before the job starts
54 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
57 static void iolog_delay(struct thread_data *td, unsigned long delay)
59 unsigned long usec = utime_since_now(&td->last_issue);
67 * less than 100 usec delay, just regard it as noise
72 usec_sleep(td, delay);
75 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
83 if (ipo->ddir != DDIR_INVAL)
86 f = td->files[ipo->fileno];
88 switch (ipo->file_action) {
89 case FIO_LOG_OPEN_FILE:
90 ret = td_io_open_file(td, f);
93 td_verror(td, ret, "iolog open file");
95 case FIO_LOG_CLOSE_FILE:
96 td_io_close_file(td, f);
98 case FIO_LOG_UNLINK_FILE:
102 log_err("fio: bad file action %d\n", ipo->file_action);
109 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
111 struct io_piece *ipo;
112 unsigned long elapsed;
114 while (!flist_empty(&td->io_log_list)) {
117 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118 flist_del(&ipo->list);
119 remove_trim_entry(td, ipo);
121 ret = ipo_special(td, ipo);
125 } else if (ret > 0) {
130 io_u->ddir = ipo->ddir;
131 if (ipo->ddir != DDIR_WAIT) {
132 io_u->offset = ipo->offset;
133 io_u->buflen = ipo->len;
134 io_u->file = td->files[ipo->fileno];
135 get_file(io_u->file);
136 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137 io_u->buflen, io_u->file->file_name);
139 iolog_delay(td, ipo->delay);
141 elapsed = mtime_since_genesis();
142 if (ipo->delay > elapsed)
143 usec_sleep(td, (ipo->delay - elapsed) * 1000);
148 if (io_u->ddir != DDIR_WAIT)
156 void prune_io_piece_log(struct thread_data *td)
158 struct io_piece *ipo;
161 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
162 ipo = rb_entry(n, struct io_piece, rb_node);
163 rb_erase(n, &td->io_hist_tree);
164 remove_trim_entry(td, ipo);
169 while (!flist_empty(&td->io_hist_list)) {
170 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
171 flist_del(&ipo->list);
172 remove_trim_entry(td, ipo);
179 * log a successful write, so we can unwind the log for verify
181 void log_io_piece(struct thread_data *td, struct io_u *io_u)
183 struct rb_node **p, *parent;
184 struct io_piece *ipo, *__ipo;
186 ipo = malloc(sizeof(struct io_piece));
188 ipo->file = io_u->file;
189 ipo->offset = io_u->offset;
190 ipo->len = io_u->buflen;
191 ipo->numberio = io_u->numberio;
192 ipo->flags = IP_F_IN_FLIGHT;
196 if (io_u_should_trim(td, io_u)) {
197 flist_add_tail(&ipo->trim_list, &td->trim_list);
202 * We don't need to sort the entries, if:
204 * Sequential writes, or
205 * Random writes that lay out the file as it goes along
207 * For both these cases, just reading back data in the order we
208 * wrote it out is the fastest.
210 * One exception is if we don't have a random map AND we are doing
211 * verifies, in that case we need to check for duplicate blocks and
212 * drop the old one, which we rely on the rb insert/lookup for
215 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
216 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
217 INIT_FLIST_HEAD(&ipo->list);
218 flist_add_tail(&ipo->list, &td->io_hist_list);
219 ipo->flags |= IP_F_ONLIST;
224 RB_CLEAR_NODE(&ipo->rb_node);
227 * Sort the entry into the verification list
230 p = &td->io_hist_tree.rb_node;
235 __ipo = rb_entry(parent, struct io_piece, rb_node);
236 if (ipo->file < __ipo->file)
238 else if (ipo->file > __ipo->file)
240 else if (ipo->offset < __ipo->offset)
242 else if (ipo->offset > __ipo->offset)
245 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
246 __ipo->offset, __ipo->len,
247 ipo->offset, ipo->len);
249 rb_erase(parent, &td->io_hist_tree);
250 remove_trim_entry(td, __ipo);
256 rb_link_node(&ipo->rb_node, parent, p);
257 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
258 ipo->flags |= IP_F_ONRB;
262 void write_iolog_close(struct thread_data *td)
268 td->iolog_buf = NULL;
272 * Read version 2 iolog data. It is enhanced to include per-file logging,
275 static int read_iolog2(struct thread_data *td, FILE *f)
277 unsigned long long offset;
279 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
284 free_release_files(td);
287 * Read in the read iolog and store it, reuse the infrastructure
288 * for doing verifications.
291 fname = malloc(256+16);
292 act = malloc(256+16);
294 reads = writes = waits = 0;
295 while ((p = fgets(str, 4096, f)) != NULL) {
296 struct io_piece *ipo;
299 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
305 if (!strcmp(act, "wait"))
307 else if (!strcmp(act, "read"))
309 else if (!strcmp(act, "write"))
311 else if (!strcmp(act, "sync"))
313 else if (!strcmp(act, "datasync"))
315 else if (!strcmp(act, "trim"))
318 log_err("fio: bad iolog file action: %s\n",
322 fileno = get_fileno(td, fname);
325 if (!strcmp(act, "add")) {
326 fileno = add_file(td, fname, 0, 1);
327 file_action = FIO_LOG_ADD_FILE;
329 } else if (!strcmp(act, "open")) {
330 fileno = get_fileno(td, fname);
331 file_action = FIO_LOG_OPEN_FILE;
332 } else if (!strcmp(act, "close")) {
333 fileno = get_fileno(td, fname);
334 file_action = FIO_LOG_CLOSE_FILE;
336 log_err("fio: bad iolog file action: %s\n",
341 log_err("bad iolog2: %s", p);
347 else if (rw == DDIR_WRITE) {
349 * Don't add a write for ro mode
354 } else if (rw == DDIR_WAIT) {
356 } else if (rw == DDIR_INVAL) {
357 } else if (!ddir_sync(rw)) {
358 log_err("bad ddir: %d\n", rw);
365 ipo = malloc(sizeof(*ipo));
368 if (rw == DDIR_WAIT) {
371 ipo->offset = offset;
373 if (bytes > td->o.max_bs[rw])
374 td->o.max_bs[rw] = bytes;
375 ipo->fileno = fileno;
376 ipo->file_action = file_action;
379 queue_io_piece(td, ipo);
386 if (writes && read_only) {
387 log_err("fio: <%s> skips replay of %d writes due to"
388 " read-only\n", td->o.name, writes);
392 if (!reads && !writes && !waits)
394 else if (reads && !writes)
395 td->o.td_ddir = TD_DDIR_READ;
396 else if (!reads && writes)
397 td->o.td_ddir = TD_DDIR_WRITE;
399 td->o.td_ddir = TD_DDIR_RW;
405 * open iolog, check version, and call appropriate parser
407 static int init_iolog_read(struct thread_data *td)
409 char buffer[256], *p;
413 f = fopen(td->o.read_iolog_file, "r");
415 perror("fopen read iolog");
419 p = fgets(buffer, sizeof(buffer), f);
421 td_verror(td, errno, "iolog read");
422 log_err("fio: unable to read iolog\n");
428 * version 2 of the iolog stores a specific string as the
429 * first line, check for that
431 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
432 ret = read_iolog2(td, f);
434 log_err("fio: iolog version 1 is no longer supported\n");
443 * Set up a log for storing io patterns.
445 static int init_iolog_write(struct thread_data *td)
451 f = fopen(td->o.write_iolog_file, "a");
453 perror("fopen write iolog");
458 * That's it for writing, setup a log buffer and we're done.
461 td->iolog_buf = malloc(8192);
462 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
465 * write our version line
467 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
468 perror("iolog init\n");
473 * add all known files
475 for_each_file(td, ff, i)
476 log_file(td, ff, FIO_LOG_ADD_FILE);
481 int init_iolog(struct thread_data *td)
485 if (td->o.read_iolog_file) {
489 * Check if it's a blktrace file and load that if possible.
490 * Otherwise assume it's a normal log file and load that.
492 if (is_blktrace(td->o.read_iolog_file, &need_swap))
493 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
495 ret = init_iolog_read(td);
496 } else if (td->o.write_iolog_file)
497 ret = init_iolog_write(td);
500 td_verror(td, EINVAL, "failed initializing iolog");
505 void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
507 struct io_log *l = malloc(sizeof(*l));
509 memset(l, 0, sizeof(*l));
511 l->max_samples = 1024;
512 l->log_type = log_type;
513 l->log = malloc(l->max_samples * sizeof(struct io_sample));
514 l->avg_msec = avg_msec;
518 void __finish_log(struct io_log *log, const char *name)
523 f = fopen(name, "a");
529 for (i = 0; i < log->nr_samples; i++) {
530 fprintf(f, "%lu, %lu, %u, %u\n",
531 (unsigned long) log->log[i].time,
532 (unsigned long) log->log[i].val,
533 log->log[i].ddir, log->log[i].bs);
541 void finish_log_named(struct thread_data *td, struct io_log *log,
542 const char *prefix, const char *postfix)
544 char file_name[256], *p;
546 snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
547 p = basename(file_name);
549 if (td->client_type == FIO_CLIENT_TYPE_GUI) {
550 fio_send_iolog(td, log, p);
554 __finish_log(log, p);
557 void finish_log(struct thread_data *td, struct io_log *log, const char *name)
559 finish_log_named(td, log, td->o.name, name);