/*
 * Code related to writing an iolog of what a thread is doing, and to
 * later read that back and replay.
 */
/* Magic first line of a version-2 iolog file; checked by init_iolog_read(). */
23 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * One chunk of compressed log samples (fragment: fields beyond 'list'
 * are not visible here, but 'buf', 'len' and 'seq' are used below).
 */
27 struct iolog_compress {
28 struct flist_head list;
/* Size of each compression chunk buffer: 128 KiB. */
35 #define GZ_CHUNK 131072
37 static struct iolog_compress *get_new_chunk(unsigned int seq)
39 struct iolog_compress *c;
41 c = malloc(sizeof(*c));
42 INIT_FLIST_HEAD(&c->list);
43 c->buf = malloc(GZ_CHUNK);
/* Release a compression chunk (fragment: body not visible in this view). */
50 static void free_chunk(struct iolog_compress *ic)
60 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
62 flist_add_tail(&ipo->list, &td->io_log_list);
63 td->total_io_size += ipo->len;
/*
 * Append one I/O record ("<file> <action> <offset> <length>") to the
 * per-thread iolog file, if iolog writing is enabled for this job.
 */
66 void log_io_u(struct thread_data *td, struct io_u *io_u)
68 const char *act[] = { "read", "write", "sync", "datasync",
69 "sync_file_range", "wait", "trim" };
/* ddir must be a valid index into act[] above (7 entries, 0..6) */
71 assert(io_u->ddir <= 6);
/* No iolog configured for this job; nothing to record. */
73 if (!td->o.write_iolog_file)
76 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
77 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level event ("add", "open" or "close") in the iolog,
 * if iolog writing is enabled.  Parsed back by read_iolog2().
 */
81 void log_file(struct thread_data *td, struct fio_file *f,
82 enum file_log_act what)
84 const char *act[] = { "add", "open", "close" };
/* No iolog configured; nothing to record. */
88 if (!td->o.write_iolog_file)
/* (fragment of original comment follows) */
93 * this happens on the pre-open/close done before the job starts
98 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Honor the delay a replayed iolog entry asks for, compensating for the
 * time already elapsed since the last issue.  Sleeps in bounded slices
 * so the thread can notice td->terminate promptly.
 */
101 static void iolog_delay(struct thread_data *td, unsigned long delay)
/* time already burned since the last issued I/O */
103 unsigned long usec = utime_since_now(&td->last_issue);
104 unsigned long this_delay;
112 * less than 100 usec delay, just regard it as noise
117 while (delay && !td->terminate) {
/* cap each sleep slice at 500ms so termination is noticed */
119 if (this_delay > 500000)
122 usec_sleep(td, this_delay);
/*
 * Handle "special" (non-I/O) replay entries: file open/close/unlink
 * actions carried by io_pieces with ddir == DDIR_INVAL.  Returns early
 * for normal I/O entries; return value semantics for the remaining
 * paths are not fully visible in this fragment.
 */
127 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
/* Regular I/O entry, not a file action. */
135 if (ipo->ddir != DDIR_INVAL)
138 f = td->files[ipo->fileno];
140 switch (ipo->file_action) {
141 case FIO_LOG_OPEN_FILE:
142 ret = td_io_open_file(td, f);
145 td_verror(td, ret, "iolog open file");
147 case FIO_LOG_CLOSE_FILE:
148 td_io_close_file(td, f);
150 case FIO_LOG_UNLINK_FILE:
151 unlink(f->file_name);
154 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next entry off the replay log and fill in io_u with it.
 * File actions (open/close/unlink) are executed inline via
 * ipo_special(); WAIT entries trigger a sleep instead of an I/O.
 */
161 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
163 struct io_piece *ipo;
164 unsigned long elapsed;
166 while (!flist_empty(&td->io_log_list)) {
169 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
170 flist_del(&ipo->list);
171 remove_trim_entry(td, ipo);
/* file action entries are consumed here, not returned to the caller */
173 ret = ipo_special(td, ipo);
177 } else if (ret > 0) {
182 io_u->ddir = ipo->ddir;
183 if (ipo->ddir != DDIR_WAIT) {
184 io_u->offset = ipo->offset;
185 io_u->buflen = ipo->len;
186 io_u->file = td->files[ipo->fileno];
/* take a reference for the duration of this io_u */
187 get_file(io_u->file);
188 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
189 io_u->buflen, io_u->file->file_name);
191 iolog_delay(td, ipo->delay);
/* WAIT entry: delay is absolute vs. run genesis time here */
193 elapsed = mtime_since_genesis();
194 if (ipo->delay > elapsed)
195 usec_sleep(td, (ipo->delay - elapsed) * 1000);
200 if (io_u->ddir != DDIR_WAIT)
/*
 * Drop all stored io_piece history: both the rb tree used for sorted
 * verification and the flat history list.
 */
208 void prune_io_piece_log(struct thread_data *td)
210 struct io_piece *ipo;
/* drain the sorted verify tree */
213 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
214 ipo = rb_entry(n, struct io_piece, rb_node);
215 rb_erase(n, &td->io_hist_tree);
216 remove_trim_entry(td, ipo);
/* drain the unsorted history list */
221 while (!flist_empty(&td->io_hist_list)) {
222 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
223 flist_del(&ipo->list);
224 remove_trim_entry(td, ipo);
231 * log a successful write, so we can unwind the log for verify
233 void log_io_piece(struct thread_data *td, struct io_u *io_u)
235 struct rb_node **p, *parent;
236 struct io_piece *ipo, *__ipo;
238 ipo = malloc(sizeof(struct io_piece));
240 ipo->file = io_u->file;
241 ipo->offset = io_u->offset;
242 ipo->len = io_u->buflen;
243 ipo->numberio = io_u->numberio;
/* still in flight; flipped to ONLIST/ONRB once stored below */
244 ipo->flags = IP_F_IN_FLIGHT;
248 if (io_u_should_trim(td, io_u)) {
249 flist_add_tail(&ipo->trim_list, &td->trim_list);
254 * We don't need to sort the entries, if:
256 * Sequential writes, or
257 * Random writes that lay out the file as it goes along
259 * For both these cases, just reading back data in the order we
260 * wrote it out is the fastest.
262 * One exception is if we don't have a random map AND we are doing
263 * verifies, in that case we need to check for duplicate blocks and
264 * drop the old one, which we rely on the rb insert/lookup for
267 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
268 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
/* fast path: unsorted FIFO history list */
269 INIT_FLIST_HEAD(&ipo->list);
270 flist_add_tail(&ipo->list, &td->io_hist_list);
271 ipo->flags |= IP_F_ONLIST;
276 RB_CLEAR_NODE(&ipo->rb_node);
279 * Sort the entry into the verification list
/* rb tree keyed by (file, offset); duplicates replace the old entry */
282 p = &td->io_hist_tree.rb_node;
287 __ipo = rb_entry(parent, struct io_piece, rb_node);
288 if (ipo->file < __ipo->file)
290 else if (ipo->file > __ipo->file)
292 else if (ipo->offset < __ipo->offset)
294 else if (ipo->offset > __ipo->offset)
297 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
298 __ipo->offset, __ipo->len,
299 ipo->offset, ipo->len);
/* same (file, offset): evict the stale duplicate before inserting */
301 rb_erase(parent, &td->io_hist_tree);
302 remove_trim_entry(td, __ipo);
308 rb_link_node(&ipo->rb_node, parent, p);
309 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
310 ipo->flags |= IP_F_ONRB;
314 void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
316 struct io_piece *ipo = io_u->ipo;
321 if (ipo->flags & IP_F_ONRB)
322 rb_erase(&ipo->rb_node, &td->io_hist_tree);
323 else if (ipo->flags & IP_F_ONLIST)
324 flist_del(&ipo->list);
331 void trim_io_piece(struct thread_data *td, struct io_u *io_u)
333 struct io_piece *ipo = io_u->ipo;
338 ipo->len = io_u->xfer_buflen - io_u->resid;
/*
 * Close out iolog writing for this thread (fragment: flush/close of
 * td->iolog_f not visible here); drop the stdio buffer reference.
 */
341 void write_iolog_close(struct thread_data *td)
347 td->iolog_buf = NULL;
351 * Read version 2 iolog data. It is enhanced to include per-file logging,
354 static int read_iolog2(struct thread_data *td, FILE *f)
356 unsigned long long offset;
358 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
/* start from a clean file list; entries are re-added from the log */
363 free_release_files(td);
366 * Read in the read iolog and store it, reuse the infrastructure
367 * for doing verifications.
/* 256 for the %256s field width plus slack for the terminator */
370 fname = malloc(256+16);
371 act = malloc(256+16);
373 reads = writes = waits = 0;
374 while ((p = fgets(str, 4096, f)) != NULL) {
375 struct io_piece *ipo;
/* 4-field form: "<fname> <act> <offset> <bytes>" (I/O entry) */
378 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
384 if (!strcmp(act, "wait"))
386 else if (!strcmp(act, "read"))
388 else if (!strcmp(act, "write"))
390 else if (!strcmp(act, "sync"))
392 else if (!strcmp(act, "datasync"))
394 else if (!strcmp(act, "trim"))
397 log_err("fio: bad iolog file action: %s\n",
401 fileno = get_fileno(td, fname);
/* 2-field form: "<fname> <act>" (file-level action entry) */
404 if (!strcmp(act, "add")) {
405 fileno = add_file(td, fname, 0, 1);
406 file_action = FIO_LOG_ADD_FILE;
408 } else if (!strcmp(act, "open")) {
409 fileno = get_fileno(td, fname);
410 file_action = FIO_LOG_OPEN_FILE;
411 } else if (!strcmp(act, "close")) {
412 fileno = get_fileno(td, fname);
413 file_action = FIO_LOG_CLOSE_FILE;
415 log_err("fio: bad iolog file action: %s\n",
420 log_err("bad iolog2: %s", p);
426 else if (rw == DDIR_WRITE) {
428 * Don't add a write for ro mode
433 } else if (rw == DDIR_WAIT) {
435 } else if (rw == DDIR_INVAL) {
436 } else if (!ddir_sync(rw)) {
437 log_err("bad ddir: %d\n", rw);
/* store the parsed entry for replay */
444 ipo = malloc(sizeof(*ipo));
447 if (rw == DDIR_WAIT) {
450 ipo->offset = offset;
/* track largest block size seen, for buffer sizing */
452 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
453 td->o.max_bs[rw] = bytes;
454 ipo->fileno = fileno;
455 ipo->file_action = file_action;
459 queue_io_piece(td, ipo);
466 if (writes && read_only) {
467 log_err("fio: <%s> skips replay of %d writes due to"
468 " read-only\n", td->o.name, writes);
/* derive the job's data direction from what the log contained */
472 if (!reads && !writes && !waits)
474 else if (reads && !writes)
475 td->o.td_ddir = TD_DDIR_READ;
476 else if (!reads && writes)
477 td->o.td_ddir = TD_DDIR_WRITE;
479 td->o.td_ddir = TD_DDIR_RW;
485 * open iolog, check version, and call appropriate parser
487 static int init_iolog_read(struct thread_data *td)
489 char buffer[256], *p;
493 f = fopen(td->o.read_iolog_file, "r");
495 perror("fopen read iolog");
/* first line must carry the version magic */
499 p = fgets(buffer, sizeof(buffer), f);
501 td_verror(td, errno, "iolog read");
502 log_err("fio: unable to read iolog\n");
508 * version 2 of the iolog stores a specific string as the
509 * first line, check for that
511 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
512 ret = read_iolog2(td, f);
514 log_err("fio: iolog version 1 is no longer supported\n");
523 * Set up a log for storing io patterns.
525 static int init_iolog_write(struct thread_data *td)
/* append mode: don't clobber an existing log */
531 f = fopen(td->o.write_iolog_file, "a");
533 perror("fopen write iolog");
538 * That's it for writing, setup a log buffer and we're done.
/* full buffering keeps per-I/O fprintf calls off the disk */
541 td->iolog_buf = malloc(8192);
542 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
545 * write our version line
547 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
548 perror("iolog init\n");
553 * add all known files
555 for_each_file(td, ff, i)
556 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Entry point: set up iolog reading (replay) or writing for this
 * thread, depending on job options.  Replay input may be a blktrace
 * file or a fio iolog.
 */
561 int init_iolog(struct thread_data *td)
565 if (td->o.read_iolog_file) {
569 * Check if it's a blktrace file and load that if possible.
570 * Otherwise assume it's a normal log file and load that.
572 if (is_blktrace(td->o.read_iolog_file, &need_swap))
573 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
575 ret = init_iolog_read(td);
576 } else if (td->o.write_iolog_file)
577 ret = init_iolog_write(td);
580 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and initialize an io_log from the given parameters.  When
 * compression is requested (log_gz) and a thread is attached, the
 * chunk lock is set up and the thread is flagged for log compression.
 */
585 void setup_log(struct io_log **log, struct log_params *p,
586 const char *filename)
588 struct io_log *l = malloc(sizeof(*l));
590 memset(l, 0, sizeof(*l));
/* initial sample capacity; presumably grown on demand elsewhere */
592 l->max_samples = 1024;
593 l->log_type = p->log_type;
594 l->log_offset = p->log_offset;
595 l->log_gz = p->log_gz;
596 l->log_gz_store = p->log_gz_store;
597 l->log = malloc(l->max_samples * log_entry_sz(l));
598 l->avg_msec = p->avg_msec;
599 l->filename = strdup(filename);
/* high bit of the ddir field marks offset-carrying samples
 * (checked in flush_samples) */
603 l->log_ddir_mask = 0x80000000;
605 INIT_FLIST_HEAD(&l->chunk_list);
607 if (l->log_gz && !p->td)
609 else if (l->log_gz) {
610 pthread_mutex_init(&l->chunk_lock, NULL);
611 p->td->flags |= TD_F_COMPRESS_LOG;
/*
 * Optional large stdio buffer for log flushing: real implementation
 * when setvbuf is available, no-op stubs otherwise.
 */
617 #ifdef CONFIG_SETVBUF
618 static void *set_file_buffer(FILE *f)
/* 1 MiB fully-buffered stream to cut write syscalls during flush */
620 size_t size = 1048576;
624 setvbuf(f, buf, _IOFBF, size);
628 static void clear_file_buffer(void *buf)
/* stub variants (no CONFIG_SETVBUF) */
633 static void *set_file_buffer(FILE *f)
638 static void clear_file_buffer(void *buf)
/* Release an io_log (fragment: body not visible in this view). */
643 void free_log(struct io_log *log)
/*
 * Write a buffer of raw samples to f as text, one line per sample.
 * The high bit of the first sample's ddir signals that samples carry
 * an offset field (see l->log_ddir_mask in setup_log).
 */
650 static void flush_samples(FILE *f, void *samples, uint64_t sample_size)
654 uint64_t i, nr_samples;
/* probe the first sample for the offset-logging flag */
659 s = __get_sample(samples, 0, 0);
660 if (s->__ddir & 0x80000000)
665 nr_samples = sample_size / __log_entry_sz(log_offset);
667 for (i = 0; i < nr_samples; i++) {
668 s = __get_sample(samples, log_offset, i);
/* plain form: time, value, ddir, block size */
671 fprintf(f, "%lu, %lu, %u, %u\n",
672 (unsigned long) s->time,
673 (unsigned long) s->val,
674 io_sample_ddir(s), s->bs);
/* offset form: same plus the I/O offset */
676 struct io_sample_offset *so = (void *) s;
678 fprintf(f, "%lu, %lu, %u, %u, %llu\n",
679 (unsigned long) s->time,
680 (unsigned long) s->val,
681 io_sample_ddir(s), s->bs,
682 (unsigned long long) so->offset);
688 static int z_stream_init(z_stream *stream, int gz_hdr)
692 stream->zalloc = Z_NULL;
693 stream->zfree = Z_NULL;
694 stream->opaque = Z_NULL;
695 stream->next_in = Z_NULL;
700 if (inflateInit2(stream, wbits) != Z_OK)
/* Iterator state carried across flush_chunk() calls (fragment). */
706 struct flush_chunk_iter {
/*
 * End the current inflate stream, emit everything accumulated in the
 * iterator's buffer as text samples, and reset the iterator.
 */
714 static void finish_chunk(z_stream *stream, FILE *f,
715 struct flush_chunk_iter *iter)
719 ret = inflateEnd(stream);
721 log_err("fio: failed to end log inflation (%d)\n", ret);
723 flush_samples(f, iter->buf, iter->buf_used);
726 iter->buf_size = iter->buf_used = 0;
/*
 * Inflate one compressed chunk into the iterator's buffer, growing the
 * buffer as needed.  A change in chunk sequence number means a new
 * deflate stream: finish the previous one and re-init.
 */
729 static int flush_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
730 z_stream *stream, struct flush_chunk_iter *iter)
732 if (ic->seq != iter->seq) {
/* new stream: flush accumulated output, restart inflation */
734 finish_chunk(stream, f, iter);
736 z_stream_init(stream, gz_hdr);
740 stream->avail_in = ic->len;
741 stream->next_in = ic->buf;
743 if (!iter->buf_size) {
744 iter->buf_size = iter->chunk_sz;
745 iter->buf = malloc(iter->buf_size);
748 while (stream->avail_in) {
749 size_t this_out = iter->buf_size - iter->buf_used;
752 stream->avail_out = this_out;
753 stream->next_out = iter->buf + iter->buf_used;
755 err = inflate(stream, Z_NO_FLUSH);
757 log_err("fio: failed inflating log: %d\n", err);
/* account for however much inflate produced this pass */
761 iter->buf_used += this_out - stream->avail_out;
/* output buffer full: grow by one chunk and continue */
763 if (!stream->avail_out) {
764 iter->buf_size += iter->chunk_sz;
765 iter->buf = realloc(iter->buf, iter->buf_size);
769 if (err == Z_STREAM_END)
/*
 * Drain the log's compressed chunk list to f: either write the raw
 * gzip data straight out (log_gz_store) or inflate chunks back into
 * text samples.
 */
777 static void flush_gz_chunks(struct io_log *log, FILE *f)
779 struct flush_chunk_iter iter = { .chunk_sz = log->log_gz, };
780 struct flist_head *node;
783 while (!flist_empty(&log->chunk_list)) {
784 struct iolog_compress *ic;
786 node = log->chunk_list.next;
787 ic = flist_entry(node, struct iolog_compress, list);
788 flist_del(&ic->list);
/* storing compressed: emit the raw chunk bytes */
790 if (log->log_gz_store) {
791 fwrite(ic->buf, ic->len, 1, f);
794 flush_chunk(ic, log->log_gz_store, f, &stream, &iter);
/* flush whatever the last stream left in the iterator */
798 finish_chunk(&stream, f, &iter);
/*
 * Standalone utility: read a gzip-stored log file whole, inflate it,
 * and print the decoded samples to stdout.
 */
803 int iolog_file_inflate(const char *file)
805 struct flush_chunk_iter iter = { .chunk_sz = 64 * 1024 * 1024, };
806 struct iolog_compress ic;
812 f = fopen(file, "r");
/* size the read buffer from the file's on-disk size */
818 if (stat(file, &sb) < 0) {
824 ic.buf = malloc(sb.st_size);
829 ret = fread(ic.buf, ic.len, 1, f);
834 } else if (ret != 1) {
835 log_err("fio: short read on reading log\n");
/* gz_hdr=1: stored logs carry a gzip header */
842 flush_chunk(&ic, 1, stdout, &stream, &iter);
845 finish_chunk(&stream, stdout, &iter);
/* No-compression build: stub (fragment; part of an #else branch). */
855 static void flush_gz_chunks(struct io_log *log, FILE *f)
/*
 * Write an io_log to its file: any pending compressed chunks first,
 * then the in-memory samples, using a large stdio buffer if available.
 */
861 void flush_log(struct io_log *log)
866 f = fopen(log->filename, "w");
872 buf = set_file_buffer(f);
874 flush_gz_chunks(log, f);
876 flush_samples(f, log->log, log->nr_samples * log_entry_sz(log));
879 clear_file_buffer(buf);
/*
 * Finalize one log under the per-file lock: GUI clients get the log
 * shipped over the wire, otherwise (fragment) it is presumably flushed
 * to disk.  'trylock' selects non-blocking lock acquisition.
 */
882 static int finish_log(struct thread_data *td, struct io_log *log, int trylock)
888 if (fio_trylock_file(log->filename))
891 fio_lock_file(log->filename);
893 if (td->client_type == FIO_CLIENT_TYPE_GUI)
894 fio_send_iolog(td, log, log->filename);
898 fio_unlock_file(log->filename);
/* Work item handed to the compression thread pool (fragment). */
905 struct iolog_flush_data {
/*
 * Thread-pool worker: deflate a snapshot of log samples into GZ_CHUNK
 * sized chunks, then splice them onto the log's chunk list under the
 * chunk lock.  All chunks from one invocation share a sequence number.
 */
912 static int gz_work(struct tp_work *work)
914 struct iolog_flush_data *data;
915 struct iolog_compress *c;
916 struct flist_head list;
922 INIT_FLIST_HEAD(&list);
924 data = container_of(work, struct iolog_flush_data, work);
926 stream.zalloc = Z_NULL;
927 stream.zfree = Z_NULL;
928 stream.opaque = Z_NULL;
930 ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
932 log_err("fio: failed to init gz stream\n");
/* one sequence number per flush, so inflate can find stream breaks */
936 seq = ++data->log->chunk_seq;
938 stream.next_in = (void *) data->samples;
939 stream.avail_in = data->nr_samples * log_entry_sz(data->log);
/* compress input into successive GZ_CHUNK-sized chunks */
942 c = get_new_chunk(seq);
943 stream.avail_out = GZ_CHUNK;
944 stream.next_out = c->buf;
945 ret = deflate(&stream, Z_NO_FLUSH);
947 log_err("fio: deflate log (%d)\n", ret);
951 c->len = GZ_CHUNK - stream.avail_out;
952 flist_add_tail(&c->list, &list);
954 } while (stream.avail_in);
/* finish the stream; may need extra chunks for trailing output */
956 stream.next_out = c->buf + c->len;
957 stream.avail_out = GZ_CHUNK - c->len;
959 ret = deflate(&stream, Z_FINISH);
960 if (ret == Z_STREAM_END)
961 c->len = GZ_CHUNK - stream.avail_out;
964 c = get_new_chunk(seq);
965 stream.avail_out = GZ_CHUNK;
966 stream.next_out = c->buf;
967 ret = deflate(&stream, Z_FINISH);
968 c->len = GZ_CHUNK - stream.avail_out;
969 flist_add_tail(&c->list, &list);
970 } while (ret != Z_STREAM_END);
973 ret = deflateEnd(&stream);
975 log_err("fio: deflateEnd %d\n", ret);
/* publish finished chunks to the log under its lock */
979 if (!flist_empty(&list)) {
980 pthread_mutex_lock(&data->log->chunk_lock);
981 flist_splice_tail(&list, &data->log->chunk_list);
982 pthread_mutex_unlock(&data->log->chunk_lock);
/* wake anyone waiting synchronously in iolog_flush() */
987 pthread_cond_signal(&work->cv);
/*
 * Snapshot the log's current samples, reset the sample count, and queue
 * a gz_work compression job on the thread pool.  If 'wait' is set,
 * block until the worker signals completion.
 */
994 int iolog_flush(struct io_log *log, int wait)
996 struct tp_data *tdat = log->td->tp_data;
997 struct iolog_flush_data *data;
1000 data = malloc(sizeof(*data));
1006 sample_size = log->nr_samples * log_entry_sz(log);
1007 data->samples = malloc(sample_size);
1008 if (!data->samples) {
/* copy so the log can keep accumulating while we compress */
1013 memcpy(data->samples, log->log, sample_size);
1014 data->nr_samples = log->nr_samples;
1015 data->work.fn = gz_work;
1016 log->nr_samples = 0;
1019 pthread_mutex_init(&data->work.lock, NULL);
1020 pthread_cond_init(&data->work.cv, NULL);
1021 data->work.wait = 1;
1023 data->work.wait = 0;
1025 tp_queue_work(tdat, &data->work);
/* synchronous path: wait for gz_work to mark the job done */
1028 pthread_mutex_lock(&data->work.lock);
1029 while (!data->work.done)
1030 pthread_cond_wait(&data->work.cv, &data->work.lock);
1031 pthread_mutex_unlock(&data->work.lock);
/* No-compression build: stub (fragment; part of an #else branch). */
1040 int iolog_flush(struct io_log *log, int wait)
1047 static int write_iops_log(struct thread_data *td, int try)
1049 struct io_log *log = td->iops_log;
1054 return finish_log(td, log, try);
1057 static int write_slat_log(struct thread_data *td, int try)
1059 struct io_log *log = td->slat_log;
1064 return finish_log(td, log, try);
1067 static int write_clat_log(struct thread_data *td, int try)
1069 struct io_log *log = td->clat_log;
1074 return finish_log(td, log, try);
1077 static int write_lat_log(struct thread_data *td, int try)
1079 struct io_log *log = td->lat_log;
1084 return finish_log(td, log, try);
1087 static int write_bandw_log(struct thread_data *td, int try)
1089 struct io_log *log = td->bw_log;
1094 return finish_log(td, log, try);
/* Writer callback for one log type (fragment of struct log_type). */
1109 int (*fn)(struct thread_data *, int);
/*
 * Table mapping each log-type mask bit to its writer; iterated by
 * fio_writeout_logs() below.
 */
1112 static struct log_type log_types[] = {
1114 .mask = BW_LOG_MASK,
1115 .fn = write_bandw_log,
1118 .mask = LAT_LOG_MASK,
1119 .fn = write_lat_log,
1122 .mask = SLAT_LOG_MASK,
1123 .fn = write_slat_log,
1126 .mask = CLAT_LOG_MASK,
1127 .fn = write_clat_log,
1130 .mask = IOPS_LOG_MASK,
1131 .fn = write_iops_log,
/*
 * Write out every configured log for this thread.  Loops over the
 * log_types table, retrying writers whose (try)lock attempt failed,
 * until no progress is made in a full pass.  Runstate is bumped to
 * TD_FINISHING for the duration.
 */
1135 void fio_writeout_logs(struct thread_data *td)
1137 unsigned int log_mask = 0;
1138 unsigned int log_left = ALL_LOG_NR;
1141 old_state = td_bump_runstate(td, TD_FINISHING);
/* detect a pass that made no progress (all remaining locks busy) */
1146 int prev_log_left = log_left;
1148 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
1149 struct log_type *lt = &log_types[i];
/* skip logs already written (bit set in log_mask) */
1152 if (!(log_mask & lt->mask)) {
/* last remaining log blocks instead of trylocking */
1153 ret = lt->fn(td, log_left != 1);
1156 log_mask |= lt->mask;
1161 if (prev_log_left == log_left)
1165 td_restore_runstate(td, old_state);