2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic first line of a version-2 iolog file; used both when writing and to detect the format on read. */
23 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * One chunk of deflated log sample data, linked on a per-log chunk list.
 * (Remaining fields not visible in this view — see full source.)
 */
27 struct iolog_compress {
28 struct flist_head list;
/* Size of each compression chunk buffer: 128 KiB. */
35 #define GZ_CHUNK 131072
/*
 * Allocate a fresh iolog_compress chunk (tagged with sequence number
 * 'seq') with a GZ_CHUNK-sized data buffer.
 * NOTE(review): malloc() returns are unchecked here — confirm the
 * project's OOM policy (fio generally aborts on allocation failure).
 */
37 static struct iolog_compress *get_new_chunk(unsigned int seq)
39 struct iolog_compress *c;
41 c = malloc(sizeof(*c));
42 INIT_FLIST_HEAD(&c->list);
43 c->buf = malloc(GZ_CHUNK);
/* Free a chunk previously returned by get_new_chunk(). */
50 static void free_chunk(struct iolog_compress *ic)
/*
 * Append an io_piece to the thread's replay queue and account its
 * length into the thread's total expected I/O size.
 */
60 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
62 flist_add_tail(&ipo->list, &td->io_log_list);
63 td->total_io_size += ipo->len;
/*
 * Emit one "<file> <action> <offset> <len>" record for this io_u into
 * the thread's iolog file, if iolog writing was requested.
 */
66 void log_io_u(struct thread_data *td, struct io_u *io_u)
68 const char *act[] = { "read", "write", "sync", "datasync",
69 "sync_file_range", "wait", "trim" };
/* ddir indexes act[]; 6 is the last valid entry ("trim") */
71 assert(io_u->ddir <= 6);
/* no-op unless the user asked for an iolog */
73 if (!td->o.write_iolog_file)
76 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
77 act[io_u->ddir], io_u->offset,
/*
 * Emit a file-level event ("add"/"open"/"close", indexed by 'what')
 * for file f into the thread's iolog, if iolog writing was requested.
 */
81 void log_file(struct thread_data *td, struct fio_file *f,
82 enum file_log_act what)
84 const char *act[] = { "add", "open", "close" };
88 if (!td->o.write_iolog_file)
93 * this happens on the pre-open/close done before the job starts
98 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Sleep to reproduce a recorded inter-I/O delay (in usec), accounting
 * for time already elapsed since the last issued I/O.
 */
101 static void iolog_delay(struct thread_data *td, unsigned long delay)
103 unsigned long usec = utime_since_now(&td->last_issue);
104 unsigned long this_delay;
112 * less than 100 usec delay, just regard it as noise
/* sleep in bounded slices (<= 500ms) so td->terminate is noticed promptly */
117 while (delay && !td->terminate) {
119 if (this_delay > 500000)
122 usec_sleep(td, this_delay);
/*
 * Handle a "special" (non-I/O, DDIR_INVAL) iolog entry carrying a file
 * action: open, close, or unlink the referenced file.
 * NOTE(review): the return-value contract is partly elided from this
 * view — confirm against the caller in read_iolog_get().
 */
127 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
/* real I/O entries are not handled here */
135 if (ipo->ddir != DDIR_INVAL)
138 f = td->files[ipo->fileno];
140 switch (ipo->file_action) {
141 case FIO_LOG_OPEN_FILE:
142 ret = td_io_open_file(td, f);
145 td_verror(td, ret, "iolog open file");
147 case FIO_LOG_CLOSE_FILE:
148 td_io_close_file(td, f);
150 case FIO_LOG_UNLINK_FILE:
151 unlink(f->file_name);
154 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next entry from the thread's replay queue and fill in io_u
 * accordingly. File actions are dispatched to ipo_special(); WAIT
 * entries sleep rather than set up a data transfer.
 */
161 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
163 struct io_piece *ipo;
164 unsigned long elapsed;
166 while (!flist_empty(&td->io_log_list)) {
169 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
170 flist_del(&ipo->list);
171 remove_trim_entry(td, ipo);
/* special entries (file open/close/unlink) are consumed here */
173 ret = ipo_special(td, ipo);
177 } else if (ret > 0) {
182 io_u->ddir = ipo->ddir;
183 if (ipo->ddir != DDIR_WAIT) {
184 io_u->offset = ipo->offset;
185 io_u->buflen = ipo->len;
186 io_u->file = td->files[ipo->fileno];
/* hold a reference on the file for the lifetime of this io_u */
187 get_file(io_u->file);
188 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
189 io_u->buflen, io_u->file->file_name);
191 iolog_delay(td, ipo->delay);
/* WAIT entry: delay is absolute (msec since genesis); sleep the remainder */
193 elapsed = mtime_since_genesis();
194 if (ipo->delay > elapsed)
195 usec_sleep(td, (ipo->delay - elapsed) * 1000);
200 if (io_u->ddir != DDIR_WAIT)
/*
 * Drop all stored io_piece history for this thread: first drain the
 * rbtree used for sorted verification, then the plain history list.
 */
208 void prune_io_piece_log(struct thread_data *td)
210 struct io_piece *ipo;
213 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
214 ipo = rb_entry(n, struct io_piece, rb_node);
215 rb_erase(n, &td->io_hist_tree);
216 remove_trim_entry(td, ipo);
221 while (!flist_empty(&td->io_hist_list)) {
222 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
223 flist_del(&ipo->list);
224 remove_trim_entry(td, ipo);
231 * log a successful write, so we can unwind the log for verify
233 void log_io_piece(struct thread_data *td, struct io_u *io_u)
235 struct rb_node **p, *parent;
236 struct io_piece *ipo, *__ipo;
/* NOTE(review): malloc return unchecked — confirm project OOM policy */
238 ipo = malloc(sizeof(struct io_piece));
240 ipo->file = io_u->file;
241 ipo->offset = io_u->offset;
242 ipo->len = io_u->buflen;
243 ipo->numberio = io_u->numberio;
244 ipo->flags = IP_F_IN_FLIGHT;
/* track blocks that will later be trimmed, on a separate list */
248 if (io_u_should_trim(td, io_u)) {
249 flist_add_tail(&ipo->trim_list, &td->trim_list);
254 * We don't need to sort the entries, if:
256 * Sequential writes, or
257 * Random writes that lay out the file as it goes along
259 * For both these cases, just reading back data in the order we
260 * wrote it out is the fastest.
262 * One exception is if we don't have a random map AND we are doing
263 * verifies, in that case we need to check for duplicate blocks and
264 * drop the old one, which we rely on the rb insert/lookup for
267 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
268 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
/* fast path: plain FIFO history list, no sorting required */
269 INIT_FLIST_HEAD(&ipo->list);
270 flist_add_tail(&ipo->list, &td->io_hist_list);
271 ipo->flags |= IP_F_ONLIST;
276 RB_CLEAR_NODE(&ipo->rb_node);
279 * Sort the entry into the verification list
/* ordered by (file, offset); an exact match is a duplicate block */
282 p = &td->io_hist_tree.rb_node;
287 __ipo = rb_entry(parent, struct io_piece, rb_node);
288 if (ipo->file < __ipo->file)
290 else if (ipo->file > __ipo->file)
292 else if (ipo->offset < __ipo->offset)
294 else if (ipo->offset > __ipo->offset)
/* overlap/duplicate: evict the older entry before inserting */
297 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
298 __ipo->offset, __ipo->len,
299 ipo->offset, ipo->len);
301 rb_erase(parent, &td->io_hist_tree);
302 remove_trim_entry(td, __ipo);
308 rb_link_node(&ipo->rb_node, parent, p);
309 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
310 ipo->flags |= IP_F_ONRB;
/*
 * Remove the io_piece attached to io_u from whichever history
 * container it was stored in (rbtree or flat list).
 */
314 void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
316 struct io_piece *ipo = io_u->ipo;
321 if (ipo->flags & IP_F_ONRB)
322 rb_erase(&ipo->rb_node, &td->io_hist_tree);
323 else if (ipo->flags & IP_F_ONLIST)
324 flist_del(&ipo->list);
/*
 * Shrink the logged length of io_u's io_piece to the bytes actually
 * transferred (xfer_buflen minus the residual).
 */
331 void trim_io_piece(struct thread_data *td, struct io_u *io_u)
333 struct io_piece *ipo = io_u->ipo;
338 ipo->len = io_u->xfer_buflen - io_u->resid;
/*
 * Close out the thread's write-iolog; clears the stdio buffer pointer
 * (flush/close/free of the stream are elided from this view).
 */
341 void write_iolog_close(struct thread_data *td)
347 td->iolog_buf = NULL;
351 * Read version 2 iolog data. It is enhanced to include per-file logging,
354 static int read_iolog2(struct thread_data *td, FILE *f)
356 unsigned long long offset;
358 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
/* start from a clean file list; entries are re-added from the log */
363 free_release_files(td);
366 * Read in the read iolog and store it, reuse the infrastructure
367 * for doing verifications.
/* 256+16: room for a %256s scan plus slack */
370 fname = malloc(256+16);
371 act = malloc(256+16);
373 reads = writes = waits = 0;
374 while ((p = fgets(str, 4096, f)) != NULL) {
375 struct io_piece *ipo;
/* r tells us which line form this is: 4 = data op, fewer = file action */
378 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
384 if (!strcmp(act, "wait"))
386 else if (!strcmp(act, "read"))
388 else if (!strcmp(act, "write"))
390 else if (!strcmp(act, "sync"))
392 else if (!strcmp(act, "datasync"))
394 else if (!strcmp(act, "trim"))
397 log_err("fio: bad iolog file action: %s\n",
401 fileno = get_fileno(td, fname);
404 if (!strcmp(act, "add")) {
405 fileno = add_file(td, fname, 0, 1);
406 file_action = FIO_LOG_ADD_FILE;
408 } else if (!strcmp(act, "open")) {
409 fileno = get_fileno(td, fname);
410 file_action = FIO_LOG_OPEN_FILE;
411 } else if (!strcmp(act, "close")) {
412 fileno = get_fileno(td, fname);
413 file_action = FIO_LOG_CLOSE_FILE;
415 log_err("fio: bad iolog file action: %s\n",
420 log_err("bad iolog2: %s", p);
426 else if (rw == DDIR_WRITE) {
428 * Don't add a write for ro mode
433 } else if (rw == DDIR_WAIT) {
435 } else if (rw == DDIR_INVAL) {
436 } else if (!ddir_sync(rw)) {
437 log_err("bad ddir: %d\n", rw);
/* build the replay entry and queue it */
444 ipo = malloc(sizeof(*ipo));
447 if (rw == DDIR_WAIT) {
450 ipo->offset = offset;
/* track the largest block size seen, replay buffers must cover it */
452 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
453 td->o.max_bs[rw] = bytes;
454 ipo->fileno = fileno;
455 ipo->file_action = file_action;
459 queue_io_piece(td, ipo);
466 if (writes && read_only) {
467 log_err("fio: <%s> skips replay of %d writes due to"
468 " read-only\n", td->o.name, writes);
/* derive the job's effective data direction from what the log contained */
472 if (!reads && !writes && !waits)
474 else if (reads && !writes)
475 td->o.td_ddir = TD_DDIR_READ;
476 else if (!reads && writes)
477 td->o.td_ddir = TD_DDIR_WRITE;
479 td->o.td_ddir = TD_DDIR_RW;
485 * open iolog, check version, and call appropriate parser
487 static int init_iolog_read(struct thread_data *td)
489 char buffer[256], *p;
493 f = fopen(td->o.read_iolog_file, "r");
495 perror("fopen read iolog");
/* first line identifies the log format version */
499 p = fgets(buffer, sizeof(buffer), f);
501 td_verror(td, errno, "iolog read");
502 log_err("fio: unable to read iolog\n");
508 * version 2 of the iolog stores a specific string as the
509 * first line, check for that
511 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
512 ret = read_iolog2(td, f);
514 log_err("fio: iolog version 1 is no longer supported\n");
523 * Set up a log for storing io patterns.
525 static int init_iolog_write(struct thread_data *td)
/* append mode, so restarted jobs extend rather than truncate the log */
531 f = fopen(td->o.write_iolog_file, "a");
533 perror("fopen write iolog");
538 * That's it for writing, setup a log buffer and we're done.
/* full buffering with our own 8 KiB buffer to cut down write syscalls */
541 td->iolog_buf = malloc(8192);
542 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
545 * write our version line
547 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
548 perror("iolog init\n");
553 * add all known files
555 for_each_file(td, ff, i)
556 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Entry point for iolog setup: replay from a blktrace or v2 iolog
 * file, or open a new iolog for writing, per the job options.
 */
561 int init_iolog(struct thread_data *td)
565 if (td->o.read_iolog_file) {
569 * Check if it's a blktrace file and load that if possible.
570 * Otherwise assume it's a normal log file and load that.
572 if (is_blktrace(td->o.read_iolog_file, &need_swap))
573 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
575 ret = init_iolog_read(td);
576 } else if (td->o.write_iolog_file)
577 ret = init_iolog_write(td);
580 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and initialize an io_log from the given parameters,
 * storing the result through *log. Sets up compression state when
 * log_gz is enabled and a thread_data is available.
 */
585 void setup_log(struct io_log **log, struct log_params *p,
586 const char *filename)
588 struct io_log *l = malloc(sizeof(*l));
590 memset(l, 0, sizeof(*l));
/* initial sample capacity; presumably grown on demand elsewhere — confirm */
592 l->max_samples = 1024;
593 l->log_type = p->log_type;
594 l->log_offset = p->log_offset;
595 l->log_gz = p->log_gz;
596 l->log_gz_store = p->log_gz_store;
597 l->log = malloc(l->max_samples * log_entry_sz(l));
598 l->avg_msec = p->avg_msec;
599 l->filename = strdup(filename);
/* top bit of the ddir field flags samples that carry an offset */
603 l->log_ddir_mask = 0x80000000;
605 INIT_FLIST_HEAD(&l->chunk_list);
/* compression needs a thread context; without one it cannot be used */
607 if (l->log_gz && !p->td)
609 else if (l->log_gz) {
610 pthread_mutex_init(&l->chunk_lock, NULL);
611 p->td->flags |= TD_F_COMPRESS_LOG;
#ifdef CONFIG_SETVBUF
/*
 * Give the stream a large (1 MiB) fully-buffered stdio buffer to speed
 * up log flushing; returns the buffer so the caller can free it after
 * the stream is closed.
 */
618 static void *set_file_buffer(FILE *f)
620 size_t size = 1048576;
624 setvbuf(f, buf, _IOFBF, size);
628 static void clear_file_buffer(void *buf)
/* no setvbuf support: no-op stubs with the same interface */
633 static void *set_file_buffer(FILE *f)
638 static void clear_file_buffer(void *buf)
/* Release an io_log and its owned resources. */
643 void free_log(struct io_log *log)
/*
 * Write a buffer of raw samples to f in text form. Detects from the
 * first sample's ddir top bit whether samples carry an offset field,
 * and picks the matching record size and output format.
 */
650 static void flush_samples(FILE *f, void *samples, uint64_t sample_size)
654 uint64_t i, nr_samples;
659 s = __get_sample(samples, 0, 0);
/* top bit set => io_sample_offset records (extra offset column) */
660 if (s->__ddir & 0x80000000)
665 nr_samples = sample_size / __log_entry_sz(log_offset);
667 for (i = 0; i < nr_samples; i++) {
668 s = __get_sample(samples, log_offset, i);
671 fprintf(f, "%lu, %lu, %u, %u\n",
672 (unsigned long) s->time,
673 (unsigned long) s->val,
674 io_sample_ddir(s), s->bs);
676 struct io_sample_offset *so = (void *) s;
678 fprintf(f, "%lu, %lu, %u, %u, %llu\n",
679 (unsigned long) s->time,
680 (unsigned long) s->val,
681 io_sample_ddir(s), s->bs,
682 (unsigned long long) so->offset);
/*
 * Prepare a zlib inflate stream; gz_hdr selects whether the input
 * carries a gzip header (affects the window-bits value passed in).
 */
688 static int z_stream_init(z_stream *stream, int gz_hdr)
692 stream->zalloc = Z_NULL;
693 stream->zfree = Z_NULL;
694 stream->opaque = Z_NULL;
695 stream->next_in = Z_NULL;
700 if (inflateInit2(stream, wbits) != Z_OK)
/* Iteration state carried across flush_chunk() calls for one stream. */
706 struct flush_chunk_iter {
/*
 * End the current inflate stream, flush whatever was decompressed so
 * far to f as text samples, and reset the iterator's buffer state.
 */
714 static void finish_chunk(z_stream *stream, FILE *f,
715 struct flush_chunk_iter *iter)
719 ret = inflateEnd(stream);
721 log_err("fio: failed to end log inflation (%d)\n", ret);
723 flush_samples(f, iter->buf, iter->buf_used);
726 iter->buf_size = iter->buf_used = 0;
/*
 * Inflate one compressed chunk into iter's buffer, growing it as
 * needed. A sequence-number change means a new deflate stream starts:
 * finish the previous one and re-init.
 */
729 static int flush_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
730 z_stream *stream, struct flush_chunk_iter *iter)
732 if (ic->seq != iter->seq) {
734 finish_chunk(stream, f, iter);
736 z_stream_init(stream, gz_hdr);
740 stream->avail_in = ic->len;
741 stream->next_in = ic->buf;
743 if (!iter->buf_size) {
744 iter->buf_size = iter->chunk_sz;
745 iter->buf = malloc(iter->buf_size);
/* keep inflating until this chunk's input is fully consumed */
748 while (stream->avail_in) {
749 size_t this_out = iter->buf_size - iter->buf_used;
752 stream->avail_out = this_out;
753 stream->next_out = iter->buf + iter->buf_used;
755 err = inflate(stream, Z_NO_FLUSH);
757 log_err("fio: failed inflating log: %d\n", err);
761 iter->buf_used += this_out - stream->avail_out;
/* output buffer full: grow by one chunk and continue */
763 if (!stream->avail_out) {
764 iter->buf_size += iter->chunk_sz;
765 iter->buf = realloc(iter->buf, iter->buf_size);
769 if (err == Z_STREAM_END)
/*
 * Drain the log's queued compressed chunks. With log_gz_store the raw
 * compressed bytes are written to f as-is; otherwise each chunk is
 * inflated and written as text samples.
 */
777 static void flush_gz_chunks(struct io_log *log, FILE *f)
779 struct flush_chunk_iter iter = { .chunk_sz = log->log_gz, };
780 struct flist_head *node;
783 while (!flist_empty(&log->chunk_list)) {
784 struct iolog_compress *ic;
786 node = log->chunk_list.next;
787 ic = flist_entry(node, struct iolog_compress, list);
788 flist_del(&ic->list);
790 if (log->log_gz_store) {
791 fwrite(ic->buf, ic->len, 1, f);
794 flush_chunk(ic, log->log_gz_store, f, &stream, &iter);
/* finish_chunk flushes any decompressed samples still buffered */
798 finish_chunk(&stream, f, &iter);
/*
 * Standalone decompressor: read a stored (compressed) log file in one
 * go, inflate it, and write the text samples to stdout.
 */
803 int iolog_file_inflate(const char *file)
805 struct flush_chunk_iter iter = { .chunk_sz = 64 * 1024 * 1024, };
806 struct iolog_compress ic;
/* stat first to size a buffer for the whole file */
812 if (stat(file, &sb) < 0) {
817 f = fopen(file, "r");
823 ic.buf = malloc(sb.st_size);
828 ret = fread(ic.buf, ic.len, 1, f);
833 } else if (ret != 1) {
834 log_err("fio: short read on reading log\n");
/* gz_hdr=1: stored log files carry a gzip header */
841 flush_chunk(&ic, 1, stdout, &stream, &iter);
844 finish_chunk(&stream, stdout, &iter);
/* No-zlib build: chunk flushing is a no-op. */
854 static void flush_gz_chunks(struct io_log *log, FILE *f)
/*
 * Write out a complete io_log to its file: compressed chunks first
 * (if any), then the still-uncompressed in-memory samples.
 */
860 void flush_log(struct io_log *log)
865 f = fopen(log->filename, "w");
871 buf = set_file_buffer(f);
873 flush_gz_chunks(log, f);
875 flush_samples(f, log->log, log->nr_samples * log_entry_sz(log));
878 clear_file_buffer(buf);
/*
 * Finalize one log under its file lock. GUI clients get the log sent
 * over the wire instead of written locally. With 'trylock' set, a
 * contended lock defers the work rather than blocking.
 */
881 static int finish_log(struct thread_data *td, struct io_log *log, int trylock)
887 if (fio_trylock_file(log->filename))
890 fio_lock_file(log->filename);
892 if (td->client_type == FIO_CLIENT_TYPE_GUI)
893 fio_send_iolog(td, log, log->filename);
897 fio_unlock_file(log->filename);
/* Work item handed to the compression thread pool: a snapshot of samples to deflate. */
904 struct iolog_flush_data {
/*
 * Thread-pool worker: deflate the work item's sample snapshot into a
 * chain of GZ_CHUNK-sized iolog_compress chunks, then splice them onto
 * the log's chunk list under its lock.
 */
911 static int gz_work(struct tp_work *work)
913 struct iolog_flush_data *data;
914 struct iolog_compress *c;
915 struct flist_head list;
921 INIT_FLIST_HEAD(&list);
923 data = container_of(work, struct iolog_flush_data, work);
925 stream.zalloc = Z_NULL;
926 stream.zfree = Z_NULL;
927 stream.opaque = Z_NULL;
929 ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
931 log_err("fio: failed to init gz stream\n");
/* each flush gets its own sequence number, consumed on inflate */
935 seq = ++data->log->chunk_seq;
937 stream.next_in = (void *) data->samples;
938 stream.avail_in = data->nr_samples * log_entry_sz(data->log);
/* main deflate loop: one chunk per iteration until input is consumed */
941 c = get_new_chunk(seq);
942 stream.avail_out = GZ_CHUNK;
943 stream.next_out = c->buf;
944 ret = deflate(&stream, Z_NO_FLUSH);
946 log_err("fio: deflate log (%d)\n", ret);
950 c->len = GZ_CHUNK - stream.avail_out;
951 flist_add_tail(&c->list, &list);
953 } while (stream.avail_in);
/* finish the stream; may need more chunks to drain pending output */
955 stream.next_out = c->buf + c->len;
956 stream.avail_out = GZ_CHUNK - c->len;
958 ret = deflate(&stream, Z_FINISH);
959 if (ret == Z_STREAM_END)
960 c->len = GZ_CHUNK - stream.avail_out;
963 c = get_new_chunk(seq);
964 stream.avail_out = GZ_CHUNK;
965 stream.next_out = c->buf;
966 ret = deflate(&stream, Z_FINISH);
967 c->len = GZ_CHUNK - stream.avail_out;
968 flist_add_tail(&c->list, &list);
969 } while (ret != Z_STREAM_END);
972 ret = deflateEnd(&stream);
974 log_err("fio: deflateEnd %d\n", ret);
/* publish the finished chunks to the log under its chunk lock */
978 if (!flist_empty(&list)) {
979 pthread_mutex_lock(&data->log->chunk_lock);
980 flist_splice_tail(&list, &data->log->chunk_list);
981 pthread_mutex_unlock(&data->log->chunk_lock);
/* wake a waiter blocked in iolog_flush(..., wait=1) */
986 pthread_cond_signal(&work->cv);
/*
 * Snapshot the log's current samples into a work item and queue it on
 * the thread pool for compression; resets the in-memory sample count.
 * With 'wait' set, blocks until the worker signals completion.
 */
993 int iolog_flush(struct io_log *log, int wait)
995 struct tp_data *tdat = log->td->tp_data;
996 struct iolog_flush_data *data;
999 data = malloc(sizeof(*data));
/* copy samples so logging can continue while compression runs */
1005 sample_size = log->nr_samples * log_entry_sz(log);
1006 data->samples = malloc(sample_size);
1007 if (!data->samples) {
1012 memcpy(data->samples, log->log, sample_size);
1013 data->nr_samples = log->nr_samples;
1014 data->work.fn = gz_work;
1015 log->nr_samples = 0;
1018 pthread_mutex_init(&data->work.lock, NULL);
1019 pthread_cond_init(&data->work.cv, NULL);
1020 data->work.wait = 1;
1022 data->work.wait = 0;
1024 tp_queue_work(tdat, &data->work);
/* synchronous path: wait for gz_work to mark the item done */
1027 pthread_mutex_lock(&data->work.lock);
1028 while (!data->work.done)
1029 pthread_cond_wait(&data->work.cv, &data->work.lock);
1030 pthread_mutex_unlock(&data->work.lock);
/* No-zlib build: flushing to compressed chunks is unavailable. */
1039 int iolog_flush(struct io_log *log, int wait)
/* Finalize the per-thread IOPS log, if one was collected. */
1046 static int write_iops_log(struct thread_data *td, int try)
1048 struct io_log *log = td->iops_log;
1053 return finish_log(td, log, try);
/* Finalize the per-thread submission-latency log, if one was collected. */
1056 static int write_slat_log(struct thread_data *td, int try)
1058 struct io_log *log = td->slat_log;
1063 return finish_log(td, log, try);
/* Finalize the per-thread completion-latency log, if one was collected. */
1066 static int write_clat_log(struct thread_data *td, int try)
1068 struct io_log *log = td->clat_log;
1073 return finish_log(td, log, try);
/* Finalize the per-thread total-latency log, if one was collected. */
1076 static int write_lat_log(struct thread_data *td, int try)
1078 struct io_log *log = td->lat_log;
1083 return finish_log(td, log, try);
/* Finalize the per-thread bandwidth log, if one was collected. */
1086 static int write_bandw_log(struct thread_data *td, int try)
1088 struct io_log *log = td->bw_log;
1093 return finish_log(td, log, try);
/* Writer callback for one log kind, invoked by fio_writeout_logs(). */
1108 int (*fn)(struct thread_data *, int);
/* Table of all per-thread log kinds: done-mask bit plus writer function. */
1111 static struct log_type log_types[] = {
1113 .mask = BW_LOG_MASK,
1114 .fn = write_bandw_log,
1117 .mask = LAT_LOG_MASK,
1118 .fn = write_lat_log,
1121 .mask = SLAT_LOG_MASK,
1122 .fn = write_slat_log,
1125 .mask = CLAT_LOG_MASK,
1126 .fn = write_clat_log,
1129 .mask = IOPS_LOG_MASK,
1130 .fn = write_iops_log,
/*
 * Write out every per-thread log, retrying writers that could not take
 * their file lock (trylock mode while more than one log remains).
 * Runs with the thread temporarily in TD_FINISHING state.
 */
1134 void fio_writeout_logs(struct thread_data *td)
1136 unsigned int log_mask = 0;
1137 unsigned int log_left = ALL_LOG_NR;
1140 old_state = td_bump_runstate(td, TD_FINISHING);
1145 int prev_log_left = log_left;
1147 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
1148 struct log_type *lt = &log_types[i];
/* skip kinds already written; mask bit records completion */
1151 if (!(log_mask & lt->mask)) {
/* trylock (non-blocking) while other logs remain; block on the last one */
1152 ret = lt->fn(td, log_left != 1);
1155 log_mask |= lt->mask;
/* no progress this pass — avoid spinning; presumably breaks out, confirm */
1160 if (prev_log_left == log_left)
1164 td_restore_runstate(td, old_state);