2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
/* Magic first line written to — and expected at the top of — a version-2 iolog file. */
23 static const char iolog_ver2[] = "fio version 2 iolog";
/*
 * One node of compressed log output, linked on io_log->chunk_list.
 * NOTE(review): fields beyond 'list' (buf, len, seq) are not visible
 * in this view — confirm against full struct definition.
 */
27 struct iolog_compress {
28 struct flist_head list;
/* Size in bytes of each compression chunk buffer (128 KiB). */
35 #define GZ_CHUNK 131072
/*
 * Allocate a new compression chunk, tagged with sequence number 'seq',
 * with a GZ_CHUNK-sized data buffer.
 * NOTE(review): malloc return values are not checked in the visible
 * lines — confirm policy in full source.
 */
37 static struct iolog_compress *get_new_chunk(unsigned int seq)
39 struct iolog_compress *c;
41 c = malloc(sizeof(*c));
42 INIT_FLIST_HEAD(&c->list);
43 c->buf = malloc(GZ_CHUNK);
/* Release a compression chunk (buffer and node; body not visible here). */
50 static void free_chunk(struct iolog_compress *ic)
/*
 * Append a replayed io_piece to the thread's iolog list and account
 * its length toward the thread's total expected I/O size.
 */
60 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
62 flist_add_tail(&ipo->list, &td->io_log_list);
63 td->total_io_size += ipo->len;
/*
 * Write one line describing this io_u (file name, action, offset,
 * length) to the write iolog, if one was requested.
 */
66 void log_io_u(struct thread_data *td, struct io_u *io_u)
68 const char *act[] = { "read", "write", "sync", "datasync",
69 "sync_file_range", "wait", "trim" };
/* ddir indexes act[]; the table has 7 entries, so 0..6 are valid */
71 assert(io_u->ddir <= 6);
/* nothing to do unless a write iolog file was configured */
73 if (!td->o.write_iolog_file)
76 fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
77 act[io_u->ddir], io_u->offset,
/*
 * Record a file-level action (add/open/close) in the write iolog,
 * if one was requested.
 */
81 void log_file(struct thread_data *td, struct fio_file *f,
82 enum file_log_act what)
84 const char *act[] = { "add", "open", "close" };
88 if (!td->o.write_iolog_file)
93 * this happens on the pre-open/close done before the job starts
98 fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
/*
 * Sleep to reproduce the inter-I/O delay recorded in the iolog,
 * compensating for time already spent since the last issue
 * (utime_since_now on td->last_issue).
 */
101 static void iolog_delay(struct thread_data *td, unsigned long delay)
103 unsigned long usec = utime_since_now(&td->last_issue);
104 unsigned long this_delay;
112 * less than 100 usec delay, just regard it as noise
/* sleep in bounded slices so td->terminate is noticed promptly */
117 while (delay && !td->terminate) {
119 if (this_delay > 500000)
122 usec_sleep(td, this_delay);
/*
 * Handle a special (non-I/O, DDIR_INVAL) iolog entry: open, close or
 * unlink the referenced file.
 * NOTE(review): exact return-value convention is not visible in this
 * fragment — caller (read_iolog_get) distinguishes <0 / >0.
 */
127 static int ipo_special(struct thread_data *td, struct io_piece *ipo)
/* regular I/O entries are not special */
135 if (ipo->ddir != DDIR_INVAL)
138 f = td->files[ipo->fileno];
140 switch (ipo->file_action) {
141 case FIO_LOG_OPEN_FILE:
142 ret = td_io_open_file(td, f);
145 td_verror(td, ret, "iolog open file");
147 case FIO_LOG_CLOSE_FILE:
148 td_io_close_file(td, f);
150 case FIO_LOG_UNLINK_FILE:
151 unlink(f->file_name);
154 log_err("fio: bad file action %d\n", ipo->file_action);
/*
 * Pop the next entry off the replay iolog list and populate 'io_u'
 * from it. File-action entries are consumed via ipo_special(); wait
 * entries sleep instead of producing I/O.
 */
161 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
163 struct io_piece *ipo;
164 unsigned long elapsed;
166 while (!flist_empty(&td->io_log_list)) {
169 ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
170 flist_del(&ipo->list);
171 remove_trim_entry(td, ipo);
/* handles open/close/unlink entries inline */
173 ret = ipo_special(td, ipo);
177 } else if (ret > 0) {
182 io_u->ddir = ipo->ddir;
183 if (ipo->ddir != DDIR_WAIT) {
184 io_u->offset = ipo->offset;
185 io_u->buflen = ipo->len;
186 io_u->file = td->files[ipo->fileno];
187 get_file(io_u->file);
188 dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
189 io_u->buflen, io_u->file->file_name);
/* normal I/O: replay the recorded inter-I/O gap */
191 iolog_delay(td, ipo->delay);
/* DDIR_WAIT: 'delay' is compared against msec since genesis,
 * and the remainder is slept off in usec */
193 elapsed = mtime_since_genesis();
194 if (ipo->delay > elapsed)
195 usec_sleep(td, (ipo->delay - elapsed) * 1000);
200 if (io_u->ddir != DDIR_WAIT)
/*
 * Free all stored io_piece history: entries sorted in the rb-tree
 * and entries on the flat history list. Trim-list linkage is removed
 * for each piece as it goes.
 */
208 void prune_io_piece_log(struct thread_data *td)
210 struct io_piece *ipo;
/* drain the sorted (verify) tree first */
213 while ((n = rb_first(&td->io_hist_tree)) != NULL) {
214 ipo = rb_entry(n, struct io_piece, rb_node);
215 rb_erase(n, &td->io_hist_tree);
216 remove_trim_entry(td, ipo);
/* then the unsorted history list */
221 while (!flist_empty(&td->io_hist_list)) {
222 ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
223 flist_del(&ipo->list);
224 remove_trim_entry(td, ipo);
231 * log a successful write, so we can unwind the log for verify
233 void log_io_piece(struct thread_data *td, struct io_u *io_u)
235 struct rb_node **p, *parent;
236 struct io_piece *ipo, *__ipo;
238 ipo = malloc(sizeof(struct io_piece))
240 ipo->file = io_u->file;
241 ipo->offset = io_u->offset;
242 ipo->len = io_u->buflen;
243 ipo->numberio = io_u->numberio;
244 ipo->flags = IP_F_IN_FLIGHT;
/* trims are additionally tracked on the thread's trim list */
248 if (io_u_should_trim(td, io_u)) {
249 flist_add_tail(&ipo->trim_list, &td->trim_list);
254 * We don't need to sort the entries, if:
256 * Sequential writes, or
257 * Random writes that lay out the file as it goes along
259 * For both these cases, just reading back data in the order we
260 * wrote it out is the fastest.
262 * One exception is if we don't have a random map AND we are doing
263 * verifies, in that case we need to check for duplicate blocks and
264 * drop the old one, which we rely on the rb insert/lookup for
/* fast path: append in issue order to the flat history list */
267 if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
268 (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
269 INIT_FLIST_HEAD(&ipo->list);
270 flist_add_tail(&ipo->list, &td->io_hist_list);
271 ipo->flags |= IP_F_ONLIST;
276 RB_CLEAR_NODE(&ipo->rb_node);
279 * Sort the entry into the verification list
282 p = &td->io_hist_tree.rb_node;
287 __ipo = rb_entry(parent, struct io_piece, rb_node);
/* tree is keyed on (file, offset); an equal key means overlap */
288 if (ipo->file < __ipo->file)
290 else if (ipo->file > __ipo->file)
292 else if (ipo->offset < __ipo->offset)
294 else if (ipo->offset > __ipo->offset)
297 dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
298 __ipo->offset, __ipo->len,
299 ipo->offset, ipo->len);
/* overlap: evict the stale entry, then re-walk the tree */
301 rb_erase(parent, &td->io_hist_tree);
302 remove_trim_entry(td, __ipo);
308 rb_link_node(&ipo->rb_node, parent, p);
309 rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
310 ipo->flags |= IP_F_ONRB;
/*
 * Remove an io_u's logged piece from whichever container holds it:
 * the verify rb-tree (IP_F_ONRB) or the flat history list
 * (IP_F_ONLIST). NOTE(review): caller context not visible here.
 */
314 void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
316 struct io_piece *ipo = io_u->ipo;
321 if (ipo->flags & IP_F_ONRB)
322 rb_erase(&ipo->rb_node, &td->io_hist_tree);
323 else if (ipo->flags & IP_F_ONLIST)
324 flist_del(&ipo->list);
/*
 * Shrink the logged length to the bytes actually transferred
 * (requested transfer length minus the residual).
 */
331 void trim_io_piece(struct thread_data *td, struct io_u *io_u)
333 struct io_piece *ipo = io_u->ipo;
338 ipo->len = io_u->xfer_buflen - io_u->resid;
/*
 * Close out the write iolog and drop the stream buffer reference.
 * NOTE(review): flush/fclose/free calls are not visible in this
 * fragment — confirm against full source.
 */
341 void write_iolog_close(struct thread_data *td)
347 td->iolog_buf = NULL;
351 * Read version 2 iolog data. It is enhanced to include per-file logging,
/*
 * Parse a v2 iolog stream line by line, building io_piece entries and
 * queueing them for replay; derives the job's effective data direction
 * from what the log contained.
 */
354 static int read_iolog2(struct thread_data *td, FILE *f)
356 unsigned long long offset;
358 int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
363 free_release_files(td);
366 * Read in the read iolog and store it, reuse the infrastructure
367 * for doing verifications.
370 fname = malloc(256+16);
371 act = malloc(256+16);
373 reads = writes = waits = 0;
374 while ((p = fgets(str, 4096, f)) != NULL) {
375 struct io_piece *ipo;
/* per-I/O lines: "<fname> <act> <offset> <len>" */
378 r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
384 if (!strcmp(act, "wait"))
386 else if (!strcmp(act, "read"))
388 else if (!strcmp(act, "write"))
390 else if (!strcmp(act, "sync"))
392 else if (!strcmp(act, "datasync"))
394 else if (!strcmp(act, "trim"))
397 log_err("fio: bad iolog file action: %s\n",
401 fileno = get_fileno(td, fname)
/* file-management lines: "<fname> add|open|close" */
404 if (!strcmp(act, "add")) {
405 fileno = add_file(td, fname, 0, 1);
406 file_action = FIO_LOG_ADD_FILE;
408 } else if (!strcmp(act, "open")) {
409 fileno = get_fileno(td, fname);
410 file_action = FIO_LOG_OPEN_FILE;
411 } else if (!strcmp(act, "close")) {
412 fileno = get_fileno(td, fname);
413 file_action = FIO_LOG_CLOSE_FILE;
415 log_err("fio: bad iolog file action: %s\n",
420 log_err("bad iolog2: %s", p);
426 else if (rw == DDIR_WRITE) {
428 * Don't add a write for ro mode
433 } else if (rw == DDIR_WAIT) {
435 } else if (rw == DDIR_INVAL) {
436 } else if (!ddir_sync(rw)) {
437 log_err("bad ddir: %d\n", rw);
/*
 * Build an entry for the internal queue
 */
444 ipo = malloc(sizeof(*ipo));
447 if (rw == DDIR_WAIT) {
450 ipo->offset = offset;
/* track the largest block size seen, for buffer sizing */
452 if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
453 td->o.max_bs[rw] = bytes;
454 ipo->fileno = fileno;
455 ipo->file_action = file_action;
459 queue_io_piece(td, ipo);
466 if (writes && read_only) {
467 log_err("fio: <%s> skips replay of %d writes due to"
468 " read-only\n", td->o.name, writes);
/* derive the job data direction from the log's contents */
472 if (!reads && !writes && !waits)
474 else if (reads && !writes)
475 td->o.td_ddir = TD_DDIR_READ;
476 else if (!reads && writes)
477 td->o.td_ddir = TD_DDIR_WRITE;
479 td->o.td_ddir = TD_DDIR_RW;
485 * open iolog, check version, and call appropriate parser
487 static int init_iolog_read(struct thread_data *td)
489 char buffer[256], *p;
493 f = fopen(td->o.read_iolog_file, "r");
495 perror("fopen read iolog");
499 p = fgets(buffer, sizeof(buffer), f);
501 td_verror(td, errno, "iolog read");
502 log_err("fio: unable to read iolog\n");
508 * version 2 of the iolog stores a specific string as the
509 * first line, check for that
511 if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
512 ret = read_iolog2(td, f);
514 log_err("fio: iolog version 1 is no longer supported\n");
523 * Set up a log for storing io patterns.
525 static int init_iolog_write(struct thread_data *td)
/* append mode, so an existing log is extended rather than clobbered */
531 f = fopen(td->o.write_iolog_file, "a");
533 perror("fopen write iolog");
538 * That's it for writing, setup a log buffer and we're done.
541 td->iolog_buf = malloc(8192);
542 setvbuf(f, td->iolog_buf, _IOFBF, 8192);
545 * write our version line
547 if (fprintf(f, "%s\n", iolog_ver2) < 0) {
548 perror("iolog init\n");
553 * add all known files
555 for_each_file(td, ff, i)
556 log_file(td, ff, FIO_LOG_ADD_FILE);
/*
 * Entry point for iolog setup: load a replay log (blktrace or fio v2
 * format) if reading, or open a write iolog if recording.
 */
561 int init_iolog(struct thread_data *td)
565 if (td->o.read_iolog_file) {
569 * Check if it's a blktrace file and load that if possible.
570 * Otherwise assume it's a normal log file and load that.
572 if (is_blktrace(td->o.read_iolog_file, &need_swap))
573 ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
575 ret = init_iolog_read(td);
576 } else if (td->o.write_iolog_file)
577 ret = init_iolog_write(td);
580 td_verror(td, EINVAL, "failed initializing iolog");
/*
 * Allocate and initialize an io_log from the given parameters:
 * sample buffer, file name, offset/compression options.
 */
585 void setup_log(struct io_log **log, struct log_params *p,
586 const char *filename)
588 struct io_log *l = malloc(sizeof(*l));
590 memset(l, 0, sizeof(*l));
/* initial sample capacity; presumably grown on demand elsewhere */
592 l->max_samples = 1024;
593 l->log_type = p->log_type;
594 l->log_offset = p->log_offset;
595 l->log_gz = p->log_gz;
596 l->log_gz_store = p->log_gz_store;
597 l->log = malloc(l->max_samples * log_entry_sz(l));
598 l->avg_msec = p->avg_msec;
599 l->filename = strdup(filename);
/* top bit of a sample's __ddir flags offset-logging (see flush_samples) */
603 l->log_ddir_mask = 0x80000000;
605 INIT_FLIST_HEAD(&l->chunk_list);
607 if (l->log_gz && !p->td)
609 else if (l->log_gz) {
610 pthread_mutex_init(&l->chunk_lock, NULL);
611 p->td->flags |= TD_F_COMPRESS_LOG;
/*
 * Optional large stdio buffering for log flushing. When CONFIG_SETVBUF
 * is available, a 1 MiB buffer is attached to the stream and returned
 * for the caller to free; otherwise the helpers are no-op stubs.
 */
617 #ifdef CONFIG_SETVBUF
618 static void *set_file_buffer(FILE *f)
620 size_t size = 1048576;
624 setvbuf(f, buf, _IOFBF, size);
628 static void clear_file_buffer(void *buf)
/* stubs for builds without setvbuf support */
633 static void *set_file_buffer(FILE *f)
638 static void clear_file_buffer(void *buf)
/* Free an io_log and its resources (body not visible in this view). */
643 void free_log(struct io_log *log)
/*
 * Write raw samples as CSV text to 'f'. The first sample's __ddir top
 * bit says whether entries carry an offset field, which selects the
 * entry size and output format.
 */
650 static void flush_samples(FILE *f, void *samples, uint64_t sample_size)
654 uint64_t i, nr_samples;
/* peek at sample 0 to detect offset-logging */
659 s = __get_sample(samples, 0, 0);
660 if (s->__ddir & 0x80000000)
665 nr_samples = sample_size / __log_entry_sz(log_offset);
667 for (i = 0; i < nr_samples; i++) {
668 s = __get_sample(samples, log_offset, i);
/* plain sample: time, value, ddir, block size */
671 fprintf(f, "%lu, %lu, %u, %u\n",
672 (unsigned long) s->time,
673 (unsigned long) s->val,
674 io_sample_ddir(s), s->bs);
/* offset-logging sample: same plus the I/O offset */
676 struct io_sample_offset *so = (void *) s;
678 fprintf(f, "%lu, %lu, %u, %u, %llu\n",
679 (unsigned long) s->time,
680 (unsigned long) s->val,
681 io_sample_ddir(s), s->bs,
682 (unsigned long long) so->offset);
/*
 * Prepare a zlib inflate stream; 'gz_hdr' selects the window-bits
 * value so gzip-wrapped data can be accepted.
 */
688 static int z_stream_init(z_stream *stream, int gz_hdr)
692 stream->zalloc = Z_NULL;
693 stream->zfree = Z_NULL;
694 stream->opaque = Z_NULL;
695 stream->next_in = Z_NULL;
700 if (inflateInit2(stream, wbits) != Z_OK)
/* Iteration state carried across flush_chunk() calls (fields partly
 * not visible: buf, buf_size, buf_used, seq, chunk_sz). */
706 struct flush_chunk_iter {
/*
 * End the current inflate stream, flush the decoded samples to 'f',
 * and reset the iterator's buffer accounting.
 */
714 static void finish_chunk(z_stream *stream, FILE *f,
715 struct flush_chunk_iter *iter)
719 ret = inflateEnd(stream);
721 log_err("fio: failed to end log inflation (%d)\n", ret);
723 flush_samples(f, iter->buf, iter->buf_used);
726 iter->buf_size = iter->buf_used = 0;
/*
 * Inflate one compressed chunk into the iterator's growable buffer.
 * A sequence-number change means a new deflate stream started, so the
 * previous one is finished and a fresh inflate stream is initialized.
 */
729 static int flush_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
730 z_stream *stream, struct flush_chunk_iter *iter)
732 if (ic->seq != iter->seq) {
734 finish_chunk(stream, f, iter);
736 z_stream_init(stream, gz_hdr);
740 stream->avail_in = ic->len;
741 stream->next_in = ic->buf;
743 if (!iter->buf_size) {
744 iter->buf_size = iter->chunk_sz;
745 iter->buf = malloc(iter->buf_size);
748 while (stream->avail_in) {
749 size_t this_out = iter->buf_size - iter->buf_used;
752 stream->avail_out = this_out;
753 stream->next_out = iter->buf + iter->buf_used;
755 err = inflate(stream, Z_NO_FLUSH);
757 log_err("fio: failed inflating log: %d\n", err);
761 iter->buf_used += this_out - stream->avail_out;
/* output buffer exhausted: grow by another chunk and continue */
763 if (!stream->avail_out) {
764 iter->buf_size += iter->chunk_sz;
765 iter->buf = realloc(iter->buf, iter->buf_size);
769 if (err == Z_STREAM_END)
/*
 * Drain the log's compressed chunk list to 'f': raw pass-through of
 * the gz data when storing compressed, otherwise inflate and emit
 * text samples via flush_chunk()/finish_chunk().
 */
777 static void flush_gz_chunks(struct io_log *log, FILE *f)
779 struct flush_chunk_iter iter = { .chunk_sz = log->log_gz, };
780 struct flist_head *node;
783 while (!flist_empty(&log->chunk_list)) {
784 struct iolog_compress *ic;
786 node = log->chunk_list.next;
787 ic = flist_entry(node, struct iolog_compress, list);
788 flist_del(&ic->list);
/* store mode: keep data compressed on disk */
790 if (log->log_gz_store) {
791 fwrite(ic->buf, ic->len, 1, f);
794 flush_chunk(ic, log->log_gz_store, f, &stream, &iter);
798 finish_chunk(&stream, f, &iter);
/*
 * Standalone decompressor: read a gz-stored iolog file whole into
 * memory and inflate it to stdout as text samples.
 */
803 int iolog_file_inflate(const char *file)
805 struct flush_chunk_iter iter = { .chunk_sz = 64 * 1024 * 1024, };
806 struct iolog_compress ic;
812 if (stat(file, &sb) < 0) {
817 f = fopen(file, "r");
823 ic.buf = malloc(sb.st_size);
828 ret = fread(ic.buf, ic.len, 1, f);
833 } else if (ret != 1) {
834 log_err("fio: short read on reading log\n");
/* gz_hdr=1: the stored stream carries a gzip header */
841 flush_chunk(&ic, 1, stdout, &stream, &iter);
844 finish_chunk(&stream, stdout, &iter);
/* No-op stub — presumably the !CONFIG_ZLIB build variant; confirm. */
854 static void flush_gz_chunks(struct io_log *log, FILE *f)
/*
 * Write a log out to its file: open it, attach a large stdio buffer,
 * emit any compressed chunks first, then the in-memory samples.
 */
860 void flush_log(struct io_log *log)
865 f = fopen(log->filename, "w");
871 buf = set_file_buffer(f);
873 flush_gz_chunks(log, f);
875 flush_samples(f, log->log, log->nr_samples * log_entry_sz(log));
878 clear_file_buffer(buf);
/*
 * Finalize one log under the cross-process file lock; GUI clients get
 * the log shipped over the network instead of written locally.
 * 'trylock' allows a non-blocking attempt (caller retries later).
 */
881 static int finish_log(struct thread_data *td, struct io_log *log, int trylock)
887 if (fio_trylock_file(log->filename))
890 fio_lock_file(log->filename);
892 if (td->client_type == FIO_CLIENT_TYPE_GUI)
893 fio_send_iolog(td, log, log->filename);
897 fio_unlock_file(log->filename);
/* Work item handed to the compression thread pool (fields not visible). */
904 struct iolog_flush_data {
/*
 * Thread-pool worker: deflate a snapshot of log samples into a chain
 * of GZ_CHUNK-sized iolog_compress chunks, then splice them onto the
 * log's chunk list under the chunk lock and signal the waiter.
 */
911 static int gz_work(struct tp_work *work)
913 struct iolog_flush_data *data;
914 struct iolog_compress *c;
915 struct flist_head list;
921 INIT_FLIST_HEAD(&list);
923 data = container_of(work, struct iolog_flush_data, work);
925 stream.zalloc = Z_NULL;
926 stream.zfree = Z_NULL;
927 stream.opaque = Z_NULL;
930 * Store gz header if storing to a file
/* windowBits=31 requests a gzip wrapper from deflateInit2 */
934 if (data->log->log_gz_store)
937 ret = deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
938 31, 8, Z_DEFAULT_STRATEGY);
940 ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
943 log_err("fio: failed to init gz stream\n");
/* each flush gets a new sequence; chunks of one stream share it */
947 seq = ++data->log->chunk_seq;
949 stream.next_in = (void *) data->samples;
950 stream.avail_in = data->nr_samples * log_entry_sz(data->log);
/* compress until all input is consumed, one chunk at a time */
953 c = get_new_chunk(seq);
954 stream.avail_out = GZ_CHUNK;
955 stream.next_out = c->buf;
956 ret = deflate(&stream, Z_NO_FLUSH);
958 log_err("fio: deflate log (%d)\n", ret);
962 c->len = GZ_CHUNK - stream.avail_out;
963 flist_add_tail(&c->list, &list);
965 } while (stream.avail_in);
/* flush the deflate stream; may spill into further chunks */
967 stream.next_out = c->buf + c->len;
968 stream.avail_out = GZ_CHUNK - c->len;
970 ret = deflate(&stream, Z_FINISH);
971 if (ret == Z_STREAM_END)
972 c->len = GZ_CHUNK - stream.avail_out;
975 c = get_new_chunk(seq);
976 stream.avail_out = GZ_CHUNK;
977 stream.next_out = c->buf;
978 ret = deflate(&stream, Z_FINISH);
979 c->len = GZ_CHUNK - stream.avail_out;
980 flist_add_tail(&c->list, &list);
981 } while (ret != Z_STREAM_END);
984 ret = deflateEnd(&stream);
986 log_err("fio: deflateEnd %d\n", ret);
/* publish the finished chunks under the chunk lock */
990 if (!flist_empty(&list)) {
991 pthread_mutex_lock(&data->log->chunk_lock);
992 flist_splice_tail(&list, &data->log->chunk_list);
993 pthread_mutex_unlock(&data->log->chunk_lock);
/* wake a synchronous iolog_flush() caller, if any */
998 pthread_cond_signal(&work->cv);
/*
 * Queue the log's current samples for background compression.
 * Copies the sample buffer so logging can continue immediately; if
 * 'wait' is set, blocks until gz_work() signals completion.
 */
1005 int iolog_flush(struct io_log *log, int wait)
1007 struct tp_data *tdat = log->td->tp_data;
1008 struct iolog_flush_data *data;
1011 data = malloc(sizeof(*data));
1017 sample_size = log->nr_samples * log_entry_sz(log);
1018 data->samples = malloc(sample_size);
1019 if (!data->samples) {
/* snapshot the samples, then reset the live log */
1024 memcpy(data->samples, log->log, sample_size);
1025 data->nr_samples = log->nr_samples;
1026 data->work.fn = gz_work;
1027 log->nr_samples = 0;
1030 pthread_mutex_init(&data->work.lock, NULL);
1031 pthread_cond_init(&data->work.cv, NULL);
1032 data->work.wait = 1;
1034 data->work.wait = 0;
1036 tp_queue_work(tdat, &data->work);
/* synchronous mode: wait for the worker to mark the item done */
1039 pthread_mutex_lock(&data->work.lock);
1040 while (!data->work.done)
1041 pthread_cond_wait(&data->work.cv, &data->work.lock);
1042 pthread_mutex_unlock(&data->work.lock);
/* Stub — presumably the build without compression support; confirm. */
1051 int iolog_flush(struct io_log *log, int wait)
/*
 * Thin per-log-type wrappers: each finalizes its thread-data log via
 * finish_log(), passing 'try' through as the trylock flag.
 */
1058 static int write_iops_log(struct thread_data *td, int try)
1060 struct io_log *log = td->iops_log;
1065 return finish_log(td, log, try);
1068 static int write_slat_log(struct thread_data *td, int try)
1070 struct io_log *log = td->slat_log;
1075 return finish_log(td, log, try);
1078 static int write_clat_log(struct thread_data *td, int try)
1080 struct io_log *log = td->clat_log;
1085 return finish_log(td, log, try);
1088 static int write_lat_log(struct thread_data *td, int try)
1090 struct io_log *log = td->lat_log;
1095 return finish_log(td, log, try);
1098 static int write_bandw_log(struct thread_data *td, int try)
1100 struct io_log *log = td->bw_log;
1105 return finish_log(td, log, try);
/* writer callback for one log type (tail of struct log_type) */
1120 int (*fn)(struct thread_data *, int);
/* Dispatch table mapping each log-type mask to its writer. */
1123 static struct log_type log_types[] = {
1125 .mask = BW_LOG_MASK,
1126 .fn = write_bandw_log,
1129 .mask = LAT_LOG_MASK,
1130 .fn = write_lat_log,
1133 .mask = SLAT_LOG_MASK,
1134 .fn = write_slat_log,
1137 .mask = CLAT_LOG_MASK,
1138 .fn = write_clat_log,
1141 .mask = IOPS_LOG_MASK,
1142 .fn = write_iops_log,
/*
 * Write out all enabled logs for this thread. Loops over the
 * log_types table, retrying writers until each succeeds; the last
 * remaining log (log_left == 1) is attempted with a blocking lock.
 * Runstate is bumped to TD_FINISHING around the work.
 */
1146 void fio_writeout_logs(struct thread_data *td)
1148 unsigned int log_mask = 0;
1149 unsigned int log_left = ALL_LOG_NR;
1152 old_state = td_bump_runstate(td, TD_FINISHING);
1157 int prev_log_left = log_left;
1159 for (i = 0; i < ALL_LOG_NR && log_left; i++) {
1160 struct log_type *lt = &log_types[i];
/* skip types already completed (recorded in log_mask) */
1163 if (!(log_mask & lt->mask)) {
/* try-lock while multiple logs remain; block on the last one */
1164 ret = lt->fn(td, log_left != 1);
1167 log_mask |= lt->mask;
/* NOTE(review): no-progress handling — body at this branch
 * is not visible in this fragment */
1172 if (prev_log_left == log_left)
1176 td_restore_runstate(td, old_state);