X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=iolog.c;h=460d7a2e51af8ada55d8b324547d6fb1dc12b74b;hp=975ce6f7a481c438025992f4b828b6d69378b290;hb=ae626d4ead6416adf464cf209cdf3e8b85d58190;hpb=9973b0f961a57c19f885ffca05f86ae6ef85f8c7 diff --git a/iolog.c b/iolog.c index 975ce6f7..460d7a2e 100644 --- a/iolog.c +++ b/iolog.c @@ -19,6 +19,8 @@ #include "trim.h" #include "filelock.h" #include "smalloc.h" +#include "blktrace.h" +#include "pshared.h" static int iolog_flush(struct io_log *log); @@ -64,7 +66,7 @@ static void iolog_delay(struct thread_data *td, unsigned long delay) { uint64_t usec = utime_since_now(&td->last_issue); uint64_t this_delay; - struct timeval tv; + struct timespec ts; if (delay < td->time_offset) { td->time_offset = 0; @@ -77,7 +79,7 @@ static void iolog_delay(struct thread_data *td, unsigned long delay) delay -= usec; - fio_gettime(&tv, NULL); + fio_gettime(&ts, NULL); while (delay && !td->terminate) { this_delay = delay; if (this_delay > 500000) @@ -87,7 +89,7 @@ static void iolog_delay(struct thread_data *td, unsigned long delay) delay -= this_delay; } - usec = utime_since_now(&tv); + usec = utime_since_now(&ts); if (usec > delay) td->time_offset = usec - delay; else @@ -109,6 +111,11 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo) switch (ipo->file_action) { case FIO_LOG_OPEN_FILE: + if (td->o.replay_redirect && fio_file_open(f)) { + dprint(FD_FILE, "iolog: ignoring re-open of file %s\n", + f->file_name); + break; + } ret = td_io_open_file(td, f); if (!ret) break; @@ -178,7 +185,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u) void prune_io_piece_log(struct thread_data *td) { struct io_piece *ipo; - struct rb_node *n; + struct fio_rb_node *n; while ((n = rb_first(&td->io_hist_tree)) != NULL) { ipo = rb_entry(n, struct io_piece, rb_node); @@ -202,7 +209,7 @@ void prune_io_piece_log(struct thread_data *td) */ void log_io_piece(struct thread_data *td, struct io_u *io_u) { - struct rb_node **p, *parent; + struct fio_rb_node **p, *parent; struct io_piece *ipo, *__ipo; ipo = malloc(sizeof(struct io_piece)); @@ -221,21 +228,16 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u) } /* - * We don't need to sort the entries, if: - * - * Sequential writes, or - * Random writes that lay out the file as it goes along + * We don't need to sort the entries if we only performed sequential + * writes. In this case, just reading back data in the order we wrote + * it out is the faster but still safe. * - * For both these cases, just reading back data in the order we - * wrote it out is the fastest. - * - * One exception is if we don't have a random map AND we are doing - * verifies, in that case we need to check for duplicate blocks and - * drop the old one, which we rely on the rb insert/lookup for - * handling. + * One exception is if we don't have a random map in which case we need + * to check for duplicate blocks and drop the old one, which we rely on + * the rb insert/lookup for handling. 
*/ - if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) && - (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) { + if (((!td->o.verifysort) || !td_random(td)) && + file_randommap(td, ipo->file)) { INIT_FLIST_HEAD(&ipo->list); flist_add_tail(&ipo->list, &td->io_hist_list); ipo->flags |= IP_F_ONLIST; @@ -272,13 +274,14 @@ restart: overlap = 1; if (overlap) { - dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu", + dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu\n", __ipo->offset, __ipo->len, ipo->offset, ipo->len); td->io_hist_len--; rb_erase(parent, &td->io_hist_tree); remove_trim_entry(td, __ipo); - free(__ipo); + if (!(__ipo->flags & IP_F_IN_FLIGHT)) + free(__ipo); goto restart; } } @@ -346,7 +349,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) unsigned long long offset; unsigned int bytes; int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */ - char *fname, *act; + char *rfname, *fname, *act; char *str, *p; enum fio_ddir rw; @@ -357,7 +360,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) * for doing verifications. */ str = malloc(4096); - fname = malloc(256+16); + rfname = fname = malloc(256+16); act = malloc(256+16); reads = writes = waits = 0; @@ -365,8 +368,12 @@ static int read_iolog2(struct thread_data *td, FILE *f) struct io_piece *ipo; int r; - r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset, + r = sscanf(p, "%256s %256s %llu %u", rfname, act, &offset, &bytes); + + if (td->o.replay_redirect) + fname = td->o.replay_redirect; + if (r == 4) { /* * Check action first @@ -392,8 +399,14 @@ static int read_iolog2(struct thread_data *td, FILE *f) } else if (r == 2) { rw = DDIR_INVAL; if (!strcmp(act, "add")) { - fileno = add_file(td, fname, 0, 1); - file_action = FIO_LOG_ADD_FILE; + if (td->o.replay_redirect && + get_fileno(td, fname) != -1) { + dprint(FD_FILE, "iolog: ignoring" + " re-add of file %s\n", fname); + } else { + fileno = add_file(td, fname, 0, 1); + file_action = FIO_LOG_ADD_FILE; + } continue; } else if (!strcmp(act, "open")) { fileno = get_fileno(td, fname); @@ -407,7 +420,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) continue; } } else { - log_err("bad iolog2: %s", p); + log_err("bad iolog2: %s\n", p); continue; } @@ -421,6 +434,8 @@ static int read_iolog2(struct thread_data *td, FILE *f) continue; writes++; } else if (rw == DDIR_WAIT) { + if (td->o.no_stall) + continue; waits++; } else if (rw == DDIR_INVAL) { } else if (!ddir_sync(rw)) { @@ -437,7 +452,12 @@ static int read_iolog2(struct thread_data *td, FILE *f) if (rw == DDIR_WAIT) { ipo->delay = offset; } else { - ipo->offset = offset; + if (td->o.replay_scale) + ipo->offset = offset / td->o.replay_scale; + else + ipo->offset = offset; + ipo_bytes_align(td->o.replay_align, ipo); + ipo->len = bytes; if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw]) td->o.max_bs[rw] = bytes; @@ -451,7 +471,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) free(str); free(act); - free(fname); + free(rfname); if (writes && read_only) { log_err("fio: <%s> skips replay of %d writes due to" @@ -576,6 +596,9 @@ void setup_log(struct io_log **log, struct log_params *p, const char *filename) { struct io_log *l; + int i; + struct io_u_plat_entry *entry; + struct flist_head *list; l = scalloc(1, sizeof(*l)); INIT_FLIST_HEAD(&l->io_logs); @@ -589,6 +612,16 @@ void setup_log(struct io_log **log, struct log_params *p, l->filename = strdup(filename); l->td = p->td; + /* Initialize histogram lists for each r/w direction, + * with initial io_u_plat of 
all zeros: + */ + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + list = &l->hist_window[i].list; + INIT_FLIST_HEAD(list); + entry = calloc(1, sizeof(struct io_u_plat_entry)); + flist_add(&entry->list, list); + } + if (l->td && l->td->o.io_submit_mode != IO_MODE_OFFLOAD) { struct io_logs *p; @@ -607,6 +640,7 @@ void setup_log(struct io_log **log, struct log_params *p, l->log_gz = 0; else if (l->log_gz || l->log_gz_store) { mutex_init_pshared(&l->chunk_lock); + mutex_init_pshared(&l->deferred_free_lock); p->td->flags |= TD_F_COMPRESS_LOG; } @@ -661,24 +695,32 @@ void free_log(struct io_log *log) sfree(log); } -static inline unsigned long hist_sum(int j, int stride, unsigned int *io_u_plat) +uint64_t hist_sum(int j, int stride, uint64_t *io_u_plat, + uint64_t *io_u_plat_last) { - unsigned long sum; + uint64_t sum; int k; - for (k = sum = 0; k < stride; k++) - sum += io_u_plat[j + k]; + if (io_u_plat_last) { + for (k = sum = 0; k < stride; k++) + sum += io_u_plat[j + k] - io_u_plat_last[j + k]; + } else { + for (k = sum = 0; k < stride; k++) + sum += io_u_plat[j + k]; + } return sum; } -void flush_hist_samples(FILE *f, int hist_coarseness, void *samples, - uint64_t sample_size) +static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples, + uint64_t sample_size) { struct io_sample *s; int log_offset; uint64_t i, j, nr_samples; - unsigned int *io_u_plat; + struct io_u_plat_entry *entry, *entry_before; + uint64_t *io_u_plat; + uint64_t *io_u_plat_before; int stride = 1 << hist_coarseness; @@ -692,15 +734,25 @@ void flush_hist_samples(FILE *f, int hist_coarseness, void *samples, for (i = 0; i < nr_samples; i++) { s = __get_sample(samples, log_offset, i); - io_u_plat = (unsigned int *) (uintptr_t) s->val; - fprintf(f, "%lu, %u, %u, ", (unsigned long)s->time, - io_sample_ddir(s), s->bs); + + entry = s->data.plat_entry; + io_u_plat = entry->io_u_plat; + + entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list); + io_u_plat_before = entry_before->io_u_plat; + + fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time, + io_sample_ddir(s), s->bs); for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) { - fprintf(f, "%lu, ", hist_sum(j, stride, io_u_plat)); + fprintf(f, "%llu, ", (unsigned long long) + hist_sum(j, stride, io_u_plat, io_u_plat_before)); } - fprintf(f, "%lu\n", (unsigned long) - hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat)); - free(io_u_plat); + fprintf(f, "%llu\n", (unsigned long long) + hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat, + io_u_plat_before)); + + flist_del(&entry_before->list); + free(entry_before); } } @@ -722,16 +774,16 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size) s = __get_sample(samples, log_offset, i); if (!log_offset) { - fprintf(f, "%lu, %lu, %u, %u\n", + fprintf(f, "%lu, %" PRId64 ", %u, %u\n", (unsigned long) s->time, - (unsigned long) s->val, + s->data.val, io_sample_ddir(s), s->bs); } else { struct io_sample_offset *so = (void *) s; - fprintf(f, "%lu, %lu, %u, %u, %llu\n", + fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n", (unsigned long) s->time, - (unsigned long) s->val, + s->data.val, io_sample_ddir(s), s->bs, (unsigned long long) so->offset); } @@ -1034,11 +1086,11 @@ void flush_log(struct io_log *log, bool do_append) cur_log = flist_first_entry(&log->io_logs, struct io_logs, list); flist_del_init(&cur_log->list); - if (log == log->td->clat_hist_log) + if (log->td && log == log->td->clat_hist_log) flush_hist_samples(f, log->hist_coarseness, cur_log->log, - cur_log->nr_samples * 
log_entry_sz(log)); + log_sample_sz(log, cur_log)); else - flush_samples(f, cur_log->log, cur_log->nr_samples * log_entry_sz(log)); + flush_samples(f, cur_log->log, log_sample_sz(log, cur_log)); sfree(cur_log); } @@ -1090,6 +1142,38 @@ size_t log_chunk_sizes(struct io_log *log) #ifdef CONFIG_ZLIB +static void iolog_put_deferred(struct io_log *log, void *ptr) +{ + if (!ptr) + return; + + pthread_mutex_lock(&log->deferred_free_lock); + if (log->deferred < IOLOG_MAX_DEFER) { + log->deferred_items[log->deferred] = ptr; + log->deferred++; + } else if (!fio_did_warn(FIO_WARN_IOLOG_DROP)) + log_err("fio: had to drop log entry free\n"); + pthread_mutex_unlock(&log->deferred_free_lock); +} + +static void iolog_free_deferred(struct io_log *log) +{ + int i; + + if (!log->deferred) + return; + + pthread_mutex_lock(&log->deferred_free_lock); + + for (i = 0; i < log->deferred; i++) { + free(log->deferred_items[i]); + log->deferred_items[i] = NULL; + } + + log->deferred = 0; + pthread_mutex_unlock(&log->deferred_free_lock); +} + static int gz_work(struct iolog_flush_data *data) { struct iolog_compress *c = NULL; @@ -1122,7 +1206,8 @@ static int gz_work(struct iolog_flush_data *data) data->log->filename); do { if (c) - dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, c->len); + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, + (unsigned long) c->len); c = get_new_chunk(seq); stream.avail_out = GZ_CHUNK; stream.next_out = c->buf; @@ -1159,7 +1244,7 @@ static int gz_work(struct iolog_flush_data *data) total -= c->len; c->len = GZ_CHUNK - stream.avail_out; total += c->len; - dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, c->len); + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, (unsigned long) c->len); if (ret != Z_STREAM_END) { do { @@ -1170,7 +1255,8 @@ static int gz_work(struct iolog_flush_data *data) c->len = GZ_CHUNK - stream.avail_out; total += c->len; flist_add_tail(&c->list, &list); - dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, c->len); + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, + (unsigned long) c->len); } while (ret != Z_STREAM_END); } @@ -1180,7 +1266,7 @@ static int gz_work(struct iolog_flush_data *data) if (ret != Z_OK) log_err("fio: deflateEnd %d\n", ret); - free(data->samples); + iolog_put_deferred(data->log, data->samples); if (!flist_empty(&list)) { pthread_mutex_lock(&data->log->chunk_lock); @@ -1191,7 +1277,7 @@ static int gz_work(struct iolog_flush_data *data) ret = 0; done: if (data->free) - free(data); + sfree(data); return ret; err: while (!flist_empty(&list)) { @@ -1292,7 +1378,7 @@ int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log) { struct iolog_flush_data *data; - data = malloc(sizeof(*data)); + data = smalloc(sizeof(*data)); if (!data) return 1; @@ -1306,6 +1392,9 @@ int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log) cur_log->log = NULL; workqueue_enqueue(&log->td->log_compress_wq, &data->work); + + iolog_free_deferred(log); + return 0; } #else
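
Note on the flush_hist_samples() / hist_sum() hunks above: each histogram sample now points at an io_u_plat_entry chained into a per-direction list (seeded in setup_log() with an all-zero entry), and flushing prints the difference between consecutive entries before freeing the older one, instead of dumping and freeing a flat array per sample. Below is a minimal standalone C sketch of that windowed delta-sum, not fio code: NR_BINS stands in for FIO_IO_U_PLAT_NR, and the bin contents are made-up cumulative counts.

#include <stdint.h>
#include <stdio.h>

#define NR_BINS 8	/* stand-in for FIO_IO_U_PLAT_NR, illustrative only */

/*
 * Collapse "stride" adjacent bins starting at index j. When a previous
 * snapshot is supplied, report only the growth since that snapshot,
 * mirroring the four-argument hist_sum() added by this patch.
 */
static uint64_t hist_sum(int j, int stride, const uint64_t *io_u_plat,
			 const uint64_t *io_u_plat_last)
{
	uint64_t sum = 0;
	int k;

	if (io_u_plat_last) {
		for (k = 0; k < stride; k++)
			sum += io_u_plat[j + k] - io_u_plat_last[j + k];
	} else {
		for (k = 0; k < stride; k++)
			sum += io_u_plat[j + k];
	}

	return sum;
}

int main(void)
{
	/* Cumulative bin counts at the previous and current log samples. */
	uint64_t prev[NR_BINS] = { 1, 2, 0, 4, 0, 0, 3, 1 };
	uint64_t cur[NR_BINS]  = { 3, 2, 5, 4, 1, 0, 9, 1 };
	int stride = 2, j;

	for (j = 0; j < NR_BINS; j += stride)
		printf("bins %d-%d: %llu\n", j, j + stride - 1,
		       (unsigned long long) hist_sum(j, stride, cur, prev));

	return 0;
}

Built with e.g. "cc -o hist_demo hist_demo.c", this prints one collapsed sum per group of stride bins (2, 5, 1, 6 for the values above), i.e. the per-interval counts recovered from two cumulative snapshots.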