X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=iolog.c;h=d7474724f0d140cd4e66b5136f47aedb2002ddcd;hp=2f84c8280240eab3e3ca3d5bdb70f87777017216;hb=94a78d52c83ab998d0b4614e472338ab959bb224;hpb=bead01d7dcd76467f0aba0d32b173442bbdaa020 diff --git a/iolog.c b/iolog.c index 2f84c828..d7474724 100644 --- a/iolog.c +++ b/iolog.c @@ -4,7 +4,6 @@ */ #include #include -#include #include #include #include @@ -15,9 +14,13 @@ #include "flist.h" #include "fio.h" -#include "verify.h" #include "trim.h" #include "filelock.h" +#include "smalloc.h" +#include "blktrace.h" +#include "pshared.h" + +static int iolog_flush(struct io_log *log); static const char iolog_ver2[] = "fio version 2 iolog"; @@ -60,8 +63,9 @@ void log_file(struct thread_data *td, struct fio_file *f, static void iolog_delay(struct thread_data *td, unsigned long delay) { uint64_t usec = utime_since_now(&td->last_issue); + unsigned long orig_delay = delay; uint64_t this_delay; - struct timeval tv; + struct timespec ts; if (delay < td->time_offset) { td->time_offset = 0; @@ -74,7 +78,7 @@ static void iolog_delay(struct thread_data *td, unsigned long delay) delay -= usec; - fio_gettime(&tv, NULL); + fio_gettime(&ts, NULL); while (delay && !td->terminate) { this_delay = delay; if (this_delay > 500000) @@ -84,9 +88,9 @@ static void iolog_delay(struct thread_data *td, unsigned long delay) delay -= this_delay; } - usec = utime_since_now(&tv); - if (usec > delay) - td->time_offset = usec - delay; + usec = utime_since_now(&ts); + if (usec > orig_delay) + td->time_offset = usec - orig_delay; else td->time_offset = 0; } @@ -106,6 +110,11 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo) switch (ipo->file_action) { case FIO_LOG_OPEN_FILE: + if (td->o.replay_redirect && fio_file_open(f)) { + dprint(FD_FILE, "iolog: ignoring re-open of file %s\n", + f->file_name); + break; + } ret = td_io_open_file(td, f); if (!ret) break; @@ -175,7 +184,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u) void prune_io_piece_log(struct thread_data *td) { struct io_piece *ipo; - struct rb_node *n; + struct fio_rb_node *n; while ((n = rb_first(&td->io_hist_tree)) != NULL) { ipo = rb_entry(n, struct io_piece, rb_node); @@ -199,10 +208,10 @@ void prune_io_piece_log(struct thread_data *td) */ void log_io_piece(struct thread_data *td, struct io_u *io_u) { - struct rb_node **p, *parent; + struct fio_rb_node **p, *parent; struct io_piece *ipo, *__ipo; - ipo = malloc(sizeof(struct io_piece)); + ipo = calloc(1, sizeof(struct io_piece)); init_ipo(ipo); ipo->file = io_u->file; ipo->offset = io_u->offset; @@ -218,21 +227,11 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u) } /* - * We don't need to sort the entries, if: - * - * Sequential writes, or - * Random writes that lay out the file as it goes along - * - * For both these cases, just reading back data in the order we - * wrote it out is the fastest. - * - * One exception is if we don't have a random map AND we are doing - * verifies, in that case we need to check for duplicate blocks and - * drop the old one, which we rely on the rb insert/lookup for - * handling. + * Only sort writes if we don't have a random map in which case we need + * to check for duplicate blocks and drop the old one, which we rely on + * the rb insert/lookup for handling. */ - if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) && - (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) { + if (file_randommap(td, ipo->file)) { INIT_FLIST_HEAD(&ipo->list); flist_add_tail(&ipo->list, &td->io_hist_list); ipo->flags |= IP_F_ONLIST; @@ -269,13 +268,14 @@ restart: overlap = 1; if (overlap) { - dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu", + dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu\n", __ipo->offset, __ipo->len, ipo->offset, ipo->len); td->io_hist_len--; rb_erase(parent, &td->io_hist_tree); remove_trim_entry(td, __ipo); - free(__ipo); + if (!(__ipo->flags & IP_F_IN_FLIGHT)) + free(__ipo); goto restart; } } @@ -315,7 +315,7 @@ void unlog_io_piece(struct thread_data *td, struct io_u *io_u) td->io_hist_len--; } -void trim_io_piece(struct thread_data *td, const struct io_u *io_u) +void trim_io_piece(const struct io_u *io_u) { struct io_piece *ipo = io_u->ipo; @@ -338,12 +338,12 @@ void write_iolog_close(struct thread_data *td) * Read version 2 iolog data. It is enhanced to include per-file logging, * syncs, etc. */ -static int read_iolog2(struct thread_data *td, FILE *f) +static bool read_iolog2(struct thread_data *td, FILE *f) { unsigned long long offset; unsigned int bytes; int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */ - char *fname, *act; + char *rfname, *fname, *act; char *str, *p; enum fio_ddir rw; @@ -354,7 +354,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) * for doing verifications. */ str = malloc(4096); - fname = malloc(256+16); + rfname = fname = malloc(256+16); act = malloc(256+16); reads = writes = waits = 0; @@ -362,8 +362,12 @@ static int read_iolog2(struct thread_data *td, FILE *f) struct io_piece *ipo; int r; - r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset, + r = sscanf(p, "%256s %256s %llu %u", rfname, act, &offset, &bytes); + + if (td->o.replay_redirect) + fname = td->o.replay_redirect; + if (r == 4) { /* * Check action first @@ -389,8 +393,14 @@ static int read_iolog2(struct thread_data *td, FILE *f) } else if (r == 2) { rw = DDIR_INVAL; if (!strcmp(act, "add")) { - fileno = add_file(td, fname, 0, 1); - file_action = FIO_LOG_ADD_FILE; + if (td->o.replay_redirect && + get_fileno(td, fname) != -1) { + dprint(FD_FILE, "iolog: ignoring" + " re-add of file %s\n", fname); + } else { + fileno = add_file(td, fname, 0, 1); + file_action = FIO_LOG_ADD_FILE; + } continue; } else if (!strcmp(act, "open")) { fileno = get_fileno(td, fname); @@ -404,7 +414,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) continue; } } else { - log_err("bad iolog2: %s", p); + log_err("bad iolog2: %s\n", p); continue; } @@ -418,6 +428,8 @@ static int read_iolog2(struct thread_data *td, FILE *f) continue; writes++; } else if (rw == DDIR_WAIT) { + if (td->o.no_stall) + continue; waits++; } else if (rw == DDIR_INVAL) { } else if (!ddir_sync(rw)) { @@ -428,13 +440,18 @@ static int read_iolog2(struct thread_data *td, FILE *f) /* * Make note of file */ - ipo = malloc(sizeof(*ipo)); + ipo = calloc(1, sizeof(*ipo)); init_ipo(ipo); ipo->ddir = rw; if (rw == DDIR_WAIT) { ipo->delay = offset; } else { - ipo->offset = offset; + if (td->o.replay_scale) + ipo->offset = offset / td->o.replay_scale; + else + ipo->offset = offset; + ipo_bytes_align(td->o.replay_align, ipo); + ipo->len = bytes; if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw]) td->o.max_bs[rw] = bytes; @@ -448,7 +465,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) free(str); free(act); - free(fname); + free(rfname); if (writes && read_only) { log_err("fio: <%s> skips replay of %d writes due to" @@ -457,7 +474,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) } if (!reads && !writes && !waits) - return 1; + return false; else if (reads && !writes) td->o.td_ddir = TD_DDIR_READ; else if (!reads && writes) @@ -465,22 +482,22 @@ static int read_iolog2(struct thread_data *td, FILE *f) else td->o.td_ddir = TD_DDIR_RW; - return 0; + return true; } /* * open iolog, check version, and call appropriate parser */ -static int init_iolog_read(struct thread_data *td) +static bool init_iolog_read(struct thread_data *td) { char buffer[256], *p; FILE *f; - int ret; + bool ret; f = fopen(td->o.read_iolog_file, "r"); if (!f) { perror("fopen read iolog"); - return 1; + return false; } p = fgets(buffer, sizeof(buffer), f); @@ -488,7 +505,7 @@ static int init_iolog_read(struct thread_data *td) td_verror(td, errno, "iolog read"); log_err("fio: unable to read iolog\n"); fclose(f); - return 1; + return false; } /* @@ -499,7 +516,7 @@ static int init_iolog_read(struct thread_data *td) ret = read_iolog2(td, f); else { log_err("fio: iolog version 1 is no longer supported\n"); - ret = 1; + ret = false; } fclose(f); @@ -509,7 +526,7 @@ static int init_iolog_read(struct thread_data *td) /* * Set up a log for storing io patterns. */ -static int init_iolog_write(struct thread_data *td) +static bool init_iolog_write(struct thread_data *td) { struct fio_file *ff; FILE *f; @@ -518,7 +535,7 @@ static int init_iolog_write(struct thread_data *td) f = fopen(td->o.write_iolog_file, "a"); if (!f) { perror("fopen write iolog"); - return 1; + return false; } /* @@ -533,7 +550,7 @@ static int init_iolog_write(struct thread_data *td) */ if (fprintf(f, "%s\n", iolog_ver2) < 0) { perror("iolog init\n"); - return 1; + return false; } /* @@ -542,12 +559,12 @@ static int init_iolog_write(struct thread_data *td) for_each_file(td, ff, i) log_file(td, ff, FIO_LOG_ADD_FILE); - return 0; + return true; } -int init_iolog(struct thread_data *td) +bool init_iolog(struct thread_data *td) { - int ret = 0; + bool ret; if (td->o.read_iolog_file) { int need_swap; @@ -562,8 +579,10 @@ int init_iolog(struct thread_data *td) ret = init_iolog_read(td); } else if (td->o.write_iolog_file) ret = init_iolog_write(td); + else + ret = true; - if (ret) + if (!ret) td_verror(td, EINVAL, "failed initializing iolog"); return ret; @@ -573,19 +592,41 @@ void setup_log(struct io_log **log, struct log_params *p, const char *filename) { struct io_log *l; + int i; + struct io_u_plat_entry *entry; + struct flist_head *list; - l = calloc(1, sizeof(*l)); - l->nr_samples = 0; - l->max_samples = 1024; + l = scalloc(1, sizeof(*l)); + INIT_FLIST_HEAD(&l->io_logs); l->log_type = p->log_type; l->log_offset = p->log_offset; l->log_gz = p->log_gz; l->log_gz_store = p->log_gz_store; - l->log = malloc(l->max_samples * log_entry_sz(l)); l->avg_msec = p->avg_msec; + l->hist_msec = p->hist_msec; + l->hist_coarseness = p->hist_coarseness; l->filename = strdup(filename); l->td = p->td; + /* Initialize histogram lists for each r/w direction, + * with initial io_u_plat of all zeros: + */ + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + list = &l->hist_window[i].list; + INIT_FLIST_HEAD(list); + entry = calloc(1, sizeof(struct io_u_plat_entry)); + flist_add(&entry->list, list); + } + + if (l->td && l->td->o.io_submit_mode != IO_MODE_OFFLOAD) { + struct io_logs *p; + + p = calloc(1, sizeof(*l->pending)); + p->max_samples = DEF_LOG_ENTRIES; + p->log = calloc(p->max_samples, log_entry_sz(l)); + l->pending = p; + } + if (l->log_offset) l->log_ddir_mask = LOG_OFFSET_SAMPLE_BIT; @@ -594,7 +635,8 @@ void setup_log(struct io_log **log, struct log_params *p, if (l->log_gz && !p->td) l->log_gz = 0; else if (l->log_gz || l->log_gz_store) { - pthread_mutex_init(&l->chunk_lock, NULL); + mutex_init_pshared(&l->chunk_lock); + mutex_init_pshared(&l->deferred_free_lock); p->td->flags |= TD_F_COMPRESS_LOG; } @@ -629,9 +671,85 @@ static void clear_file_buffer(void *buf) void free_log(struct io_log *log) { - free(log->log); + while (!flist_empty(&log->io_logs)) { + struct io_logs *cur_log; + + cur_log = flist_first_entry(&log->io_logs, struct io_logs, list); + flist_del_init(&cur_log->list); + free(cur_log->log); + sfree(cur_log); + } + + if (log->pending) { + free(log->pending->log); + free(log->pending); + log->pending = NULL; + } + + free(log->pending); free(log->filename); - free(log); + sfree(log); +} + +uint64_t hist_sum(int j, int stride, uint64_t *io_u_plat, + uint64_t *io_u_plat_last) +{ + uint64_t sum; + int k; + + if (io_u_plat_last) { + for (k = sum = 0; k < stride; k++) + sum += io_u_plat[j + k] - io_u_plat_last[j + k]; + } else { + for (k = sum = 0; k < stride; k++) + sum += io_u_plat[j + k]; + } + + return sum; +} + +static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples, + uint64_t sample_size) +{ + struct io_sample *s; + int log_offset; + uint64_t i, j, nr_samples; + struct io_u_plat_entry *entry, *entry_before; + uint64_t *io_u_plat; + uint64_t *io_u_plat_before; + + int stride = 1 << hist_coarseness; + + if (!sample_size) + return; + + s = __get_sample(samples, 0, 0); + log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0; + + nr_samples = sample_size / __log_entry_sz(log_offset); + + for (i = 0; i < nr_samples; i++) { + s = __get_sample(samples, log_offset, i); + + entry = s->data.plat_entry; + io_u_plat = entry->io_u_plat; + + entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list); + io_u_plat_before = entry_before->io_u_plat; + + fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time, + io_sample_ddir(s), s->bs); + for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) { + fprintf(f, "%llu, ", (unsigned long long) + hist_sum(j, stride, io_u_plat, io_u_plat_before)); + } + fprintf(f, "%llu\n", (unsigned long long) + hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat, + io_u_plat_before)); + + flist_del(&entry_before->list); + free(entry_before); + } } void flush_samples(FILE *f, void *samples, uint64_t sample_size) @@ -652,16 +770,16 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size) s = __get_sample(samples, log_offset, i); if (!log_offset) { - fprintf(f, "%lu, %lu, %u, %u\n", + fprintf(f, "%lu, %" PRId64 ", %u, %u\n", (unsigned long) s->time, - (unsigned long) s->val, + s->data.val, io_sample_ddir(s), s->bs); } else { struct io_sample_offset *so = (void *) s; - fprintf(f, "%lu, %lu, %u, %u, %llu\n", + fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n", (unsigned long) s->time, - (unsigned long) s->val, + s->data.val, io_sample_ddir(s), s->bs, (unsigned long long) so->offset); } @@ -672,21 +790,10 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size) struct iolog_flush_data { struct workqueue_work work; - pthread_mutex_t lock; - pthread_cond_t cv; - int wait; - volatile int done; - volatile int refs; struct io_log *log; void *samples; - uint64_t nr_samples; -}; - -struct iolog_compress { - struct flist_head list; - void *buf; - size_t len; - unsigned int seq; + uint32_t nr_samples; + bool free; }; #define GZ_CHUNK 131072 @@ -713,6 +820,7 @@ static int z_stream_init(z_stream *stream, int gz_hdr) { int wbits = 15; + memset(stream, 0, sizeof(*stream)); stream->zalloc = Z_NULL; stream->zfree = Z_NULL; stream->opaque = Z_NULL; @@ -747,7 +855,8 @@ static void finish_chunk(z_stream *stream, FILE *f, ret = inflateEnd(stream); if (ret != Z_OK) - log_err("fio: failed to end log inflation (%d)\n", ret); + log_err("fio: failed to end log inflation seq %d (%d)\n", + iter->seq, ret); flush_samples(f, iter->buf, iter->buf_used); free(iter->buf); @@ -764,7 +873,7 @@ static size_t inflate_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f, { size_t ret; - dprint(FD_COMPRESS, "inflate chunk size=%lu, seq=%u", + dprint(FD_COMPRESS, "inflate chunk size=%lu, seq=%u\n", (unsigned long) ic->len, ic->seq); if (ic->seq != iter->seq) { @@ -811,7 +920,7 @@ static size_t inflate_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f, ret = (void *) stream->next_in - ic->buf; - dprint(FD_COMPRESS, "inflated to size=%lu\n", (unsigned long) ret); + dprint(FD_COMPRESS, "inflated to size=%lu\n", (unsigned long) iter->buf_size); return ret; } @@ -867,7 +976,7 @@ int iolog_file_inflate(const char *file) struct iolog_compress ic; z_stream stream; struct stat sb; - ssize_t ret; + size_t ret; size_t total; void *buf; FILE *f; @@ -889,12 +998,12 @@ int iolog_file_inflate(const char *file) ic.seq = 1; ret = fread(ic.buf, ic.len, 1, f); - if (ret < 0) { + if (ret == 0 && ferror(f)) { perror("fread"); fclose(f); free(buf); return 1; - } else if (ret != 1) { + } else if (ferror(f) || (!feof(f) && ret != 1)) { log_err("fio: short read on reading log\n"); fclose(f); free(buf); @@ -949,7 +1058,7 @@ int iolog_file_inflate(const char *file) #endif -void flush_log(struct io_log *log, int do_append) +void flush_log(struct io_log *log, bool do_append) { void *buf; FILE *f; @@ -967,7 +1076,20 @@ void flush_log(struct io_log *log, int do_append) inflate_gz_chunks(log, f); - flush_samples(f, log->log, log->nr_samples * log_entry_sz(log)); + while (!flist_empty(&log->io_logs)) { + struct io_logs *cur_log; + + cur_log = flist_first_entry(&log->io_logs, struct io_logs, list); + flist_del_init(&cur_log->list); + + if (log->td && log == log->td->clat_hist_log) + flush_hist_samples(f, log->hist_coarseness, cur_log->log, + log_sample_sz(log, cur_log)); + else + flush_samples(f, cur_log->log, log_sample_sz(log, cur_log)); + + sfree(cur_log); + } fclose(f); clear_file_buffer(buf); @@ -976,7 +1098,7 @@ void flush_log(struct io_log *log, int do_append) static int finish_log(struct thread_data *td, struct io_log *log, int trylock) { if (td->flags & TD_F_COMPRESS_LOG) - iolog_flush(log, 1); + iolog_flush(log); if (trylock) { if (fio_trylock_file(log->filename)) @@ -984,13 +1106,7 @@ static int finish_log(struct thread_data *td, struct io_log *log, int trylock) } else fio_lock_file(log->filename); - /* - * We should do this for any networked client. Will enable when - * the kinks are ironed out. - * - * if (td->client_type == FIO_CLIENT_TYPE_GUI || is_backed) - */ - if (td->client_type == FIO_CLIENT_TYPE_GUI) + if (td->client_type == FIO_CLIENT_TYPE_GUI || is_backend) fio_send_iolog(td, log, log->filename); else flush_log(log, !td->o.per_job_logs); @@ -1000,31 +1116,63 @@ static int finish_log(struct thread_data *td, struct io_log *log, int trylock) return 0; } +size_t log_chunk_sizes(struct io_log *log) +{ + struct flist_head *entry; + size_t ret; + + if (flist_empty(&log->chunk_list)) + return 0; + + ret = 0; + pthread_mutex_lock(&log->chunk_lock); + flist_for_each(entry, &log->chunk_list) { + struct iolog_compress *c; + + c = flist_entry(entry, struct iolog_compress, list); + ret += c->len; + } + pthread_mutex_unlock(&log->chunk_lock); + return ret; +} + #ifdef CONFIG_ZLIB -static void drop_data_unlock(struct iolog_flush_data *data) +static void iolog_put_deferred(struct io_log *log, void *ptr) +{ + if (!ptr) + return; + + pthread_mutex_lock(&log->deferred_free_lock); + if (log->deferred < IOLOG_MAX_DEFER) { + log->deferred_items[log->deferred] = ptr; + log->deferred++; + } else if (!fio_did_warn(FIO_WARN_IOLOG_DROP)) + log_err("fio: had to drop log entry free\n"); + pthread_mutex_unlock(&log->deferred_free_lock); +} + +static void iolog_free_deferred(struct io_log *log) { - int refs; + int i; + + if (!log->deferred) + return; - refs = --data->refs; - pthread_mutex_unlock(&data->lock); + pthread_mutex_lock(&log->deferred_free_lock); - if (!refs) { - free(data); - pthread_mutex_destroy(&data->lock); - pthread_cond_destroy(&data->cv); + for (i = 0; i < log->deferred; i++) { + free(log->deferred_items[i]); + log->deferred_items[i] = NULL; } + + log->deferred = 0; + pthread_mutex_unlock(&log->deferred_free_lock); } -/* - * Invoked from our compress helper thread, when logging would have exceeded - * the specified memory limitation. Compresses the previously stored - * entries. - */ -static int gz_work(struct submit_worker *sw, struct workqueue_work *work) +static int gz_work(struct iolog_flush_data *data) { - struct iolog_flush_data *data; - struct iolog_compress *c; + struct iolog_compress *c = NULL; struct flist_head list; unsigned int seq; z_stream stream; @@ -1033,8 +1181,7 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) INIT_FLIST_HEAD(&list); - data = container_of(work, struct iolog_flush_data, work); - + memset(&stream, 0, sizeof(stream)); stream.zalloc = Z_NULL; stream.zfree = Z_NULL; stream.opaque = Z_NULL; @@ -1042,7 +1189,7 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION); if (ret != Z_OK) { log_err("fio: failed to init gz stream\n"); - return 0; + goto err; } seq = ++data->log->chunk_seq; @@ -1050,9 +1197,13 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) stream.next_in = (void *) data->samples; stream.avail_in = data->nr_samples * log_entry_sz(data->log); - dprint(FD_COMPRESS, "deflate input size=%lu, seq=%u\n", - (unsigned long) stream.avail_in, seq); + dprint(FD_COMPRESS, "deflate input size=%lu, seq=%u, log=%s\n", + (unsigned long) stream.avail_in, seq, + data->log->filename); do { + if (c) + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, + (unsigned long) c->len); c = get_new_chunk(seq); stream.avail_out = GZ_CHUNK; stream.next_out = c->buf; @@ -1072,9 +1223,26 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) stream.avail_out = GZ_CHUNK - c->len; ret = deflate(&stream, Z_FINISH); - if (ret == Z_STREAM_END) - c->len = GZ_CHUNK - stream.avail_out; - else { + if (ret < 0) { + /* + * Z_BUF_ERROR is special, it just means we need more + * output space. We'll handle that below. Treat any other + * error as fatal. + */ + if (ret != Z_BUF_ERROR) { + log_err("fio: deflate log (%d)\n", ret); + flist_del(&c->list); + free_chunk(c); + goto err; + } + } + + total -= c->len; + c->len = GZ_CHUNK - stream.avail_out; + total += c->len; + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, (unsigned long) c->len); + + if (ret != Z_STREAM_END) { do { c = get_new_chunk(seq); stream.avail_out = GZ_CHUNK; @@ -1083,6 +1251,8 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) c->len = GZ_CHUNK - stream.avail_out; total += c->len; flist_add_tail(&c->list, &list); + dprint(FD_COMPRESS, "seq=%d, chunk=%lu\n", seq, + (unsigned long) c->len); } while (ret != Z_STREAM_END); } @@ -1092,7 +1262,7 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) if (ret != Z_OK) log_err("fio: deflateEnd %d\n", ret); - free(data->samples); + iolog_put_deferred(data->log, data->samples); if (!flist_empty(&list)) { pthread_mutex_lock(&data->log->chunk_lock); @@ -1102,14 +1272,8 @@ static int gz_work(struct submit_worker *sw, struct workqueue_work *work) ret = 0; done: - if (data->wait) { - pthread_mutex_lock(&data->lock); - data->done = 1; - pthread_cond_signal(&data->cv); - - drop_data_unlock(data); - } else - free(data); + if (data->free) + sfree(data); return ret; err: while (!flist_empty(&list)) { @@ -1121,6 +1285,16 @@ err: goto done; } +/* + * Invoked from our compress helper thread, when logging would have exceeded + * the specified memory limitation. Compresses the previously stored + * entries. + */ +static int gz_work_async(struct submit_worker *sw, struct workqueue_work *work) +{ + return gz_work(container_of(work, struct iolog_flush_data, work)); +} + static int gz_init_worker(struct submit_worker *sw) { struct thread_data *td = sw->wq->td; @@ -1137,7 +1311,7 @@ static int gz_init_worker(struct submit_worker *sw) } static struct workqueue_ops log_compress_wq_ops = { - .fn = gz_work, + .fn = gz_work_async, .init_worker_fn = gz_init_worker, .nice = 1, }; @@ -1163,52 +1337,70 @@ void iolog_compress_exit(struct thread_data *td) * Queue work item to compress the existing log entries. We reset the * current log to a small size, and reference the existing log in the * data that we queue for compression. Once compression has been done, - * this old log is freed. If called with wait == 1, will not return until - * the log compression has completed. + * this old log is freed. If called with finish == true, will not return + * until the log compression has completed, and will flush all previous + * logs too */ -int iolog_flush(struct io_log *log, int wait) +static int iolog_flush(struct io_log *log) { struct iolog_flush_data *data; - io_u_quiesce(log->td); - data = malloc(sizeof(*data)); if (!data) return 1; data->log = log; + data->free = false; - data->samples = log->log; - data->nr_samples = log->nr_samples; + while (!flist_empty(&log->io_logs)) { + struct io_logs *cur_log; - log->nr_samples = 0; - log->max_samples = 128; - log->log = malloc(log->max_samples * log_entry_sz(log)); + cur_log = flist_first_entry(&log->io_logs, struct io_logs, list); + flist_del_init(&cur_log->list); - data->wait = wait; - if (data->wait) { - pthread_mutex_init(&data->lock, NULL); - pthread_cond_init(&data->cv, NULL); - data->done = 0; - data->refs = 2; - } - - workqueue_enqueue(&log->td->log_compress_wq, &data->work); + data->samples = cur_log->log; + data->nr_samples = cur_log->nr_samples; - if (wait) { - pthread_mutex_lock(&data->lock); - while (!data->done) - pthread_cond_wait(&data->cv, &data->lock); + sfree(cur_log); - drop_data_unlock(data); + gz_work(data); } + free(data); return 0; } +int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log) +{ + struct iolog_flush_data *data; + + data = smalloc(sizeof(*data)); + if (!data) + return 1; + + data->log = log; + + data->samples = cur_log->log; + data->nr_samples = cur_log->nr_samples; + data->free = true; + + cur_log->nr_samples = cur_log->max_samples = 0; + cur_log->log = NULL; + + workqueue_enqueue(&log->td->log_compress_wq, &data->work); + + iolog_free_deferred(log); + + return 0; +} #else -int iolog_flush(struct io_log *log, int wait) +static int iolog_flush(struct io_log *log) +{ + return 1; +} + +int iolog_cur_flush(struct io_log *log, struct io_logs *cur_log) { return 1; } @@ -1224,6 +1416,29 @@ void iolog_compress_exit(struct thread_data *td) #endif +struct io_logs *iolog_cur_log(struct io_log *log) +{ + if (flist_empty(&log->io_logs)) + return NULL; + + return flist_last_entry(&log->io_logs, struct io_logs, list); +} + +uint64_t iolog_nr_samples(struct io_log *iolog) +{ + struct flist_head *entry; + uint64_t ret = 0; + + flist_for_each(entry, &iolog->io_logs) { + struct io_logs *cur_log; + + cur_log = flist_entry(entry, struct io_logs, list); + ret += cur_log->nr_samples; + } + + return ret; +} + static int __write_log(struct thread_data *td, struct io_log *log, int try) { if (log) @@ -1232,29 +1447,88 @@ static int __write_log(struct thread_data *td, struct io_log *log, int try) return 0; } -static int write_iops_log(struct thread_data *td, int try) +static int write_iops_log(struct thread_data *td, int try, bool unit_log) { - return __write_log(td, td->iops_log, try); + int ret; + + if (per_unit_log(td->iops_log) != unit_log) + return 0; + + ret = __write_log(td, td->iops_log, try); + if (!ret) + td->iops_log = NULL; + + return ret; } -static int write_slat_log(struct thread_data *td, int try) +static int write_slat_log(struct thread_data *td, int try, bool unit_log) { - return __write_log(td, td->slat_log, try); + int ret; + + if (!unit_log) + return 0; + + ret = __write_log(td, td->slat_log, try); + if (!ret) + td->slat_log = NULL; + + return ret; } -static int write_clat_log(struct thread_data *td, int try) +static int write_clat_log(struct thread_data *td, int try, bool unit_log) { - return __write_log(td, td->clat_log, try); + int ret; + + if (!unit_log) + return 0; + + ret = __write_log(td, td->clat_log, try); + if (!ret) + td->clat_log = NULL; + + return ret; } -static int write_lat_log(struct thread_data *td, int try) +static int write_clat_hist_log(struct thread_data *td, int try, bool unit_log) { - return __write_log(td, td->lat_log, try); + int ret; + + if (!unit_log) + return 0; + + ret = __write_log(td, td->clat_hist_log, try); + if (!ret) + td->clat_hist_log = NULL; + + return ret; +} + +static int write_lat_log(struct thread_data *td, int try, bool unit_log) +{ + int ret; + + if (!unit_log) + return 0; + + ret = __write_log(td, td->lat_log, try); + if (!ret) + td->lat_log = NULL; + + return ret; } -static int write_bandw_log(struct thread_data *td, int try) +static int write_bandw_log(struct thread_data *td, int try, bool unit_log) { - return __write_log(td, td->bw_log, try); + int ret; + + if (per_unit_log(td->bw_log) != unit_log) + return 0; + + ret = __write_log(td, td->bw_log, try); + if (!ret) + td->bw_log = NULL; + + return ret; } enum { @@ -1263,13 +1537,14 @@ enum { SLAT_LOG_MASK = 4, CLAT_LOG_MASK = 8, IOPS_LOG_MASK = 16, + CLAT_HIST_LOG_MASK = 32, - ALL_LOG_NR = 5, + ALL_LOG_NR = 6, }; struct log_type { unsigned int mask; - int (*fn)(struct thread_data *, int); + int (*fn)(struct thread_data *, int, bool); }; static struct log_type log_types[] = { @@ -1293,9 +1568,13 @@ static struct log_type log_types[] = { .mask = IOPS_LOG_MASK, .fn = write_iops_log, }, + { + .mask = CLAT_HIST_LOG_MASK, + .fn = write_clat_hist_log, + } }; -void fio_writeout_logs(struct thread_data *td) +void td_writeout_logs(struct thread_data *td, bool unit_logs) { unsigned int log_mask = 0; unsigned int log_left = ALL_LOG_NR; @@ -1303,7 +1582,7 @@ void fio_writeout_logs(struct thread_data *td) old_state = td_bump_runstate(td, TD_FINISHING); - finalize_logs(td); + finalize_logs(td, unit_logs); while (log_left) { int prev_log_left = log_left; @@ -1313,7 +1592,7 @@ void fio_writeout_logs(struct thread_data *td) int ret; if (!(log_mask & lt->mask)) { - ret = lt->fn(td, log_left != 1); + ret = lt->fn(td, log_left != 1, unit_logs); if (!ret) { log_left--; log_mask |= lt->mask; @@ -1327,3 +1606,12 @@ void fio_writeout_logs(struct thread_data *td) td_restore_runstate(td, old_state); } + +void fio_writeout_logs(bool unit_logs) +{ + struct thread_data *td; + int i; + + for_each_td(td, i) + td_writeout_logs(td, unit_logs); +}