From ff58fcede39d16a2c642897cbe5a7f28b2da1950 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Aug 2010 12:02:08 +0200 Subject: [PATCH] Add support for replaying blktrace trim/discard Signed-off-by: Jens Axboe --- blktrace.c | 31 +++++++++++++++++++++++++++++++ blktrace_api.h | 7 +++++++ engines/mmap.c | 8 +++++++- engines/sync.c | 4 ++-- fio.c | 8 ++++++-- init.c | 2 ++ io_ddir.h | 5 +++++ io_u.c | 24 ++++++++++++++++++------ ioengines.c | 16 ++++++++-------- log.c | 2 ++ stat.c | 29 ++++++++++++++++++++++++++--- 11 files changed, 114 insertions(+), 22 deletions(-) diff --git a/blktrace.c b/blktrace.c index 6cf8d461..f22ab1b9 100644 --- a/blktrace.c +++ b/blktrace.c @@ -228,12 +228,41 @@ static void handle_trace_notify(struct blk_io_trace *t) case BLK_TN_TIMESTAMP: printf("got timestamp notify: %x, %d\n", t->action, t->pid); break; + case BLK_TN_MESSAGE: + break; default: dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action); break; } } +static void handle_trace_discard(struct thread_data *td, struct blk_io_trace *t, + unsigned long long ttime, unsigned long *ios) +{ + struct io_piece *ipo = malloc(sizeof(*ipo)); + + trace_add_file(td, t->device); + + ios[DDIR_WRITE]++; + td->o.size += t->bytes; + + memset(ipo, 0, sizeof(*ipo)); + INIT_FLIST_HEAD(&ipo->list); + + /* + * the 512 is wrong here, it should be the hardware sector size... 
+ */ + ipo->offset = t->sector * 512; + ipo->len = t->bytes; + ipo->delay = ttime / 1000; + ipo->ddir = DDIR_TRIM; + + dprint(FD_BLKTRACE, "store discard, off=%llu, len=%lu, delay=%lu\n", + ipo->offset, ipo->len, + ipo->delay); + queue_io_piece(td, ipo); +} + static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t, unsigned long long ttime, unsigned long *ios, unsigned int *bs) @@ -267,6 +296,8 @@ static void handle_trace(struct thread_data *td, struct blk_io_trace *t, if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY)) handle_trace_notify(t); + else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD)) + handle_trace_discard(td, t, ttime, ios); else handle_trace_fs(td, t, ttime, ios, bs); } diff --git a/blktrace_api.h b/blktrace_api.h index 61b405a2..3df3347d 100644 --- a/blktrace_api.h +++ b/blktrace_api.h @@ -20,6 +20,8 @@ enum { BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ + BLK_TC_DISCARD = 1 << 13, /* discard requests */ + BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -46,6 +48,8 @@ enum { __BLK_TA_SPLIT, /* bio was split */ __BLK_TA_BOUNCE, /* bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ + __BLK_TA_ABORT, /* request aborted */ + __BLK_TA_DRV_DATA, /* driver-specific binary data */ }; /* @@ -54,6 +58,7 @@ enum { enum blktrace_notify { __BLK_TN_PROCESS = 0, /* establish pid/name mapping */ __BLK_TN_TIMESTAMP, /* include system clock */ + __BLK_TN_MESSAGE, /* Character string message */ }; /* @@ -74,9 +79,11 @@ enum blktrace_notify { #define BLK_TA_SPLIT (__BLK_TA_SPLIT) #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) +#define 
BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_IO_TRACE_MAGIC 0x65617400 #define BLK_IO_TRACE_VERSION 0x07 diff --git a/engines/mmap.c b/engines/mmap.c index 53fd358c..002918c1 100644 --- a/engines/mmap.c +++ b/engines/mmap.c @@ -154,12 +154,18 @@ static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) io_u->error = errno; td_verror(td, io_u->error, "msync"); } + } else if (io_u->ddir == DDIR_TRIM) { + int ret = do_io_u_trim(td, io_u); + + if (!ret) + td_verror(td, io_u->error, "trim"); } + /* * not really direct, but should drop the pages from the cache */ - if (td->o.odirect && !ddir_sync(io_u->ddir)) { + if (td->o.odirect && ddir_rw(io_u->ddir)) { if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) { io_u->error = errno; td_verror(td, io_u->error, "msync"); diff --git a/engines/sync.c b/engines/sync.c index 3dbce47b..4eea2f92 100644 --- a/engines/sync.c +++ b/engines/sync.c @@ -30,7 +30,7 @@ static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) { struct fio_file *f = io_u->file; - if (ddir_sync(io_u->ddir)) + if (!ddir_rw(io_u->ddir)) return 0; if (f->file_pos != -1ULL && f->file_pos == io_u->offset) @@ -46,7 +46,7 @@ static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret) { - if (io_u->file && ret >= 0) + if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir)) io_u->file->file_pos = io_u->offset + ret; if (ret != (int) io_u->xfer_buflen) { diff --git a/fio.c b/fio.c index 6ab0f4ad..1d20cf79 100644 --- a/fio.c +++ b/fio.c @@ -176,6 +176,8 @@ static int __check_min_rate(struct thread_data *td, struct timeval *now, unsigned int rate_iops = 0; unsigned int rate_iops_min = 0; + assert(ddir_rw(ddir)); + if (!td->o.ratemin[ddir] && !td->o.rate_iops_min[ddir]) return 0; @@ -491,7 +493,8 @@ static void do_verify(struct thread_data *td) io_u->xfer_buf += bytes; io_u->offset += bytes; - td->ts.short_io_u[io_u->ddir]++; + if 
(ddir_rw(io_u->ddir)) + td->ts.short_io_u[io_u->ddir]++; if (io_u->offset == f->real_file_size) goto sync_done; @@ -636,7 +639,8 @@ static void do_io(struct thread_data *td) io_u->xfer_buf += bytes; io_u->offset += bytes; - td->ts.short_io_u[io_u->ddir]++; + if (ddir_rw(io_u->ddir)) + td->ts.short_io_u[io_u->ddir]++; if (io_u->offset == f->real_file_size) goto sync_done; diff --git a/init.c b/init.c index ff7da40d..90e2063f 100644 --- a/init.c +++ b/init.c @@ -194,6 +194,8 @@ static int __setup_rate(struct thread_data *td, enum fio_ddir ddir) unsigned int bs = td->o.min_bs[ddir]; unsigned long long bytes_per_sec; + assert(ddir_rw(ddir)); + if (td->o.rate[ddir]) bytes_per_sec = td->o.rate[ddir]; else diff --git a/io_ddir.h b/io_ddir.h index f83fc9b2..b2342562 100644 --- a/io_ddir.h +++ b/io_ddir.h @@ -34,4 +34,9 @@ static inline int ddir_sync(enum fio_ddir ddir) ddir == DDIR_SYNC_FILE_RANGE; } +static inline int ddir_rw(enum fio_ddir ddir) +{ + return ddir == DDIR_READ || ddir == DDIR_WRITE; +} + #endif diff --git a/io_u.c b/io_u.c index 53bf4923..21a801f8 100644 --- a/io_u.c +++ b/io_u.c @@ -103,6 +103,8 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, unsigned long long max_blocks; unsigned long long max_size; + assert(ddir_rw(ddir)); + /* * Hmm, should we make sure that ->io_size <= ->real_file_size? 
*/ @@ -211,6 +213,8 @@ static int get_next_rand_block(struct thread_data *td, struct fio_file *f, static int get_next_seq_block(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { + assert(ddir_rw(ddir)); + if (f->last_pos < f->real_file_size) { *b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir]; return 0; @@ -225,6 +229,8 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u, struct fio_file *f = io_u->file; int ret; + assert(ddir_rw(ddir)); + if (rw_seq) { if (td_random(td)) ret = get_next_rand_block(td, f, ddir, b); @@ -264,6 +270,8 @@ static int __get_next_offset(struct thread_data *td, struct io_u *io_u) enum fio_ddir ddir = io_u->ddir; int rw_seq_hit = 0; + assert(ddir_rw(ddir)); + if (td->o.ddir_seq_nr && !--td->ddir_seq_nr) { rw_seq_hit = 1; td->ddir_seq_nr = td->o.ddir_seq_nr; @@ -308,6 +316,8 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) unsigned int minbs, maxbs; long r; + assert(ddir_rw(ddir)); + minbs = td->o.min_bs[ddir]; maxbs = td->o.max_bs[ddir]; @@ -388,6 +398,8 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) struct timeval t; long usec; + assert(ddir_rw(ddir)); + if (td->rate_pending_usleep[ddir] <= 0) return ddir; @@ -531,7 +543,7 @@ void requeue_io_u(struct thread_data *td, struct io_u **io_u) td_io_u_lock(td); __io_u->flags |= IO_U_F_FREE; - if ((__io_u->flags & IO_U_F_FLIGHT) && !ddir_sync(__io_u->ddir)) + if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(__io_u->ddir)) td->io_issues[__io_u->ddir]--; __io_u->flags &= ~IO_U_F_FLIGHT; @@ -551,9 +563,9 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) io_u->ddir = get_rw_ddir(td); /* - * fsync() or fdatasync(), we are done + * fsync() or fdatasync() or trim etc, we are done */ - if (ddir_sync(io_u->ddir)) + if (!ddir_rw(io_u->ddir)) goto out; /* @@ -1023,7 +1035,7 @@ struct io_u *get_io_u(struct thread_data *td) f = io_u->file; assert(fio_file_open(f)); - 
if (!ddir_sync(io_u->ddir)) { + if (ddir_rw(io_u->ddir)) { if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) { dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u); goto err_put; @@ -1114,7 +1126,7 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, td->last_was_sync = 0; td->last_ddir = io_u->ddir; - if (!io_u->error) { + if (!io_u->error && ddir_rw(io_u->ddir)) { unsigned int bytes = io_u->buflen - io_u->resid; const enum fio_ddir idx = io_u->ddir; const enum fio_ddir odx = io_u->ddir ^ 1; @@ -1180,7 +1192,7 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, if (ret && !icd->error) icd->error = ret; } - } else { + } else if (io_u->error) { icd->error = io_u->error; io_u_log_error(td, io_u); } diff --git a/ioengines.c b/ioengines.c index 4e059a81..f976efbf 100644 --- a/ioengines.c +++ b/ioengines.c @@ -241,7 +241,7 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u) sizeof(struct timeval)); } - if (!ddir_sync(io_u->ddir)) + if (ddir_rw(io_u->ddir)) td->io_issues[io_u->ddir]++; ret = td->io_ops->queue(td, io_u); @@ -254,7 +254,7 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u) * IO, then it's likely an alignment problem or because the host fs * does not support O_DIRECT */ - if (io_u->error == EINVAL && td->io_issues[io_u->ddir] == 1 && + if (io_u->error == EINVAL && td->io_issues[io_u->ddir & 1] == 1 && td->o.odirect) { log_info("fio: first direct IO errored. 
File system may not " "support direct IO, or iomem_align= is bad.\n"); @@ -266,14 +266,14 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u) } if (ret == FIO_Q_COMPLETED) { - if (!ddir_sync(io_u->ddir)) { + if (ddir_rw(io_u->ddir)) { io_u_mark_depth(td, 1); td->ts.total_io_u[io_u->ddir]++; } } else if (ret == FIO_Q_QUEUED) { int r; - if (!ddir_sync(io_u->ddir)) { + if (ddir_rw(io_u->ddir)) { td->io_u_queued++; td->ts.total_io_u[io_u->ddir]++; } @@ -483,16 +483,16 @@ int do_io_u_trim(struct thread_data *td, struct io_u *io_u) { #ifndef FIO_HAVE_TRIM io_u->error = EINVAL; - return io_u->xfer_buflen; + return 0; #else struct fio_file *f = io_u->file; int ret; ret = os_trim(f->fd, io_u->offset + f->file_offset, io_u->xfer_buflen); if (!ret) - return 0; + return io_u->xfer_buflen; - io_u->error = errno; - return io_u->xfer_buflen; + io_u->error = ret; + return 0; #endif } diff --git a/log.c b/log.c index 829de94c..ce4ac9f4 100644 --- a/log.c +++ b/log.c @@ -294,6 +294,8 @@ static int read_iolog2(struct thread_data *td, FILE *f) rw = DDIR_SYNC; else if (!strcmp(act, "datasync")) rw = DDIR_DATASYNC; + else if (!strcmp(act, "trim")) + rw = DDIR_TRIM; else { log_err("fio: bad iolog file action: %s\n", act); diff --git a/stat.c b/stat.c index 8a0fab05..8e9fba0e 100644 --- a/stat.c +++ b/stat.c @@ -157,6 +157,8 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, char *io_p, *bw_p, *iops_p; int i2p; + assert(ddir_rw(ddir)); + if (!ts->runtime[ddir]) return; @@ -370,6 +372,8 @@ static void show_ddir_status_terse(struct thread_stat *ts, unsigned long long bw; double mean, dev; + assert(ddir_rw(ddir)); + bw = 0; if (ts->runtime[ddir]) bw = ts->io_bytes[ddir] / ts->runtime[ddir]; @@ -735,13 +739,20 @@ static void add_log_sample(struct thread_data *td, struct io_log *iolog, unsigned long val, enum fio_ddir ddir, unsigned int bs) { + if (!ddir_rw(ddir)) + return; + __add_log_sample(iolog, val, ddir, bs, mtime_since_now(&td->epoch)); } void 
add_agg_sample(unsigned long val, enum fio_ddir ddir, unsigned int bs) { - struct io_log *iolog = agg_io_log[ddir]; + struct io_log *iolog; + if (!ddir_rw(ddir)) + return; + + iolog = agg_io_log[ddir]; __add_log_sample(iolog, val, ddir, bs, mtime_since_genesis()); } @@ -750,6 +761,9 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, { struct thread_stat *ts = &td->ts; + if (!ddir_rw(ddir)) + return; + add_stat_sample(&ts->clat_stat[ddir], usec); if (ts->clat_log) @@ -761,6 +775,9 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir, { struct thread_stat *ts = &td->ts; + if (!ddir_rw(ddir)) + return; + add_stat_sample(&ts->slat_stat[ddir], usec); if (ts->slat_log) @@ -772,6 +789,9 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir, { struct thread_stat *ts = &td->ts; + if (!ddir_rw(ddir)) + return; + add_stat_sample(&ts->lat_stat[ddir], usec); if (ts->lat_log) @@ -782,9 +802,12 @@ void add_bw_sample(struct thread_data *td, enum fio_ddir ddir, unsigned int bs, struct timeval *t) { struct thread_stat *ts = &td->ts; - unsigned long spent = mtime_since(&ts->stat_sample_time[ddir], t); - unsigned long rate; + unsigned long spent, rate; + + if (!ddir_rw(ddir)) + return; + spent = mtime_since(&ts->stat_sample_time[ddir], t); if (spent < td->o.bw_avg_time) return; -- 2.25.1