From: Jens Axboe Date: Mon, 22 Sep 2014 16:02:07 +0000 (-0600) Subject: Basic support for dedupe X-Git-Tag: fio-2.1.13~58 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=5c94b00876437a27f1761e47437166780c3c3b93;hp=8fc46b4d03bc61c4e72b69c3bc59bd50c4943e03 Basic support for dedupe This adds an option, dedupe_percentage, that controls how many of the write IO buffers are identical. For instance, if this is set: dedupe_percentage=70 then 70% of the write IO buffers will have identical contents. The specific contents are, as before, controlled by the various options that set buffer contents or buffer compressibility. Signed-off-by: Jens Axboe --- diff --git a/HOWTO b/HOWTO index 73e58ff6..23746cec 100644 --- a/HOWTO +++ b/HOWTO @@ -565,12 +565,20 @@ buffer_compress_chunk=int See buffer_compress_percentage. This alternate random and zeroed data throughout the IO buffer. -buffer_pattern=str If set, fio will fill the io buffers with this pattern. - If not set, the contents of io buffers is defined by the other - options related to buffer contents. The setting can be any - pattern of bytes, and can be prefixed with 0x for hex values. - It may also be a string, where the string must then be - wrapped with "". +buffer_pattern=str If set, fio will fill the io buffers with this + pattern. If not set, the contents of io buffers is defined by + the other options related to buffer contents. The setting can + be any pattern of bytes, and can be prefixed with 0x for hex + values. It may also be a string, where the string must then + be wrapped with "". + +dedupe_percentage=int If set, fio will generate this percentage of + identical buffers when writing. These buffers will be + naturally dedupable. The contents of the buffers depend on + what other buffer compression settings have been set. It's + possible to have the individual buffers either fully + compressible, or not at all. This option only controls the + distribution of unique buffers. 
nrfiles=int Number of files to use for this job. Defaults to 1. diff --git a/cconv.c b/cconv.c index d4fb1588..4a40ed0d 100644 --- a/cconv.c +++ b/cconv.c @@ -241,6 +241,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i)); o->compress_percentage = le32_to_cpu(top->compress_percentage); o->compress_chunk = le32_to_cpu(top->compress_chunk); + o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage); o->trim_backlog = le64_to_cpu(top->trim_backlog); @@ -401,6 +402,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f)); top->compress_percentage = cpu_to_le32(o->compress_percentage); top->compress_chunk = cpu_to_le32(o->compress_chunk); + top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage); for (i = 0; i < DDIR_RWDIR_CNT; i++) { top->bs[i] = cpu_to_le32(o->bs[i]); diff --git a/fio.1 b/fio.1 index e3334bd2..bc6c9fa6 100644 --- a/fio.1 +++ b/fio.1 @@ -481,6 +481,13 @@ setting can be any pattern of bytes, and can be prefixed with 0x for hex values. It may also be a string, where the string must then be wrapped with "". .TP +.BI dedupe_percentage \fR=\fPint +If set, fio will generate this percentage of identical buffers when writing. +These buffers will be naturally dedupable. The contents of the buffers depend +on what other buffer compression settings have been set. It's possible to have +the individual buffers either fully compressible, or not at all. This option +only controls the distribution of unique buffers. +.TP .BI nrfiles \fR=\fPint Number of files to use for this job. Default: 1. 
.TP diff --git a/fio.h b/fio.h index dfbad6d6..136b4308 100644 --- a/fio.h +++ b/fio.h @@ -89,6 +89,7 @@ enum { FIO_RAND_SEQ_RAND_WRITE_OFF, FIO_RAND_SEQ_RAND_TRIM_OFF, FIO_RAND_START_DELAY, + FIO_DEDUPE_OFF, FIO_RAND_NR_OFFS, }; @@ -177,6 +178,8 @@ struct thread_data { }; struct frand_state buf_state; + struct frand_state buf_state_prev; + struct frand_state dedupe_state; unsigned int verify_batch; unsigned int trim_batch; diff --git a/init.c b/init.c index 62c7dc24..5b0290d0 100644 --- a/init.c +++ b/init.c @@ -836,7 +836,9 @@ static void td_fill_rand_seeds_internal(struct thread_data *td) void td_fill_rand_seeds(struct thread_data *td) { if (td->o.allrand_repeatable) { - for (int i = 0; i < FIO_RAND_NR_OFFS; i++) + unsigned int i; + + for (i = 0; i < FIO_RAND_NR_OFFS; i++) td->rand_seeds[i] = FIO_RANDSEED * td->thread_number + i; } @@ -847,6 +849,9 @@ void td_fill_rand_seeds(struct thread_data *td) td_fill_rand_seeds_internal(td); init_rand_seed(&td->buf_state, td->rand_seeds[FIO_RAND_BUF_OFF]); + frand_copy(&td->buf_state_prev, &td->buf_state); + + init_rand_seed(&td->dedupe_state, td->rand_seeds[FIO_DEDUPE_OFF]); } /* diff --git a/io_u.c b/io_u.c index 7cbdb915..af3b4151 100644 --- a/io_u.c +++ b/io_u.c @@ -1828,6 +1828,32 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u) } } +/* + * See if we should reuse the last seed, if dedupe is enabled + */ +static struct frand_state *get_buf_state(struct thread_data *td) +{ + unsigned int v; + unsigned long r; + + if (!td->o.dedupe_percentage) + return &td->buf_state; + + r = __rand(&td->dedupe_state); + v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0))); + + if (v <= td->o.dedupe_percentage) + return &td->buf_state_prev; + + return &td->buf_state; +} + +static void save_buf_state(struct thread_data *td, struct frand_state *rs) +{ + if (rs == &td->buf_state) + frand_copy(&td->buf_state_prev, rs); +} + void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, unsigned int max_bs) { @@ 
-1835,6 +1861,9 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, fill_buffer_pattern(td, buf, max_bs); else if (!td->o.zero_buffers) { unsigned int perc = td->o.compress_percentage; + struct frand_state *rs; + + rs = get_buf_state(td); if (perc) { unsigned int seg = min_write; @@ -1843,10 +1872,12 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, if (!seg) seg = min_write; - fill_random_buf_percentage(&td->buf_state, buf, - perc, seg, max_bs); - } else - fill_random_buf(&td->buf_state, buf, max_bs); + fill_random_buf_percentage(rs, buf, perc, seg,max_bs); + save_buf_state(td, rs); + } else { + fill_random_buf(rs, buf, max_bs); + save_buf_state(td, rs); + } } else memset(buf, 0, max_bs); } diff --git a/lib/rand.h b/lib/rand.h index d62ebe5f..8c35ab1f 100644 --- a/lib/rand.h +++ b/lib/rand.h @@ -7,6 +7,14 @@ struct frand_state { unsigned int s1, s2, s3; }; +static inline void frand_copy(struct frand_state *dst, + struct frand_state *src) +{ + dst->s1 = src->s1; + dst->s2 = src->s2; + dst->s3 = src->s3; +} + static inline unsigned int __rand(struct frand_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) diff --git a/options.c b/options.c index ce955133..593f7171 100644 --- a/options.c +++ b/options.c @@ -1050,6 +1050,16 @@ static int str_buffer_compress_cb(void *data, unsigned long long *il) return 0; } +static int str_dedupe_cb(void *data, unsigned long long *il) +{ + struct thread_data *td = data; + + td->flags |= TD_F_COMPRESS; + td->o.dedupe_percentage = *il; + td->o.refill_buffers = 1; + return 0; +} + static int str_verify_pattern_cb(void *data, const char *input) { struct thread_data *td = data; @@ -3256,6 +3266,18 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_IO_BUF, }, + { + .name = "dedupe_percentage", + .lname = "Dedupe percentage", + .type = FIO_OPT_INT, + .cb = str_dedupe_cb, + .maxval = 100, + .minval = 0, + .help = "Percentage of 
buffers that are dedupable", + .interval = 1, + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_IO_BUF, + }, { .name = "clat_percentiles", .lname = "Completion latency percentiles", diff --git a/server.h b/server.h index cc4c5b43..1b131b92 100644 --- a/server.h +++ b/server.h @@ -38,7 +38,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 35, + FIO_SERVER_VER = 36, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/thread_options.h b/thread_options.h index e545a8fd..a45d7b79 100644 --- a/thread_options.h +++ b/thread_options.h @@ -184,6 +184,7 @@ struct thread_options { unsigned int buffer_pattern_bytes; unsigned int compress_percentage; unsigned int compress_chunk; + unsigned int dedupe_percentage; unsigned int time_based; unsigned int disable_lat; unsigned int disable_clat; @@ -403,8 +404,9 @@ struct thread_options_pack { uint32_t scramble_buffers; uint8_t buffer_pattern[MAX_PATTERN_SIZE]; uint32_t buffer_pattern_bytes; - unsigned int compress_percentage; - unsigned int compress_chunk; + uint32_t compress_percentage; + uint32_t compress_chunk; + uint32_t dedupe_percentage; uint32_t time_based; uint32_t disable_lat; uint32_t disable_clat;