From eb57e710557147c880ade7568538ad7435a6f0dd Mon Sep 17 00:00:00 2001 From: Bar David Date: Sun, 24 Oct 2021 13:59:50 +0300 Subject: [PATCH] Mixed dedup and compression Introducing support for dedupe and compression on the same job. When used together, compression is calculated from unique capacity. E.g. when using dedupe_percentage=50 and buffer_compress_percentage=50, the total reduction should be 75%: 50% of the buffers are deduped, and the remaining unique buffers are compressed by 50%. Signed-off-by: Bar David --- DEDUPE-TODO | 3 --- dedupe.c | 12 ++++++++++-- io_u.c | 29 ++++++++++++++++------------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/DEDUPE-TODO b/DEDUPE-TODO index 1f3ee9da..4b0bfd1d 100644 --- a/DEDUPE-TODO +++ b/DEDUPE-TODO @@ -1,6 +1,3 @@ -- Mixed buffers of dedupe-able and compressible data. - Major usecase in performance benchmarking of storage subsystems. - - Shifted dedup-able data. Allow for dedup buffer generation to shift contents by random number of sectors (fill the gaps with uncompressible data). Some storage diff --git a/dedupe.c b/dedupe.c index 043a376c..fd116dfb 100644 --- a/dedupe.c +++ b/dedupe.c @@ -2,12 +2,14 @@ int init_dedupe_working_set_seeds(struct thread_data *td) { - unsigned long long i; + unsigned long long i, j, num_seed_advancements; struct frand_state dedupe_working_set_state = {0}; if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET)) return 0; + num_seed_advancements = td->o.min_bs[DDIR_WRITE] / + min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk); /* * The dedupe working set keeps seeds of unique data (generated by buf_state). * Dedupe-ed pages will be generated using those seeds. 
@@ -21,7 +23,13 @@ int init_dedupe_working_set_seeds(struct thread_data *td) frand_copy(&dedupe_working_set_state, &td->buf_state); for (i = 0; i < td->num_unique_pages; i++) { frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state); - __get_next_seed(&dedupe_working_set_state); + /* + * When compression is used the seed is advanced multiple times to + * generate the buffer. We want to regenerate the same buffer when + * deduping against this page + */ + for (j = 0; j < num_seed_advancements; j++) + __get_next_seed(&dedupe_working_set_state); } return 0; diff --git a/io_u.c b/io_u.c index 586a4bef..3c72d63d 100644 --- a/io_u.c +++ b/io_u.c @@ -2230,27 +2230,30 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr if (o->compress_percentage || o->dedupe_percentage) { unsigned int perc = td->o.compress_percentage; - struct frand_state *rs; + struct frand_state *rs = NULL; unsigned long long left = max_bs; unsigned long long this_write; do { - rs = get_buf_state(td); + /* + * Buffers are either entirely dedupe-able or not. + * If we choose to dedup, the buffer should undergo + * the same manipulation as the original write. Which + * means we should retrace the steps we took for compression + * as well. + */ + if (!rs) + rs = get_buf_state(td); min_write = min(min_write, left); - if (perc) { - this_write = min_not_zero(min_write, - (unsigned long long) td->o.compress_chunk); + this_write = min_not_zero(min_write, + (unsigned long long) td->o.compress_chunk); - fill_random_buf_percentage(rs, buf, perc, - this_write, this_write, - o->buffer_pattern, - o->buffer_pattern_bytes); - } else { - fill_random_buf(rs, buf, min_write); - this_write = min_write; - } + fill_random_buf_percentage(rs, buf, perc, + this_write, this_write, + o->buffer_pattern, + o->buffer_pattern_bytes); buf += this_write; left -= this_write; -- 2.25.1