Mixed dedup and compression
author    Bar David <bardavvid@gmail.com>
Sun, 24 Oct 2021 10:59:50 +0000 (13:59 +0300)
committer Bar David <bardavvid@gmail.com>
Sun, 21 Nov 2021 07:16:59 +0000 (09:16 +0200)
Introduce support for dedupe and compression on the same
job. When used together, compression is calculated from the
unique (non-deduped) capacity. E.g. with dedupe_percentage=50
and buffer_compress_percentage=50, the total reduction should
be 75%: 50% of the buffers are deduped, and 50% of each
remaining buffer is compressible.
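
A worked example of the expected reduction (numbers are
illustrative only): writing 100 buffers, 50 are duplicates of
working-set pages; each of the remaining 50 is 50% compressible,
reclaiming the equivalent of 25 more buffers. Unique,
incompressible data thus amounts to ~25 buffers, i.e. ~75%
total reduction.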

Signed-off-by: Bar David <bardavvid@gmail.com>
DEDUPE-TODO
dedupe.c
io_u.c

diff --git a/DEDUPE-TODO b/DEDUPE-TODO
index 1f3ee9da641455637f304a4c37e2270c7f3bd4f4..4b0bfd1d62ce26a5ddbe801662ec8c462f850306 100644
--- a/DEDUPE-TODO
+++ b/DEDUPE-TODO
@@ -1,6 +1,3 @@
-- Mixed buffers of dedupe-able and compressible data.
-  Major usecase in performance benchmarking of storage subsystems.
-
 - Shifted dedup-able data.
   Allow for dedup buffer generation to shift contents by random number
   of sectors (fill the gaps with uncompressible data). Some storage
diff --git a/dedupe.c b/dedupe.c
index 043a376c6757641b5f2817c21b6d9b819e037bbb..fd116dfba4933396ee6a928dd87866d417cf67a1 100644
--- a/dedupe.c
+++ b/dedupe.c
@@ -2,12 +2,14 @@
 
 int init_dedupe_working_set_seeds(struct thread_data *td)
 {
-       unsigned long long i;
+       unsigned long long i, j, num_seed_advancements;
        struct frand_state dedupe_working_set_state = {0};
 
        if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
                return 0;
 
+       num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
+               min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
        /*
         * The dedupe working set keeps seeds of unique data (generated by buf_state).
         * Dedupe-ed pages will be generated using those seeds.
@@ -21,7 +23,13 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
        frand_copy(&dedupe_working_set_state, &td->buf_state);
        for (i = 0; i < td->num_unique_pages; i++) {
                frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
-               __get_next_seed(&dedupe_working_set_state);
+               /*
+                * When compression is used, the seed is advanced multiple
+                * times to generate the buffer. We want to regenerate the
+                * same buffer when deduping against this page.
+                */
+               for (j = 0; j < num_seed_advancements; j++)
+                       __get_next_seed(&dedupe_working_set_state);
        }
 
        return 0;
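
A worked example of the seed-advancement count (numbers are
illustrative, not from this commit): with min_bs[DDIR_WRITE]=8192
and compress_chunk=2048, num_seed_advancements = 8192 / 2048 = 4,
i.e. one advancement per compress-chunk-sized fill of a unique
page. With compression disabled (compress_chunk=0), min_not_zero()
falls back to min_bs and the count is 1, preserving the previous
single advancement per page.
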
diff --git a/io_u.c b/io_u.c
index 586a4befdce0969e596f9243abc73044b4b75fda..3c72d63d0d5368db1ecae9158371f99efb9a27e0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2230,27 +2230,30 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr
 
        if (o->compress_percentage || o->dedupe_percentage) {
                unsigned int perc = td->o.compress_percentage;
-               struct frand_state *rs;
+               struct frand_state *rs = NULL;
                unsigned long long left = max_bs;
                unsigned long long this_write;
 
                do {
-                       rs = get_buf_state(td);
+                       /*
+                        * Buffers are either entirely dedupe-able or not.
+                        * If we choose to dedupe, the buffer should undergo
+                        * the same manipulation as the original write,
+                        * which means we should retrace the steps we took
+                        * for compression as well.
+                        */
+                       if (!rs)
+                               rs = get_buf_state(td);
 
                        min_write = min(min_write, left);
 
-                       if (perc) {
-                               this_write = min_not_zero(min_write,
-                                                       (unsigned long long) td->o.compress_chunk);
+                       this_write = min_not_zero(min_write,
+                                               (unsigned long long) td->o.compress_chunk);
 
-                               fill_random_buf_percentage(rs, buf, perc,
-                                       this_write, this_write,
-                                       o->buffer_pattern,
-                                       o->buffer_pattern_bytes);
-                       } else {
-                               fill_random_buf(rs, buf, min_write);
-                               this_write = min_write;
-                       }
+                       fill_random_buf_percentage(rs, buf, perc,
+                               this_write, this_write,
+                               o->buffer_pattern,
+                               o->buffer_pattern_bytes);
 
                        buf += this_write;
                        left -= this_write;
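
The replay requirement above can be demonstrated with a minimal,
self-contained sketch. This is not fio code: next_seed() and
fill_chunk() are toy stand-ins for __get_next_seed() and the
buffer-fill routines, and the sizes are arbitrary. It shows why
the stored working-set seed for each page must be advanced
num_seed_advancements times: the write path consumes one seed per
compress chunk, so the dedupe replay must retrace exactly those
steps to reproduce the page byte-for-byte.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>

/* toy xorshift step standing in for __get_next_seed() */
static uint64_t next_seed(uint64_t s)
{
	s ^= s << 13;
	s ^= s >> 7;
	s ^= s << 17;
	return s;
}

/* deterministic fill derived only from the seed (toy stand-in
 * for the buffer-fill routines) */
static void fill_chunk(uint64_t seed, unsigned char *buf, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		buf[i] = (unsigned char)(seed >> ((i % 8) * 8));
}

int main(void)
{
	enum { PAGES = 3, BS = 16, CHUNK = 4 }; /* stand-ins for min_bs / compress_chunk */
	const size_t advancements = BS / CHUNK; /* num_seed_advancements in the patch */
	unsigned char stream[PAGES][BS], replay[BS];
	uint64_t seeds[PAGES];                  /* dedupe_working_set_states */
	uint64_t s = 0x1234567887654321ULL;
	size_t p, i;

	/*
	 * Generate the unique pages the way the write path does: a
	 * continuous seed stream, one advancement per compress chunk.
	 * Record the state at each page boundary, i.e. advance the
	 * stored seed num_seed_advancements times per page (the fix).
	 */
	for (p = 0; p < PAGES; p++) {
		seeds[p] = s;
		for (i = 0; i < advancements; i++) {
			fill_chunk(s, stream[p] + i * CHUNK, CHUNK);
			s = next_seed(s);
		}
	}

	/* dedupe replay of page 2: restart from its stored seed and
	 * retrace the same number of advancements */
	s = seeds[2];
	for (i = 0; i < advancements; i++) {
		fill_chunk(s, replay + i * CHUNK, CHUNK);
		s = next_seed(s);
	}

	printf("page 2 %s\n",
	       memcmp(stream[2], replay, BS) == 0 ? "matches" : "differs");
	return 0;
}

Had the stored seed been advanced only once per page (the pre-fix
behavior), seeds[2] would not match the state the write path
actually used for page 2, and the replayed buffer would differ.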