Mixed dedup and compression
author    Bar David <bardavvid@gmail.com>
Sun, 24 Oct 2021 10:59:50 +0000 (13:59 +0300)
committer Bar David <bardavvid@gmail.com>
Sun, 21 Nov 2021 07:16:59 +0000 (09:16 +0200)
Introduce support for dedupe and compression on the same
job. When used together, compression is calculated from the
unique (non-deduped) capacity. E.g. with dedupe_percentage=50
and buffer_compress_percentage=50, the total reduction should
be 75%: 50% of the buffers are deduped, and 50% of each
remaining buffer is compressible.
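
A worked example of the expected reduction (numbers are
illustrative only): writing 100 buffers, 50 are duplicates of
working-set pages; each of the remaining 50 is 50% compressible,
reclaiming the equivalent of 25 more buffers. Unique,
incompressible data thus amounts to ~25 buffers, i.e. ~75%
total reduction.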

Signed-off-by: Bar David <bardavvid@gmail.com>
DEDUPE-TODO
dedupe.c
io_u.c

diff --git a/DEDUPE-TODO b/DEDUPE-TODO
index 1f3ee9da641455637f304a4c37e2270c7f3bd4f4..4b0bfd1d62ce26a5ddbe801662ec8c462f850306 100644
--- a/DEDUPE-TODO
+++ b/DEDUPE-TODO
@@ -1,6 +1,3 @@
-- Mixed buffers of dedupe-able and compressible data.
-  Major usecase in performance benchmarking of storage subsystems.
-
 - Shifted dedup-able data.
   Allow for dedup buffer generation to shift contents by random number
   of sectors (fill the gaps with uncompressible data). Some storage
diff --git a/dedupe.c b/dedupe.c
index 043a376c6757641b5f2817c21b6d9b819e037bbb..fd116dfba4933396ee6a928dd87866d417cf67a1 100644
--- a/dedupe.c
+++ b/dedupe.c
@@ -2,12 +2,14 @@
 
 int init_dedupe_working_set_seeds(struct thread_data *td)
 {
-       unsigned long long i;
+       unsigned long long i, j, num_seed_advancements;
        struct frand_state dedupe_working_set_state = {0};
 
        if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
                return 0;
 
+       num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
+               min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
        /*
         * The dedupe working set keeps seeds of unique data (generated by buf_state).
         * Dedupe-ed pages will be generated using those seeds.
@@ -21,7 +23,13 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
        frand_copy(&dedupe_working_set_state, &td->buf_state);
        for (i = 0; i < td->num_unique_pages; i++) {
                frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
-               __get_next_seed(&dedupe_working_set_state);
+               /*
+                * When compression is used, the seed is advanced multiple
+                * times to generate the buffer. We want to regenerate the
+                * same buffer when deduping against this page.
+                */
+               for (j = 0; j < num_seed_advancements; j++)
+                       __get_next_seed(&dedupe_working_set_state);
        }
 
        return 0;
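
A worked example of the seed-advancement count (numbers are
illustrative, not from this commit): with min_bs[DDIR_WRITE]=8192
and compress_chunk=2048, num_seed_advancements = 8192 / 2048 = 4,
i.e. one advancement per compress-chunk-sized fill of a unique
page. With compression disabled (compress_chunk=0), min_not_zero()
falls back to min_bs and the count is 1, preserving the previous
single advancement per page.
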
diff --git a/io_u.c b/io_u.c
index 586a4befdce0969e596f9243abc73044b4b75fda..3c72d63d0d5368db1ecae9158371f99efb9a27e0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2230,27 +2230,30 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr
 
        if (o->compress_percentage || o->dedupe_percentage) {
                unsigned int perc = td->o.compress_percentage;
-               struct frand_state *rs;
+               struct frand_state *rs = NULL;
                unsigned long long left = max_bs;
                unsigned long long this_write;
 
                do {
-                       rs = get_buf_state(td);
+                       /*
+                        * Buffers are either entirely dedupe-able or not.
+                        * If we choose to dedupe, the buffer should undergo
+                        * the same manipulation as the original write,
+                        * which means we should retrace the steps we took
+                        * for compression as well.
+                        */
+                       if (!rs)
+                               rs = get_buf_state(td);
 
                        min_write = min(min_write, left);
 
-                       if (perc) {
-                               this_write = min_not_zero(min_write,
-                                                       (unsigned long long) td->o.compress_chunk);
+                       this_write = min_not_zero(min_write,
+                                               (unsigned long long) td->o.compress_chunk);
 
-                               fill_random_buf_percentage(rs, buf, perc,
-                                       this_write, this_write,
-                                       o->buffer_pattern,
-                                       o->buffer_pattern_bytes);
-                       } else {
-                               fill_random_buf(rs, buf, min_write);
-                               this_write = min_write;
-                       }
+                       fill_random_buf_percentage(rs, buf, perc,
+                               this_write, this_write,
+                               o->buffer_pattern,
+                               o->buffer_pattern_bytes);
 
                        buf += this_write;
                        left -= this_write;
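
The replay requirement above can be demonstrated with a minimal,
self-contained sketch. This is not fio code: next_seed() and
fill_chunk() are toy stand-ins for __get_next_seed() and the
buffer-fill routines, and the sizes are arbitrary. It shows why
the stored working-set seed for each page must be advanced
num_seed_advancements times: the write path consumes one seed per
compress chunk, so the dedupe replay must retrace exactly those
steps to reproduce the page byte-for-byte.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>

/* toy xorshift step standing in for __get_next_seed() */
static uint64_t next_seed(uint64_t s)
{
	s ^= s << 13;
	s ^= s >> 7;
	s ^= s << 17;
	return s;
}

/* deterministic fill derived only from the seed (toy stand-in
 * for the buffer-fill routines) */
static void fill_chunk(uint64_t seed, unsigned char *buf, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		buf[i] = (unsigned char)(seed >> ((i % 8) * 8));
}

int main(void)
{
	enum { PAGES = 3, BS = 16, CHUNK = 4 }; /* stand-ins for min_bs / compress_chunk */
	const size_t advancements = BS / CHUNK; /* num_seed_advancements in the patch */
	unsigned char stream[PAGES][BS], replay[BS];
	uint64_t seeds[PAGES];                  /* dedupe_working_set_states */
	uint64_t s = 0x1234567887654321ULL;
	size_t p, i;

	/*
	 * Generate the unique pages the way the write path does: a
	 * continuous seed stream, one advancement per compress chunk.
	 * Record the state at each page boundary, i.e. advance the
	 * stored seed num_seed_advancements times per page (the fix).
	 */
	for (p = 0; p < PAGES; p++) {
		seeds[p] = s;
		for (i = 0; i < advancements; i++) {
			fill_chunk(s, stream[p] + i * CHUNK, CHUNK);
			s = next_seed(s);
		}
	}

	/* dedupe replay of page 2: restart from its stored seed and
	 * retrace the same number of advancements */
	s = seeds[2];
	for (i = 0; i < advancements; i++) {
		fill_chunk(s, replay + i * CHUNK, CHUNK);
		s = next_seed(s);
	}

	printf("page 2 %s\n",
	       memcmp(stream[2], replay, BS) == 0 ? "matches" : "differs");
	return 0;
}

Had the stored seed been advanced only once per page (the pre-fix
behavior), seeds[2] would not match the state the write path
actually used for page 2, and the replayed buffer would differ.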