Merge branch 'global_dedup' of https://github.com/bardavid/fio
[fio.git] / dedupe.c
index fd116dfba4933396ee6a928dd87866d417cf67a1..8214a786b04800a6848a706437a77aafa6a27c3e 100644 (file)
--- a/dedupe.c
+++ b/dedupe.c
@@ -1,13 +1,37 @@
 #include "fio.h"
 
-int init_dedupe_working_set_seeds(struct thread_data *td)
+/**
+ * For each td visited by for_each_td() that has o.dedupe_global set, calls
+ * init_dedupe_working_set_seeds(td, 1). Must be called after all jobs' seeds
+ * have been initialized. Returns 1 on the first failing call, 0 otherwise.
+ */
+int init_global_dedupe_working_set_seeds(void)
 {
-       unsigned long long i, j, num_seed_advancements;
+       int i;
+       struct thread_data *td;
+
+       for_each_td(td, i) {
+               if (!td->o.dedupe_global)
+                       continue;
+
+               if (init_dedupe_working_set_seeds(td, 1))
+                       return 1;
+       }
+
+       return 0;
+}
+
+int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
+{
+       int tindex;
+       struct thread_data *td_seed;
+       unsigned long long i, j, num_seed_advancements, pages_per_seed;
        struct frand_state dedupe_working_set_state = {0};
 
        if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
                return 0;
 
+       tindex = td->thread_number - 1;
        num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
                min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
        /*
@@ -20,9 +44,11 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
                log_err("fio: could not allocate dedupe working set\n");
                return 1;
        }
+
        frand_copy(&dedupe_working_set_state, &td->buf_state);
-       for (i = 0; i < td->num_unique_pages; i++) {
-               frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
+       frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
+       pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
+       for (i = 1; i < td->num_unique_pages; i++) {
                /*
                 * When compression is used the seed is advanced multiple times to
                 * generate the buffer. We want to regenerate the same buffer when
@@ -30,6 +56,18 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
                 */
                for (j = 0; j < num_seed_advancements; j++)
                        __get_next_seed(&dedupe_working_set_state);
+
+               /*
+                * When global dedup is used, we rotate the seeds so that the
+                * same buffers can be generated across different jobs. Deduplication
+                * buffers are spread evenly across jobs participating in global dedupe.
+                */
+               if (global_dedup && i % pages_per_seed == 0) {
+                       td_seed = tnumber_to_td(++tindex % thread_number);
+                       frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
+               }
+
+               frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
        }
 
        return 0;