Introducing support for generation of dedup buffers
author Bar David <bardavvid@gmail.com>
Sun, 24 Apr 2022 09:25:57 +0000 (12:25 +0300)
committer Bar David <bardavvid@gmail.com>
Wed, 27 Apr 2022 06:15:33 +0000 (09:15 +0300)
across jobs. The dedup buffers are spread evenly
between the jobs that enabled the dedupe_global option.

Note: only dedupe_mode=working_set is supported.
Note: compression is supported when global dedup is enabled.

Signed-off-by: Bar David <bardavvid@gmail.com>
HOWTO.rst
backend.c
cconv.c
dedupe.c
dedupe.h
fio.1
init.c
options.c
server.h
thread_options.h

index a5fa432e4b59caa4838eb24fe5687aa2a13e43c1..6a3e09f51938788e4a272cc958a02d4e4c845558 100644 (file)
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1749,6 +1749,12 @@ Buffers and memory
        Note that size needs to be explicitly provided and only 1 file per
        job is supported
 
+.. option:: dedupe_global=bool
+
+       This controls whether the deduplication buffers will be shared amongst
+       all jobs that have this option set. The buffers are spread evenly between
+       participating jobs.
+
 .. option:: invalidate=bool
 
        Invalidate the buffer/page cache parts of the files to be used prior to
index 317e4f6c0d642d26c7f70ac2ab93aa33851082c2..ffbb7e2a0718b3049aaa55caec4780781bfee8ea 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -2570,6 +2570,11 @@ int fio_backend(struct sk_out *sk_out)
                setup_log(&agg_io_log[DDIR_TRIM], &p, "agg-trim_bw.log");
        }
 
+       if (init_global_dedupe_working_set_seeds()) {
+               log_err("fio: failed to initialize global dedupe working set\n");
+               return 1;
+       }
+
        startup_sem = fio_sem_init(FIO_SEM_LOCKED);
        if (!sk_out)
                is_local_backend = true;
diff --git a/cconv.c b/cconv.c
index 62d02e366e06473da2b74b55a090d0e46499af16..6c36afb72d1eefaa9badaf3262262a26a510e307 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -305,6 +305,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage);
        o->dedupe_mode = le32_to_cpu(top->dedupe_mode);
        o->dedupe_working_set_percentage = le32_to_cpu(top->dedupe_working_set_percentage);
+       o->dedupe_global = le32_to_cpu(top->dedupe_global);
        o->block_error_hist = le32_to_cpu(top->block_error_hist);
        o->replay_align = le32_to_cpu(top->replay_align);
        o->replay_scale = le32_to_cpu(top->replay_scale);
@@ -513,6 +514,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage);
        top->dedupe_mode = cpu_to_le32(o->dedupe_mode);
        top->dedupe_working_set_percentage = cpu_to_le32(o->dedupe_working_set_percentage);
+       top->dedupe_global = cpu_to_le32(o->dedupe_global);
        top->block_error_hist = cpu_to_le32(o->block_error_hist);
        top->replay_align = cpu_to_le32(o->replay_align);
        top->replay_scale = cpu_to_le32(o->replay_scale);
index fd116dfba4933396ee6a928dd87866d417cf67a1..8214a786b04800a6848a706437a77aafa6a27c3e 100644 (file)
--- a/dedupe.c
+++ b/dedupe.c
@@ -1,13 +1,37 @@
 #include "fio.h"
 
-int init_dedupe_working_set_seeds(struct thread_data *td)
+/**
+ * Initializes the global dedupe working set.
+ * This needs to be called after all jobs' seeds
+ * have been initialized.
+ */
+int init_global_dedupe_working_set_seeds(void)
 {
-       unsigned long long i, j, num_seed_advancements;
+       int i;
+       struct thread_data *td;
+
+       for_each_td(td, i) {
+               if (!td->o.dedupe_global)
+                       continue;
+
+               if (init_dedupe_working_set_seeds(td, 1))
+                       return 1;
+       }
+
+       return 0;
+}
+
+int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
+{
+       int tindex;
+       struct thread_data *td_seed;
+       unsigned long long i, j, num_seed_advancements, pages_per_seed;
        struct frand_state dedupe_working_set_state = {0};
 
        if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
                return 0;
 
+       tindex = td->thread_number - 1;
        num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
                min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
        /*
@@ -20,9 +44,11 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
                log_err("fio: could not allocate dedupe working set\n");
                return 1;
        }
+
        frand_copy(&dedupe_working_set_state, &td->buf_state);
-       for (i = 0; i < td->num_unique_pages; i++) {
-               frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
+       frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
+       pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
+       for (i = 1; i < td->num_unique_pages; i++) {
                /*
                 * When compression is used the seed is advanced multiple times to
                 * generate the buffer. We want to regenerate the same buffer when
@@ -30,6 +56,18 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
                 */
                for (j = 0; j < num_seed_advancements; j++)
                        __get_next_seed(&dedupe_working_set_state);
+
+               /*
+                * When global dedup is used, we rotate the seeds to allow
+                * generating the same buffers across different jobs. Deduplication buffers
+                * are spread evenly across jobs participating in global dedupe.
+                */
+               if (global_dedup && i % pages_per_seed == 0) {
+                       td_seed = tnumber_to_td(++tindex % thread_number);
+                       frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
+               }
+
+               frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
        }
 
        return 0;
index d4c4dc377905685c7973c131b798d333f78023d7..bd1f9c0c0bd1de75bd30fbcf231fc84d5192a695 100644 (file)
--- a/dedupe.h
+++ b/dedupe.h
@@ -1,6 +1,7 @@
 #ifndef DEDUPE_H
 #define DEDUPE_H
 
-int init_dedupe_working_set_seeds(struct thread_data *td);
+int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedupe);
+int init_global_dedupe_working_set_seeds(void);
 
 #endif
diff --git a/fio.1 b/fio.1
index a2ec836ff3976403d950891514e13ea3145b2ffb..609947dc41dede5a4f0da2fc8d67aa5b6386b6c2 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -1553,6 +1553,16 @@ Note that \fBsize\fR needs to be explicitly provided and only 1 file
 per job is supported
 .RE
 .TP
+.BI dedupe_global \fR=\fPbool
+This controls whether the deduplication buffers will be shared amongst
+all jobs that have this option set. The buffers are spread evenly between
+participating jobs.
+.P
+.RS
+Note that \fBdedupe_mode\fR must be set to \fBworking_set\fR for this to work.
+Can be used in combination with compression.
+.RE
+.TP
 .BI invalidate \fR=\fPbool
 Invalidate the buffer/page cache parts of the files to be used prior to
 starting I/O if the platform and file type support it. Defaults to true.
diff --git a/init.c b/init.c
index 6f1860518f689814913209ceb2f836b428bc41fb..f7d702f849c7c39797912c8d7774b0fc6dccf908 100644 (file)
--- a/init.c
+++ b/init.c
@@ -1541,7 +1541,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
        if (fixup_options(td))
                goto err;
 
-       if (init_dedupe_working_set_seeds(td))
+       if (!td->o.dedupe_global && init_dedupe_working_set_seeds(td, 0))
                goto err;
 
        /*
index e06d9b66adf69abf56ccfcf7ec8cb941e710e980..3b83573bd87e20c42486b4c3598e888d0e83da04 100644 (file)
--- a/options.c
+++ b/options.c
@@ -4665,6 +4665,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_IO_BUF,
        },
+       {
+               .name   = "dedupe_global",
+               .lname  = "Global deduplication",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct thread_options, dedupe_global),
+               .help   = "Share deduplication buffers across jobs",
+               .def    = "0",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IO_BUF,
+       },
        {
                .name   = "dedupe_mode",
                .lname  = "Dedupe mode",
index 0e62b6dfe8eac2df8aba17abe843ef99b800d538..b0c5e2dfafa0f8359b7e68d4be63db2ff48a614f 100644 (file)
--- a/server.h
+++ b/server.h
@@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 96,
+       FIO_SERVER_VER                  = 97,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
index 4162c42faf731db6ec1d434656032d31e8350777..634070af00eca3f9cfc3b5ed7e669b1ff240a7ad 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -263,6 +263,7 @@ struct thread_options {
        unsigned int dedupe_percentage;
        unsigned int dedupe_mode;
        unsigned int dedupe_working_set_percentage;
+       unsigned int dedupe_global;
        unsigned int time_based;
        unsigned int disable_lat;
        unsigned int disable_clat;
@@ -578,6 +579,7 @@ struct thread_options_pack {
        uint32_t dedupe_percentage;
        uint32_t dedupe_mode;
        uint32_t dedupe_working_set_percentage;
+       uint32_t dedupe_global;
        uint32_t time_based;
        uint32_t disable_lat;
        uint32_t disable_clat;
@@ -596,6 +598,7 @@ struct thread_options_pack {
        uint32_t lat_percentiles;
        uint32_t slat_percentiles;
        uint32_t percentile_precision;
+       uint32_t pad5;
        fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
        uint8_t read_iolog_file[FIO_TOP_STR_MAX];