Merge branch 'dedupe_and_compression' of https://github.com/bardavid/fio
author    Jens Axboe <axboe@kernel.dk>
          Sun, 21 Nov 2021 13:43:05 +0000 (06:43 -0700)
committer Jens Axboe <axboe@kernel.dk>
          Sun, 21 Nov 2021 13:43:05 +0000 (06:43 -0700)
* 'dedupe_and_compression' of https://github.com/bardavid/fio:
  fio-dedup: adjusted the binary to support compression
  Mixed dedup and compression

DEDUPE-TODO
dedupe.c
io_u.c
t/dedupe.c

diff --git a/DEDUPE-TODO b/DEDUPE-TODO
index 1f3ee9da641455637f304a4c37e2270c7f3bd4f4..4b0bfd1d62ce26a5ddbe801662ec8c462f850306 100644 (file)
--- a/DEDUPE-TODO
+++ b/DEDUPE-TODO
@@ -1,6 +1,3 @@
-- Mixed buffers of dedupe-able and compressible data.
-  Major usecase in performance benchmarking of storage subsystems.
-
 - Shifted dedup-able data.
   Allow for dedup buffer generation to shift contents by random number
   of sectors (fill the gaps with uncompressible data). Some storage
diff --git a/dedupe.c b/dedupe.c
index 043a376c6757641b5f2817c21b6d9b819e037bbb..fd116dfba4933396ee6a928dd87866d417cf67a1 100644 (file)
--- a/dedupe.c
+++ b/dedupe.c
@@ -2,12 +2,14 @@
 
 int init_dedupe_working_set_seeds(struct thread_data *td)
 {
-       unsigned long long i;
+       unsigned long long i, j, num_seed_advancements;
        struct frand_state dedupe_working_set_state = {0};
 
        if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
                return 0;
 
+       num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
+               min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
        /*
         * The dedupe working set keeps seeds of unique data (generated by buf_state).
         * Dedupe-ed pages will be generated using those seeds.
@@ -21,7 +23,13 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
        frand_copy(&dedupe_working_set_state, &td->buf_state);
        for (i = 0; i < td->num_unique_pages; i++) {
                frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
-               __get_next_seed(&dedupe_working_set_state);
+               /*
+                * When compression is used, the seed is advanced multiple
+                * times to generate the buffer. We want to regenerate the
+                * same buffer when deduping against this page.
+                */
+               for (j = 0; j < num_seed_advancements; j++)
+                       __get_next_seed(&dedupe_working_set_state);
        }
 
        return 0;
diff --git a/io_u.c b/io_u.c
index 586a4befdce0969e596f9243abc73044b4b75fda..3c72d63d0d5368db1ecae9158371f99efb9a27e0 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -2230,27 +2230,30 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr
 
        if (o->compress_percentage || o->dedupe_percentage) {
                unsigned int perc = td->o.compress_percentage;
-               struct frand_state *rs;
+               struct frand_state *rs = NULL;
                unsigned long long left = max_bs;
                unsigned long long this_write;
 
                do {
-                       rs = get_buf_state(td);
+                       /*
+                        * Buffers are either entirely dedupe-able or not.
+                        * If we choose to dedupe, the buffer must undergo
+                        * the same manipulation as the original write, which
+                        * means retracing the steps taken for compression
+                        * as well.
+                        */
+                       if (!rs)
+                               rs = get_buf_state(td);
 
                        min_write = min(min_write, left);
 
-                       if (perc) {
-                               this_write = min_not_zero(min_write,
-                                                       (unsigned long long) td->o.compress_chunk);
+                       this_write = min_not_zero(min_write,
+                                               (unsigned long long) td->o.compress_chunk);
 
-                               fill_random_buf_percentage(rs, buf, perc,
-                                       this_write, this_write,
-                                       o->buffer_pattern,
-                                       o->buffer_pattern_bytes);
-                       } else {
-                               fill_random_buf(rs, buf, min_write);
-                               this_write = min_write;
-                       }
+                       fill_random_buf_percentage(rs, buf, perc,
+                               this_write, this_write,
+                               o->buffer_pattern,
+                               o->buffer_pattern_bytes);
 
                        buf += this_write;
                        left -= this_write;
diff --git a/t/dedupe.c b/t/dedupe.c
index 8b659c76c71caca57be3e80c9b4ed94077fb79a8..5f72ed76cf27a561a5f52483156b14460ca4c766 100644 (file)
--- a/t/dedupe.c
+++ b/t/dedupe.c
 
 #include "../lib/bloom.h"
 #include "debug.h"
+#include "zlib.h"
+
+struct zlib_ctrl {
+       z_stream stream;
+       unsigned char *buf_in;
+       unsigned char *buf_out;
+};
 
 struct worker_thread {
+       struct zlib_ctrl zc;
        pthread_t thread;
-
-       volatile int done;
-
-       int fd;
        uint64_t cur_offset;
        uint64_t size;
-
+       unsigned long long unique_capacity;
        unsigned long items;
        unsigned long dupes;
        int err;
+       int fd;
+       volatile int done;
 };
 
 struct extent {
@@ -68,6 +74,7 @@ static unsigned int odirect;
 static unsigned int collision_check;
 static unsigned int print_progress = 1;
 static unsigned int use_bloom = 1;
+static unsigned int compression;
 
 static uint64_t total_size;
 static uint64_t cur_offset;
@@ -135,6 +142,34 @@ static int read_block(int fd, void *buf, off_t offset)
        return __read_block(fd, buf, offset, blocksize);
 }
 
+static void account_unique_capacity(uint64_t offset, uint64_t *unique_capacity,
+                                    struct zlib_ctrl *zc)
+{
+       z_stream *stream = &zc->stream;
+       unsigned long bound, compressed_len;
+       int ret;
+
+       if (read_block(file.fd, zc->buf_in, offset))
+               return;
+
+       bound = deflateBound(stream, blocksize);
+
+       stream->next_in = zc->buf_in;
+       stream->avail_in = blocksize;
+       stream->avail_out = bound;
+       stream->next_out = zc->buf_out;
+
+       ret = deflate(stream, Z_FINISH);
+       assert(ret != Z_STREAM_ERROR);
+
+       /*
+        * avail_out started at the deflateBound() upper bound, so the
+        * difference is the number of compressed bytes deflate produced.
+        */
+       compressed_len = bound - stream->avail_out;
+
+       if (dump_output)
+               printf("offset 0x%lx compressed to %lu blocksize %u ratio %.2f\n",
+                               (unsigned long) offset, compressed_len, blocksize,
+                               (float) compressed_len / (float) blocksize);
+
+       *unique_capacity += compressed_len;
+
+       deflateReset(stream);
+}
+
 static void add_item(struct chunk *c, struct item *i)
 {
        /*      
@@ -188,7 +223,7 @@ static struct chunk *alloc_chunk(void)
        return c;
 }
 
-static void insert_chunk(struct item *i)
+static void insert_chunk(struct item *i, uint64_t *unique_capacity, struct zlib_ctrl *zc)
 {
        struct fio_rb_node **p, *parent;
        struct chunk *c;
@@ -228,12 +263,14 @@ static void insert_chunk(struct item *i)
        memcpy(c->hash, i->hash, sizeof(i->hash));
        rb_link_node(&c->rb_node, parent, p);
        rb_insert_color(&c->rb_node, &rb_root);
+       if (compression)
+               account_unique_capacity(i->offset, unique_capacity, zc);
 add:
        add_item(c, i);
 }
 
 static void insert_chunks(struct item *items, unsigned int nitems,
-                         uint64_t *ndupes)
+                         uint64_t *ndupes, uint64_t *unique_capacity, struct zlib_ctrl *zc)
 {
        int i;
 
@@ -248,7 +285,7 @@ static void insert_chunks(struct item *items, unsigned int nitems,
                        r = bloom_set(bloom, items[i].hash, s);
                        *ndupes += r;
                } else
-                       insert_chunk(&items[i]);
+                       insert_chunk(&items[i], unique_capacity, zc);
        }
 
        fio_sem_up(rb_lock);
@@ -277,6 +314,7 @@ static int do_work(struct worker_thread *thread, void *buf)
        off_t offset;
        int nitems = 0;
        uint64_t ndupes = 0;
+       uint64_t unique_capacity = 0;
        struct item *items;
 
        offset = thread->cur_offset;
@@ -296,14 +334,29 @@ static int do_work(struct worker_thread *thread, void *buf)
                nitems++;
        }
 
-       insert_chunks(items, nitems, &ndupes);
+       insert_chunks(items, nitems, &ndupes, &unique_capacity, &thread->zc);
 
        free(items);
        thread->items += nitems;
        thread->dupes += ndupes;
+       thread->unique_capacity += unique_capacity;
        return 0;
 }
 
+static void thread_init_zlib_control(struct worker_thread *thread)
+{
+       z_stream *stream = &thread->zc.stream;
+
+       stream->zalloc = Z_NULL;
+       stream->zfree = Z_NULL;
+       stream->opaque = Z_NULL;
+
+       if (deflateInit(stream, Z_DEFAULT_COMPRESSION) != Z_OK)
+               return;
+
+       thread->zc.buf_in = fio_memalign(blocksize, blocksize, false);
+       thread->zc.buf_out = fio_memalign(blocksize, deflateBound(stream, blocksize), false);
+}
+
 static void *thread_fn(void *data)
 {
        struct worker_thread *thread = data;
@@ -311,6 +364,8 @@ static void *thread_fn(void *data)
 
        buf = fio_memalign(blocksize, chunk_size, false);
 
+       thread_init_zlib_control(thread);
+
        do {
                if (get_work(&thread->cur_offset, &thread->size)) {
                        thread->err = 1;
@@ -370,7 +425,7 @@ static void show_progress(struct worker_thread *threads, unsigned long total)
 }
 
 static int run_dedupe_threads(struct fio_file *f, uint64_t dev_size,
-                             uint64_t *nextents, uint64_t *nchunks)
+                             uint64_t *nextents, uint64_t *nchunks, uint64_t *unique_capacity)
 {
        struct worker_thread *threads;
        unsigned long nitems, total_items;
@@ -398,11 +453,13 @@ static int run_dedupe_threads(struct fio_file *f, uint64_t dev_size,
        nitems = 0;
        *nextents = 0;
        *nchunks = 1;
+       *unique_capacity = 0;
        for (i = 0; i < num_threads; i++) {
                void *ret;
                pthread_join(threads[i].thread, &ret);
                nitems += threads[i].items;
                *nchunks += threads[i].dupes;
+               *unique_capacity += threads[i].unique_capacity;
        }
 
        printf("Threads(%u): %lu items processed\n", num_threads, nitems);
@@ -416,7 +473,7 @@ static int run_dedupe_threads(struct fio_file *f, uint64_t dev_size,
 }
 
 static int dedupe_check(const char *filename, uint64_t *nextents,
-                       uint64_t *nchunks)
+                       uint64_t *nchunks, uint64_t *unique_capacity)
 {
        uint64_t dev_size;
        struct stat sb;
@@ -453,7 +510,7 @@ static int dedupe_check(const char *filename, uint64_t *nextents,
 
        printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads);
 
-       return run_dedupe_threads(&file, dev_size, nextents, nchunks);
+       return run_dedupe_threads(&file, dev_size, nextents, nchunks, unique_capacity);
 err:
        if (file.fd != -1)
                close(file.fd);
@@ -473,9 +530,27 @@ static void show_chunk(struct chunk *c)
        }
 }
 
-static void show_stat(uint64_t nextents, uint64_t nchunks, uint64_t ndupextents)
+static const char *capacity_unit[] = {"B", "KB", "MB", "GB", "TB", "PB", "EB"};
+
+static uint64_t bytes_to_human_readable_unit(uint64_t n, const char **unit_out)
+{
+       uint8_t i = 0;
+
+       while (n >= 1024) {
+               i++;
+               n /= 1024;
+       }
+
+       *unit_out = capacity_unit[i];
+
+       return n;
+}
+
+static void show_stat(uint64_t nextents, uint64_t nchunks, uint64_t ndupextents, uint64_t unique_capacity)
 {
        double perc, ratio;
+       const char *unit;
+       uint64_t uc_human;
 
        printf("Extents=%lu, Unique extents=%lu", (unsigned long) nextents, (unsigned long) nchunks);
        if (!bloom)
@@ -495,12 +570,16 @@ static void show_stat(uint64_t nextents, uint64_t nchunks, uint64_t ndupextents)
        perc *= 100.0;
        printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
 
+       if (compression) {
+               uc_human = bytes_to_human_readable_unit(unique_capacity, &unit);
+               printf("Unique capacity %lu%s\n", (unsigned long) uc_human, unit);
+       }
 }
 
 static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks, uint64_t *ndupextents)
 {
        struct fio_rb_node *n;
-
        *nchunks = *nextents = *ndupextents = 0;
 
        n = rb_first(&rb_root);
@@ -532,18 +611,19 @@ static int usage(char *argv[])
        log_err("\t-c\tFull collision check\n");
        log_err("\t-B\tUse probabilistic bloom filter\n");
        log_err("\t-p\tPrint progress indicator\n");
+       log_err("\t-C\tCalculate compressible size\n");
        return 1;
 }
 
 int main(int argc, char *argv[])
 {
-       uint64_t nextents = 0, nchunks = 0, ndupextents = 0;
+       uint64_t nextents = 0, nchunks = 0, ndupextents = 0, unique_capacity = 0;
        int c, ret;
 
        arch_init(argv);
        debug_init();
 
-       while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
+       while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:C:")) != -1) {
                switch (c) {
                case 'b':
                        blocksize = atoi(optarg);
@@ -566,13 +646,16 @@ int main(int argc, char *argv[])
                case 'B':
                        use_bloom = atoi(optarg);
                        break;
+               case 'C':
+                       compression = atoi(optarg);
+                       break;
                case '?':
                default:
                        return usage(argv);
                }
        }
 
-       if (collision_check || dump_output)
+       if (collision_check || dump_output || compression)
                use_bloom = 0;
 
        if (!num_threads)
@@ -586,13 +669,13 @@ int main(int argc, char *argv[])
        rb_root = RB_ROOT;
        rb_lock = fio_sem_init(FIO_SEM_UNLOCKED);
 
-       ret = dedupe_check(argv[optind], &nextents, &nchunks);
+       ret = dedupe_check(argv[optind], &nextents, &nchunks, &unique_capacity);
 
        if (!ret) {
                if (!bloom)
                        iter_rb_tree(&nextents, &nchunks, &ndupextents);
 
-               show_stat(nextents, nchunks, ndupextents);
+               show_stat(nextents, nchunks, ndupextents, unique_capacity);
        }
 
        fio_sem_remove(rb_lock);