Merge branch 'master' of https://github.com/celestinechen/fio
[fio.git] / dedupe.c
... / ...
CommitLineData
1#include "fio.h"
2
3/**
4 * initializes the global dedup workset.
5 * this needs to be called after all jobs' seeds
6 * have been initialized
7 */
8int init_global_dedupe_working_set_seeds(void)
9{
10 int i;
11 struct thread_data *td;
12
13 for_each_td(td, i) {
14 if (!td->o.dedupe_global)
15 continue;
16
17 if (init_dedupe_working_set_seeds(td, 1))
18 return 1;
19 }
20
21 return 0;
22}
23
24int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
25{
26 int tindex;
27 struct thread_data *td_seed;
28 unsigned long long i, j, num_seed_advancements, pages_per_seed;
29 struct frand_state dedupe_working_set_state = {0};
30
31 if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
32 return 0;
33
34 tindex = td->thread_number - 1;
35 num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
36 min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
37 /*
38 * The dedupe working set keeps seeds of unique data (generated by buf_state).
39 * Dedupe-ed pages will be generated using those seeds.
40 */
41 td->num_unique_pages = (td->o.size * (unsigned long long)td->o.dedupe_working_set_percentage / 100) / td->o.min_bs[DDIR_WRITE];
42 td->dedupe_working_set_states = malloc(sizeof(struct frand_state) * td->num_unique_pages);
43 if (!td->dedupe_working_set_states) {
44 log_err("fio: could not allocate dedupe working set\n");
45 return 1;
46 }
47
48 frand_copy(&dedupe_working_set_state, &td->buf_state);
49 frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
50 pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
51 for (i = 1; i < td->num_unique_pages; i++) {
52 /*
53 * When compression is used the seed is advanced multiple times to
54 * generate the buffer. We want to regenerate the same buffer when
55 * deduping against this page
56 */
57 for (j = 0; j < num_seed_advancements; j++)
58 __get_next_seed(&dedupe_working_set_state);
59
60 /*
61 * When global dedup is used, we rotate the seeds to allow
62 * generating same buffers across different jobs. Deduplication buffers
63 * are spread evenly across jobs participating in global dedupe
64 */
65 if (global_dedup && i % pages_per_seed == 0) {
66 td_seed = tnumber_to_td(++tindex % thread_number);
67 frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
68 }
69
70 frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
71 }
72
73 return 0;
74}