Merge branch 'master' of https://github.com/celestinechen/fio
[fio.git] / dedupe.c
CommitLineData
0d71aa98
BD
1#include "fio.h"
2
c49cfc76
BD
3/**
4 * initializes the global dedup workset.
5 * this needs to be called after all jobs' seeds
6 * have been initialized
7 */
8int init_global_dedupe_working_set_seeds(void)
0d71aa98 9{
da8f124f 10 for_each_td(td) {
c49cfc76
BD
11 if (!td->o.dedupe_global)
12 continue;
13
14 if (init_dedupe_working_set_seeds(td, 1))
15 return 1;
da8f124f 16 } end_for_each();
c49cfc76
BD
17
18 return 0;
19}
20
21int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
22{
23 int tindex;
24 struct thread_data *td_seed;
25 unsigned long long i, j, num_seed_advancements, pages_per_seed;
0d71aa98
BD
26 struct frand_state dedupe_working_set_state = {0};
27
28 if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
29 return 0;
30
c49cfc76 31 tindex = td->thread_number - 1;
eb57e710
BD
32 num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
33 min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
0d71aa98
BD
34 /*
35 * The dedupe working set keeps seeds of unique data (generated by buf_state).
36 * Dedupe-ed pages will be generated using those seeds.
37 */
38 td->num_unique_pages = (td->o.size * (unsigned long long)td->o.dedupe_working_set_percentage / 100) / td->o.min_bs[DDIR_WRITE];
39 td->dedupe_working_set_states = malloc(sizeof(struct frand_state) * td->num_unique_pages);
40 if (!td->dedupe_working_set_states) {
41 log_err("fio: could not allocate dedupe working set\n");
42 return 1;
43 }
c49cfc76 44
0d71aa98 45 frand_copy(&dedupe_working_set_state, &td->buf_state);
c49cfc76
BD
46 frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
47 pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
48 for (i = 1; i < td->num_unique_pages; i++) {
eb57e710
BD
49 /*
50 * When compression is used the seed is advanced multiple times to
51 * generate the buffer. We want to regenerate the same buffer when
52 * deduping against this page
53 */
54 for (j = 0; j < num_seed_advancements; j++)
55 __get_next_seed(&dedupe_working_set_state);
c49cfc76
BD
56
57 /*
58 * When global dedup is used, we rotate the seeds to allow
59 * generating same buffers across different jobs. Deduplication buffers
60 * are spread evenly across jobs participating in global dedupe
61 */
62 if (global_dedup && i % pages_per_seed == 0) {
63 td_seed = tnumber_to_td(++tindex % thread_number);
64 frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
65 }
66
67 frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
0d71aa98
BD
68 }
69
70 return 0;
71}