summaryrefslogtreecommitdiff
path: root/dedupe.c
blob: 61705689184d84fae5ca9d422e17622c28822924 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "fio.h"

/**
 * Seed the shared dedupe working set across all jobs.
 *
 * Must be invoked only after every job's random seeds have been
 * initialized, because the per-job buf_state values are consumed
 * when the working set states are built.
 *
 * Returns 0 on success, 1 if any participating job fails to init.
 */
int init_global_dedupe_working_set_seeds(void)
{
	for_each_td(td) {
		/* Only jobs opted into global dedupe take part. */
		if (td->o.dedupe_global &&
		    init_dedupe_working_set_seeds(td, 1))
			return 1;
	} end_for_each();

	return 0;
}

/**
 * Build the per-job table of PRNG states used to regenerate dedupe pages.
 *
 * @td            job to initialize
 * @global_dedup  when true, seeds are rotated across all jobs so different
 *                jobs can produce identical (dedupe-able) buffers
 *
 * Returns 0 on success (or when dedupe is not in working-set mode for this
 * job), 1 on allocation/configuration failure.
 */
int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
{
	int tindex;
	struct thread_data *td_seed;
	unsigned long long i, j, num_seed_advancements, pages_per_seed;
	struct frand_state dedupe_working_set_state = {0};

	/* Nothing to do unless dedupe is enabled in working-set mode. */
	if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
		return 0;

	tindex = td->thread_number - 1;
	/*
	 * With compression, one buffer consumes compress_chunk-sized seed
	 * advancements; compute how many advancements one page represents.
	 * NOTE(review): assumes min_bs[DDIR_WRITE] is non-zero for dedupe
	 * workloads — confirm option validation guarantees this.
	 */
	num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
		min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
	/*
	 * The dedupe working set keeps seeds of unique data (generated by buf_state).
	 * Dedupe-ed pages will be generated using those seeds.
	 */
	td->num_unique_pages = (td->o.size * (unsigned long long)td->o.dedupe_working_set_percentage / 100) / td->o.min_bs[DDIR_WRITE];
	/*
	 * A zero-sized working set would make the unconditional write to
	 * dedupe_working_set_states[0] below an out-of-bounds heap write
	 * (malloc(0) commonly returns a valid zero-size pointer). Fail early.
	 */
	if (!td->num_unique_pages) {
		log_err("fio: dedupe working set is too small\n");
		return 1;
	}
	/* calloc checks the count * size multiplication for overflow. */
	td->dedupe_working_set_states = calloc(td->num_unique_pages,
					       sizeof(struct frand_state));
	if (!td->dedupe_working_set_states) {
		log_err("fio: could not allocate dedupe working set\n");
		return 1;
	}

	/* First page always reuses this job's own buffer seed. */
	frand_copy(&dedupe_working_set_state, &td->buf_state);
	frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
	pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
	for (i = 1; i < td->num_unique_pages; i++) {
		/*
		 * When compression is used the seed is advanced multiple times to
		 * generate the buffer. We want to regenerate the same buffer when
		 * deduping against this page
		 */
		for (j = 0; j < num_seed_advancements; j++)
			__get_next_seed(&dedupe_working_set_state);

		/*
		 * When global dedup is used, we rotate the seeds to allow
		 * generating same buffers across different jobs. Deduplication buffers
		 * are spread evenly across jobs participating in global dedupe
		 */
		if (global_dedup && i % pages_per_seed == 0) {
			td_seed = tnumber_to_td(++tindex % thread_number);
			frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
		}

		frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
	}

	return 0;
}