Commit | Line | Data |
---|---|---|
e25839d4 JA |
1 | #include <math.h> |
2 | #include <string.h> | |
e25839d4 JA |
3 | #include "zipf.h" |
4 | #include "../minmax.h" | |
ed1860cd | 5 | #include "../hash.h" |
e25839d4 | 6 | |
e6f735f0 | 7 | #define ZIPF_MAX_GEN 10000000UL |
e25839d4 JA |
8 | |
9 | static void zipf_update(struct zipf_state *zs) | |
10 | { | |
abb60c32 | 11 | uint64_t to_gen; |
e25839d4 JA |
12 | unsigned int i; |
13 | ||
4c9060e3 JA |
14 | /* |
15 | * It can become very costly to generate long sequences. Just cap it at | |
e4839591 JA |
16 | * 10M max, that should be doable in 1-2s on even slow machines. |
17 | * Precision will take a slight hit, but nothing major. | |
4c9060e3 | 18 | */ |
761c2729 | 19 | to_gen = min(zs->nranges, (uint64_t) ZIPF_MAX_GEN); |
e25839d4 | 20 | |
4c9060e3 JA |
21 | for (i = 0; i < to_gen; i++) |
22 | zs->zetan += pow(1.0 / (double) (i + 1), zs->theta); | |
e25839d4 JA |
23 | } |
24 | ||
abb60c32 | 25 | static void shared_rand_init(struct zipf_state *zs, uint64_t nranges, |
a87c90fd | 26 | double center, unsigned int seed) |
b2b0b753 JA |
27 | { |
28 | memset(zs, 0, sizeof(*zs)); | |
29 | zs->nranges = nranges; | |
30 | ||
c3546b53 | 31 | init_rand_seed(&zs->rand, seed, 0); |
b2b0b753 | 32 | zs->rand_off = __rand(&zs->rand); |
a87c90fd AK |
33 | if (center != -1) |
34 | zs->rand_off = nranges * center; | |
b2b0b753 JA |
35 | } |
36 | ||
abb60c32 | 37 | void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, |
a87c90fd | 38 | double center, unsigned int seed) |
e25839d4 | 39 | { |
a87c90fd | 40 | shared_rand_init(zs, nranges, center, seed); |
e25839d4 | 41 | |
e25839d4 | 42 | zs->theta = theta; |
1442ba18 | 43 | zs->zeta2 = pow(1.0, zs->theta) + pow(0.5, zs->theta); |
e25839d4 | 44 | |
4c9060e3 | 45 | zipf_update(zs); |
e25839d4 JA |
46 | } |
47 | ||
abb60c32 | 48 | uint64_t zipf_next(struct zipf_state *zs) |
e25839d4 | 49 | { |
e25839d4 JA |
50 | double alpha, eta, rand_uni, rand_z; |
51 | unsigned long long n = zs->nranges; | |
52 | unsigned long long val; | |
53 | ||
54 | alpha = 1.0 / (1.0 - zs->theta); | |
55 | eta = (1.0 - pow(2.0 / n, 1.0 - zs->theta)) / (1.0 - zs->zeta2 / zs->zetan); | |
56 | ||
c3546b53 | 57 | rand_uni = (double) __rand(&zs->rand) / (double) FRAND32_MAX; |
e25839d4 JA |
58 | rand_z = rand_uni * zs->zetan; |
59 | ||
60 | if (rand_z < 1.0) | |
61 | val = 1; | |
62 | else if (rand_z < (1.0 + pow(0.5, zs->theta))) | |
63 | val = 2; | |
64 | else | |
65 | val = 1 + (unsigned long long)(n * pow(eta*rand_uni - eta + 1.0, alpha)); | |
66 | ||
8c5e96a4 JA |
67 | val--; |
68 | ||
8348daf9 | 69 | if (!zs->disable_hash) |
8c5e96a4 | 70 | val = __hash_u64(val); |
8348daf9 | 71 | |
8c5e96a4 | 72 | return (val + zs->rand_off) % zs->nranges; |
e25839d4 | 73 | } |
925fee33 | 74 | |
abb60c32 | 75 | void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, |
a87c90fd | 76 | double center, unsigned int seed) |
925fee33 | 77 | { |
a87c90fd | 78 | shared_rand_init(zs, nranges, center, seed); |
925fee33 | 79 | zs->pareto_pow = log(h) / log(1.0 - h); |
925fee33 JA |
80 | } |
81 | ||
abb60c32 | 82 | uint64_t pareto_next(struct zipf_state *zs) |
925fee33 | 83 | { |
c3546b53 | 84 | double rand = (double) __rand(&zs->rand) / (double) FRAND32_MAX; |
8c5e96a4 JA |
85 | unsigned long long n; |
86 | ||
87 | n = (zs->nranges - 1) * pow(rand, zs->pareto_pow); | |
925fee33 | 88 | |
8348daf9 | 89 | if (!zs->disable_hash) |
8c5e96a4 | 90 | n = __hash_u64(n); |
8348daf9 | 91 | |
8c5e96a4 | 92 | return (n + zs->rand_off) % zs->nranges; |
8348daf9 JA |
93 | } |
94 | ||
95 | void zipf_disable_hash(struct zipf_state *zs) | |
96 | { | |
97 | zs->disable_hash = true; | |
925fee33 | 98 | } |