Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
6 | * with theta 1.2, using 100,000 values and split the reporting into | |
7 | * 20 buckets: | |
8 | * | |
9 | * t/genzipf zipf 1.2 100000 20 | |
10 | * | |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <fcntl.h> | |
18 | #include <string.h> | |
921d17ba | 19 | #include <unistd.h> |
6ff38856 JA |
20 | |
21 | #include "../lib/zipf.h" | |
fd6f237d JA |
22 | #include "../flist.h" |
23 | #include "../hash.h" | |
24 | #include "../rbtree.h" | |
6ff38856 | 25 | |
f880b1f6 JA |
26 | #define DEF_NR 1000000 |
27 | #define DEF_NR_OUTPUT 23 | |
28 | ||
fd6f237d JA |
29 | struct node { |
30 | struct flist_head list; | |
fd6f237d JA |
31 | unsigned long long val; |
32 | unsigned long hits; | |
33 | }; | |
34 | ||
35 | static struct flist_head *hash; | |
36 | static unsigned long hash_bits = 24; | |
37 | static unsigned long hash_size = 1 << 24; | |
fd6f237d | 38 | |
921d17ba JA |
39 | enum { |
40 | TYPE_NONE = 0, | |
41 | TYPE_ZIPF, | |
42 | TYPE_PARETO, | |
43 | }; | |
44 | static const char *dist_types[] = { "None", "Zipf", "Pareto" }; | |
45 | ||
46 | static int dist_type = TYPE_ZIPF; | |
47 | static unsigned long gb_size = 500; | |
444256ea | 48 | static unsigned long block_size = 4096; |
921d17ba | 49 | static unsigned long output_nranges = DEF_NR_OUTPUT; |
444256ea | 50 | static double percentage; |
921d17ba JA |
51 | static double dist_val; |
52 | ||
53 | #define DEF_ZIPF_VAL 1.2 | |
54 | #define DEF_PARETO_VAL 0.3 | |
55 | ||
fd6f237d JA |
56 | static struct node *hash_lookup(unsigned long long val) |
57 | { | |
58 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
59 | struct flist_head *entry; | |
60 | struct node *n; | |
61 | ||
62 | flist_for_each(entry, l) { | |
63 | n = flist_entry(entry, struct node, list); | |
64 | if (n->val == val) | |
65 | return n; | |
66 | } | |
67 | ||
68 | return NULL; | |
69 | } | |
70 | ||
24baa4c7 | 71 | static struct node *hash_insert(struct node *n, unsigned long long val) |
fd6f237d JA |
72 | { |
73 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
fd6f237d JA |
74 | |
75 | n->val = val; | |
76 | n->hits = 1; | |
77 | flist_add_tail(&n->list, l); | |
24baa4c7 | 78 | return n; |
6ff38856 JA |
79 | } |
80 | ||
921d17ba JA |
81 | static int parse_options(int argc, char *argv[]) |
82 | { | |
444256ea | 83 | const char *optstring = "t:g:i:o:b:p:"; |
921d17ba JA |
84 | int c, dist_val_set = 0; |
85 | ||
86 | while ((c = getopt(argc, argv, optstring)) != -1) { | |
87 | switch (c) { | |
444256ea JA |
88 | case 'p': |
89 | percentage = atof(optarg); | |
90 | break; | |
91 | case 'b': | |
92 | block_size = strtoul(optarg, NULL, 10); | |
93 | break; | |
921d17ba JA |
94 | case 't': |
95 | if (!strncmp(optarg, "zipf", 4)) | |
96 | dist_type = TYPE_ZIPF; | |
97 | else if (!strncmp(optarg, "pareto", 6)) | |
98 | dist_type = TYPE_PARETO; | |
99 | else { | |
100 | printf("wrong dist type: %s\n", optarg); | |
101 | return 1; | |
102 | } | |
103 | break; | |
104 | case 'g': | |
105 | gb_size = strtoul(optarg, NULL, 10); | |
106 | break; | |
107 | case 'i': | |
108 | dist_val = atof(optarg); | |
109 | dist_val_set = 1; | |
110 | break; | |
921d17ba JA |
111 | case 'o': |
112 | output_nranges = strtoul(optarg, NULL, 10); | |
113 | break; | |
114 | default: | |
115 | printf("bad option %c\n", c); | |
116 | return 1; | |
117 | } | |
118 | } | |
119 | ||
120 | if (dist_type == TYPE_PARETO) { | |
121 | if ((dist_val >= 1.00 || dist_val < 0.00)) { | |
122 | printf("pareto input must be > 0.00 and < 1.00\n"); | |
123 | return 1; | |
124 | } | |
125 | if (!dist_val_set) | |
126 | dist_val = DEF_PARETO_VAL; | |
127 | } else if (dist_type == TYPE_ZIPF) { | |
128 | if (dist_val == 1.0) { | |
129 | printf("zipf input must be different than 1.0\n"); | |
130 | return 1; | |
131 | } | |
132 | if (!dist_val_set) | |
133 | dist_val = DEF_ZIPF_VAL; | |
134 | } | |
135 | ||
136 | return 0; | |
137 | } | |
138 | ||
444256ea JA |
139 | struct output_sum { |
140 | double output; | |
141 | unsigned int nranges; | |
142 | }; | |
143 | ||
24baa4c7 JA |
144 | static int node_cmp(const void *p1, const void *p2) |
145 | { | |
146 | const struct node *n1 = p1; | |
147 | const struct node *n2 = p2; | |
148 | ||
149 | return n2->hits - n1->hits; | |
150 | } | |
151 | ||
6ff38856 JA |
152 | int main(int argc, char *argv[]) |
153 | { | |
fd6f237d | 154 | unsigned long offset; |
24baa4c7 | 155 | unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes; |
444256ea JA |
156 | unsigned long long nranges; |
157 | struct output_sum *output_sums; | |
24baa4c7 | 158 | struct node *nodes; |
444256ea | 159 | double perc, perc_i; |
6ff38856 | 160 | struct zipf_state zs; |
6ff38856 | 161 | |
921d17ba | 162 | if (parse_options(argc, argv)) |
6ff38856 | 163 | return 1; |
6ff38856 | 164 | |
444256ea JA |
165 | printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); |
166 | ||
167 | nranges = gb_size * 1024 * 1024 * 1024ULL; | |
168 | nranges /= block_size; | |
f880b1f6 | 169 | |
921d17ba JA |
170 | if (dist_type == TYPE_ZIPF) |
171 | zipf_init(&zs, nranges, dist_val, 1); | |
6ff38856 | 172 | else |
921d17ba | 173 | pareto_init(&zs, nranges, dist_val, 1); |
6ff38856 | 174 | |
c71224e7 JA |
175 | hash_bits = 0; |
176 | hash_size = nranges; | |
177 | while ((hash_size >>= 1) != 0) | |
178 | hash_bits++; | |
179 | ||
180 | hash_size = 1 << hash_bits; | |
181 | ||
fd6f237d JA |
182 | hash = malloc(hash_size * sizeof(struct flist_head)); |
183 | for (i = 0; i < hash_size; i++) | |
184 | INIT_FLIST_HEAD(&hash[i]); | |
185 | ||
24baa4c7 JA |
186 | nodes = malloc(nranges * sizeof(struct node)); |
187 | ||
188 | for (nr_vals = i = j = 0; i < nranges; i++) { | |
fd6f237d | 189 | struct node *n; |
6ff38856 | 190 | |
921d17ba | 191 | if (dist_type == TYPE_ZIPF) |
fd6f237d | 192 | offset = zipf_next(&zs); |
6ff38856 | 193 | else |
fd6f237d JA |
194 | offset = pareto_next(&zs); |
195 | ||
196 | n = hash_lookup(offset); | |
197 | if (n) | |
198 | n->hits++; | |
24baa4c7 JA |
199 | else { |
200 | hash_insert(&nodes[j], offset); | |
201 | j++; | |
202 | } | |
6ff38856 | 203 | |
6ff38856 JA |
204 | nr_vals++; |
205 | } | |
206 | ||
24baa4c7 JA |
207 | qsort(nodes, j, sizeof(struct node), node_cmp); |
208 | nnodes = j; | |
209 | nr_vals = nnodes; | |
6ff38856 | 210 | |
c71224e7 | 211 | interval = (nr_vals + output_nranges - 1) / output_nranges; |
6ff38856 | 212 | |
444256ea JA |
213 | output_sums = malloc(output_nranges * sizeof(struct output_sum)); |
214 | for (i = 0; i < output_nranges; i++) { | |
215 | output_sums[i].output = 0.0; | |
216 | output_sums[i].nranges = 1; | |
217 | } | |
6ff38856 | 218 | |
444256ea | 219 | total_vals = i = j = cur_vals = 0; |
fd6f237d | 220 | |
24baa4c7 | 221 | for (k = 0; k < nnodes; k++) { |
444256ea | 222 | struct output_sum *os = &output_sums[j]; |
24baa4c7 | 223 | struct node *node = &nodes[k]; |
fd6f237d JA |
224 | |
225 | if (i >= interval) { | |
444256ea JA |
226 | os->output = (double) (cur_vals + 1) / (double) nranges; |
227 | os->output *= 100.0; | |
6ff38856 | 228 | j++; |
fd6f237d | 229 | cur_vals = node->hits; |
c71224e7 | 230 | interval += (nr_vals + output_nranges - 1) / output_nranges; |
444256ea | 231 | } else { |
fd6f237d | 232 | cur_vals += node->hits; |
444256ea JA |
233 | os->nranges += node->hits; |
234 | } | |
6ff38856 | 235 | |
fd6f237d | 236 | i++; |
444256ea JA |
237 | total_vals += node->hits; |
238 | ||
239 | if (percentage) { | |
240 | unsigned long blocks = percentage * nranges / 100; | |
241 | ||
242 | if (total_vals >= blocks) { | |
243 | double cs = i * block_size / (1024 * 1024); | |
244 | char p = 'M'; | |
245 | ||
246 | if (cs > 1024.0) { | |
247 | cs /= 1024.0; | |
248 | p = 'G'; | |
249 | } | |
250 | if (cs > 1024.0) { | |
251 | cs /= 1024.0; | |
252 | p = 'T'; | |
253 | } | |
254 | ||
255 | printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); | |
256 | percentage = 0.0; | |
257 | } | |
258 | } | |
0779dfc8 | 259 | } |
6ff38856 | 260 | |
f880b1f6 JA |
261 | perc_i = 100.0 / (double) output_nranges; |
262 | perc = 0.0; | |
921d17ba | 263 | |
444256ea JA |
264 | printf("\n Rows Hits No Hits Size\n"); |
265 | printf("--------------------------------------------------------\n"); | |
f880b1f6 | 266 | for (i = 0; i < j; i++) { |
444256ea JA |
267 | struct output_sum *os = &output_sums[i]; |
268 | double gb = (double) os->nranges * block_size / 1024.0; | |
269 | char p = 'K'; | |
921d17ba | 270 | |
444256ea | 271 | if (gb > 1024.0) { |
921d17ba | 272 | p = 'M'; |
444256ea JA |
273 | gb /= 1024.0; |
274 | } | |
275 | if (gb > 1024.0) { | |
276 | p = 'G'; | |
277 | gb /= 1024.0; | |
921d17ba JA |
278 | } |
279 | ||
f880b1f6 | 280 | perc += perc_i; |
444256ea | 281 | printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p); |
f880b1f6 | 282 | } |
6ff38856 | 283 | |
444256ea | 284 | free(output_sums); |
fd6f237d | 285 | free(hash); |
6ff38856 JA |
286 | return 0; |
287 | } |