Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
6 | * with theta 1.2, using 100,000 values and split the reporting into | |
7 | * 20 buckets: | |
8 | * | |
9 | * t/genzipf zipf 1.2 100000 20 | |
10 | * | |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <fcntl.h> | |
18 | #include <string.h> | |
921d17ba | 19 | #include <unistd.h> |
6ff38856 JA |
20 | |
21 | #include "../lib/zipf.h" | |
fd6f237d JA |
22 | #include "../flist.h" |
23 | #include "../hash.h" | |
24 | #include "../rbtree.h" | |
6ff38856 | 25 | |
f880b1f6 JA |
26 | #define DEF_NR 1000000 |
27 | #define DEF_NR_OUTPUT 23 | |
28 | ||
fd6f237d JA |
29 | struct node { |
30 | struct flist_head list; | |
31 | struct rb_node rb; | |
32 | unsigned long long val; | |
33 | unsigned long hits; | |
34 | }; | |
35 | ||
36 | static struct flist_head *hash; | |
37 | static unsigned long hash_bits = 24; | |
38 | static unsigned long hash_size = 1 << 24; | |
39 | static struct rb_root rb; | |
40 | ||
921d17ba JA |
41 | enum { |
42 | TYPE_NONE = 0, | |
43 | TYPE_ZIPF, | |
44 | TYPE_PARETO, | |
45 | }; | |
46 | static const char *dist_types[] = { "None", "Zipf", "Pareto" }; | |
47 | ||
48 | static int dist_type = TYPE_ZIPF; | |
49 | static unsigned long gb_size = 500; | |
444256ea | 50 | static unsigned long block_size = 4096; |
921d17ba | 51 | static unsigned long output_nranges = DEF_NR_OUTPUT; |
444256ea | 52 | static double percentage; |
921d17ba JA |
53 | static double dist_val; |
54 | ||
55 | #define DEF_ZIPF_VAL 1.2 | |
56 | #define DEF_PARETO_VAL 0.3 | |
57 | ||
fd6f237d JA |
58 | static struct node *hash_lookup(unsigned long long val) |
59 | { | |
60 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
61 | struct flist_head *entry; | |
62 | struct node *n; | |
63 | ||
64 | flist_for_each(entry, l) { | |
65 | n = flist_entry(entry, struct node, list); | |
66 | if (n->val == val) | |
67 | return n; | |
68 | } | |
69 | ||
70 | return NULL; | |
71 | } | |
72 | ||
73 | static void hash_insert(unsigned long long val) | |
74 | { | |
75 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
76 | struct node *n = malloc(sizeof(*n)); | |
77 | ||
78 | n->val = val; | |
79 | n->hits = 1; | |
80 | flist_add_tail(&n->list, l); | |
81 | } | |
82 | ||
83 | static void rb_insert(struct node *n) | |
84 | { | |
85 | struct rb_node **p, *parent; | |
86 | ||
87 | memset(&n->rb, 0, sizeof(n->rb)); | |
88 | p = &rb.rb_node; | |
89 | parent = NULL; | |
90 | while (*p) { | |
91 | struct node *__n; | |
92 | ||
93 | parent = *p; | |
94 | __n = rb_entry(parent, struct node, rb); | |
95 | if (n->hits > __n->hits) | |
96 | p = &(*p)->rb_left; | |
97 | else | |
98 | p = &(*p)->rb_right; | |
99 | } | |
100 | ||
101 | rb_link_node(&n->rb, parent, p); | |
102 | rb_insert_color(&n->rb, &rb); | |
103 | } | |
104 | ||
105 | static unsigned long rb_add(struct flist_head *list) | |
6ff38856 | 106 | { |
fd6f237d JA |
107 | struct flist_head *entry; |
108 | unsigned long ret = 0; | |
109 | struct node *n; | |
110 | ||
111 | flist_for_each(entry, list) { | |
112 | n = flist_entry(entry, struct node, list); | |
113 | ||
114 | rb_insert(n); | |
115 | ret++; | |
116 | } | |
6ff38856 | 117 | |
fd6f237d JA |
118 | return ret; |
119 | } | |
120 | ||
121 | static unsigned long rb_gen(void) | |
122 | { | |
123 | unsigned long ret = 0; | |
124 | unsigned int i; | |
125 | ||
126 | for (i = 0; i < hash_size; i++) | |
127 | ret += rb_add(&hash[i]); | |
128 | ||
129 | return ret; | |
6ff38856 JA |
130 | } |
131 | ||
921d17ba JA |
132 | static int parse_options(int argc, char *argv[]) |
133 | { | |
444256ea | 134 | const char *optstring = "t:g:i:o:b:p:"; |
921d17ba JA |
135 | int c, dist_val_set = 0; |
136 | ||
137 | while ((c = getopt(argc, argv, optstring)) != -1) { | |
138 | switch (c) { | |
444256ea JA |
139 | case 'p': |
140 | percentage = atof(optarg); | |
141 | break; | |
142 | case 'b': | |
143 | block_size = strtoul(optarg, NULL, 10); | |
144 | break; | |
921d17ba JA |
145 | case 't': |
146 | if (!strncmp(optarg, "zipf", 4)) | |
147 | dist_type = TYPE_ZIPF; | |
148 | else if (!strncmp(optarg, "pareto", 6)) | |
149 | dist_type = TYPE_PARETO; | |
150 | else { | |
151 | printf("wrong dist type: %s\n", optarg); | |
152 | return 1; | |
153 | } | |
154 | break; | |
155 | case 'g': | |
156 | gb_size = strtoul(optarg, NULL, 10); | |
157 | break; | |
158 | case 'i': | |
159 | dist_val = atof(optarg); | |
160 | dist_val_set = 1; | |
161 | break; | |
921d17ba JA |
162 | case 'o': |
163 | output_nranges = strtoul(optarg, NULL, 10); | |
164 | break; | |
165 | default: | |
166 | printf("bad option %c\n", c); | |
167 | return 1; | |
168 | } | |
169 | } | |
170 | ||
171 | if (dist_type == TYPE_PARETO) { | |
172 | if ((dist_val >= 1.00 || dist_val < 0.00)) { | |
173 | printf("pareto input must be > 0.00 and < 1.00\n"); | |
174 | return 1; | |
175 | } | |
176 | if (!dist_val_set) | |
177 | dist_val = DEF_PARETO_VAL; | |
178 | } else if (dist_type == TYPE_ZIPF) { | |
179 | if (dist_val == 1.0) { | |
180 | printf("zipf input must be different than 1.0\n"); | |
181 | return 1; | |
182 | } | |
183 | if (!dist_val_set) | |
184 | dist_val = DEF_ZIPF_VAL; | |
185 | } | |
186 | ||
187 | return 0; | |
188 | } | |
189 | ||
444256ea JA |
190 | struct output_sum { |
191 | double output; | |
192 | unsigned int nranges; | |
193 | }; | |
194 | ||
6ff38856 JA |
195 | int main(int argc, char *argv[]) |
196 | { | |
fd6f237d | 197 | unsigned long offset; |
444256ea JA |
198 | unsigned long i, j, nr_vals, cur_vals, interval, total_vals; |
199 | unsigned long long nranges; | |
200 | struct output_sum *output_sums; | |
201 | double perc, perc_i; | |
6ff38856 | 202 | struct zipf_state zs; |
fd6f237d | 203 | struct rb_node *n; |
6ff38856 | 204 | |
921d17ba | 205 | if (parse_options(argc, argv)) |
6ff38856 | 206 | return 1; |
6ff38856 | 207 | |
444256ea JA |
208 | printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); |
209 | ||
210 | nranges = gb_size * 1024 * 1024 * 1024ULL; | |
211 | nranges /= block_size; | |
f880b1f6 | 212 | |
921d17ba JA |
213 | if (dist_type == TYPE_ZIPF) |
214 | zipf_init(&zs, nranges, dist_val, 1); | |
6ff38856 | 215 | else |
921d17ba | 216 | pareto_init(&zs, nranges, dist_val, 1); |
6ff38856 | 217 | |
c71224e7 JA |
218 | hash_bits = 0; |
219 | hash_size = nranges; | |
220 | while ((hash_size >>= 1) != 0) | |
221 | hash_bits++; | |
222 | ||
223 | hash_size = 1 << hash_bits; | |
224 | ||
fd6f237d JA |
225 | hash = malloc(hash_size * sizeof(struct flist_head)); |
226 | for (i = 0; i < hash_size; i++) | |
227 | INIT_FLIST_HEAD(&hash[i]); | |
228 | ||
444256ea | 229 | for (nr_vals = i = 0; i < nranges; i++) { |
fd6f237d | 230 | struct node *n; |
6ff38856 | 231 | |
921d17ba | 232 | if (dist_type == TYPE_ZIPF) |
fd6f237d | 233 | offset = zipf_next(&zs); |
6ff38856 | 234 | else |
fd6f237d JA |
235 | offset = pareto_next(&zs); |
236 | ||
237 | n = hash_lookup(offset); | |
238 | if (n) | |
239 | n->hits++; | |
240 | else | |
241 | hash_insert(offset); | |
6ff38856 | 242 | |
6ff38856 JA |
243 | nr_vals++; |
244 | } | |
245 | ||
fd6f237d | 246 | nr_vals = rb_gen(); |
6ff38856 | 247 | |
c71224e7 | 248 | interval = (nr_vals + output_nranges - 1) / output_nranges; |
6ff38856 | 249 | |
444256ea JA |
250 | output_sums = malloc(output_nranges * sizeof(struct output_sum)); |
251 | for (i = 0; i < output_nranges; i++) { | |
252 | output_sums[i].output = 0.0; | |
253 | output_sums[i].nranges = 1; | |
254 | } | |
6ff38856 | 255 | |
444256ea | 256 | total_vals = i = j = cur_vals = 0; |
fd6f237d JA |
257 | |
258 | n = rb_first(&rb); | |
259 | while (n) { | |
260 | struct node *node = rb_entry(n, struct node, rb); | |
444256ea | 261 | struct output_sum *os = &output_sums[j]; |
fd6f237d JA |
262 | |
263 | if (i >= interval) { | |
444256ea JA |
264 | os->output = (double) (cur_vals + 1) / (double) nranges; |
265 | os->output *= 100.0; | |
6ff38856 | 266 | j++; |
fd6f237d | 267 | cur_vals = node->hits; |
c71224e7 | 268 | interval += (nr_vals + output_nranges - 1) / output_nranges; |
444256ea | 269 | } else { |
fd6f237d | 270 | cur_vals += node->hits; |
444256ea JA |
271 | os->nranges += node->hits; |
272 | } | |
6ff38856 | 273 | |
fd6f237d | 274 | i++; |
444256ea JA |
275 | total_vals += node->hits; |
276 | ||
277 | if (percentage) { | |
278 | unsigned long blocks = percentage * nranges / 100; | |
279 | ||
280 | if (total_vals >= blocks) { | |
281 | double cs = i * block_size / (1024 * 1024); | |
282 | char p = 'M'; | |
283 | ||
284 | if (cs > 1024.0) { | |
285 | cs /= 1024.0; | |
286 | p = 'G'; | |
287 | } | |
288 | if (cs > 1024.0) { | |
289 | cs /= 1024.0; | |
290 | p = 'T'; | |
291 | } | |
292 | ||
293 | printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); | |
294 | percentage = 0.0; | |
295 | } | |
296 | } | |
297 | ||
298 | n = rb_next(n); | |
0779dfc8 | 299 | } |
6ff38856 | 300 | |
f880b1f6 JA |
301 | perc_i = 100.0 / (double) output_nranges; |
302 | perc = 0.0; | |
921d17ba | 303 | |
444256ea JA |
304 | printf("\n Rows Hits No Hits Size\n"); |
305 | printf("--------------------------------------------------------\n"); | |
f880b1f6 | 306 | for (i = 0; i < j; i++) { |
444256ea JA |
307 | struct output_sum *os = &output_sums[i]; |
308 | double gb = (double) os->nranges * block_size / 1024.0; | |
309 | char p = 'K'; | |
921d17ba | 310 | |
444256ea | 311 | if (gb > 1024.0) { |
921d17ba | 312 | p = 'M'; |
444256ea JA |
313 | gb /= 1024.0; |
314 | } | |
315 | if (gb > 1024.0) { | |
316 | p = 'G'; | |
317 | gb /= 1024.0; | |
921d17ba JA |
318 | } |
319 | ||
f880b1f6 | 320 | perc += perc_i; |
444256ea | 321 | printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p); |
f880b1f6 | 322 | } |
6ff38856 | 323 | |
444256ea | 324 | free(output_sums); |
fd6f237d | 325 | free(hash); |
6ff38856 JA |
326 | return 0; |
327 | } |