Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
6 | * with theta 1.2, using 100,000 values and split the reporting into | |
7 | * 20 buckets: | |
8 | * | |
9 | * t/genzipf zipf 1.2 100000 20 | |
10 | * | |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <fcntl.h> | |
18 | #include <string.h> | |
921d17ba | 19 | #include <unistd.h> |
6ff38856 JA |
20 | |
21 | #include "../lib/zipf.h" | |
fd6f237d JA |
22 | #include "../flist.h" |
23 | #include "../hash.h" | |
6ff38856 | 24 | |
f880b1f6 JA |
25 | #define DEF_NR 1000000 |
26 | #define DEF_NR_OUTPUT 23 | |
27 | ||
fd6f237d JA |
28 | struct node { |
29 | struct flist_head list; | |
fd6f237d JA |
30 | unsigned long long val; |
31 | unsigned long hits; | |
32 | }; | |
33 | ||
34 | static struct flist_head *hash; | |
35 | static unsigned long hash_bits = 24; | |
36 | static unsigned long hash_size = 1 << 24; | |
fd6f237d | 37 | |
921d17ba JA |
38 | enum { |
39 | TYPE_NONE = 0, | |
40 | TYPE_ZIPF, | |
41 | TYPE_PARETO, | |
42 | }; | |
43 | static const char *dist_types[] = { "None", "Zipf", "Pareto" }; | |
44 | ||
45 | static int dist_type = TYPE_ZIPF; | |
46 | static unsigned long gb_size = 500; | |
444256ea | 47 | static unsigned long block_size = 4096; |
921d17ba | 48 | static unsigned long output_nranges = DEF_NR_OUTPUT; |
444256ea | 49 | static double percentage; |
921d17ba | 50 | static double dist_val; |
355934b7 | 51 | static int output_csv = 0; |
921d17ba JA |
52 | |
53 | #define DEF_ZIPF_VAL 1.2 | |
54 | #define DEF_PARETO_VAL 0.3 | |
55 | ||
fd6f237d JA |
56 | static struct node *hash_lookup(unsigned long long val) |
57 | { | |
58 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
59 | struct flist_head *entry; | |
60 | struct node *n; | |
61 | ||
62 | flist_for_each(entry, l) { | |
63 | n = flist_entry(entry, struct node, list); | |
64 | if (n->val == val) | |
65 | return n; | |
66 | } | |
67 | ||
68 | return NULL; | |
69 | } | |
70 | ||
24baa4c7 | 71 | static struct node *hash_insert(struct node *n, unsigned long long val) |
fd6f237d JA |
72 | { |
73 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
fd6f237d JA |
74 | |
75 | n->val = val; | |
76 | n->hits = 1; | |
77 | flist_add_tail(&n->list, l); | |
24baa4c7 | 78 | return n; |
6ff38856 JA |
79 | } |
80 | ||
921d17ba JA |
81 | static int parse_options(int argc, char *argv[]) |
82 | { | |
355934b7 | 83 | const char *optstring = "t:g:i:o:b:p:c"; |
921d17ba JA |
84 | int c, dist_val_set = 0; |
85 | ||
86 | while ((c = getopt(argc, argv, optstring)) != -1) { | |
87 | switch (c) { | |
444256ea JA |
88 | case 'p': |
89 | percentage = atof(optarg); | |
90 | break; | |
91 | case 'b': | |
92 | block_size = strtoul(optarg, NULL, 10); | |
93 | break; | |
921d17ba JA |
94 | case 't': |
95 | if (!strncmp(optarg, "zipf", 4)) | |
96 | dist_type = TYPE_ZIPF; | |
97 | else if (!strncmp(optarg, "pareto", 6)) | |
98 | dist_type = TYPE_PARETO; | |
99 | else { | |
100 | printf("wrong dist type: %s\n", optarg); | |
101 | return 1; | |
102 | } | |
103 | break; | |
104 | case 'g': | |
105 | gb_size = strtoul(optarg, NULL, 10); | |
106 | break; | |
107 | case 'i': | |
108 | dist_val = atof(optarg); | |
109 | dist_val_set = 1; | |
110 | break; | |
921d17ba JA |
111 | case 'o': |
112 | output_nranges = strtoul(optarg, NULL, 10); | |
113 | break; | |
355934b7 VKF |
114 | case 'c': |
115 | output_csv = 1; | |
116 | break; | |
921d17ba JA |
117 | default: |
118 | printf("bad option %c\n", c); | |
119 | return 1; | |
120 | } | |
121 | } | |
122 | ||
123 | if (dist_type == TYPE_PARETO) { | |
124 | if ((dist_val >= 1.00 || dist_val < 0.00)) { | |
125 | printf("pareto input must be > 0.00 and < 1.00\n"); | |
126 | return 1; | |
127 | } | |
128 | if (!dist_val_set) | |
129 | dist_val = DEF_PARETO_VAL; | |
130 | } else if (dist_type == TYPE_ZIPF) { | |
131 | if (dist_val == 1.0) { | |
132 | printf("zipf input must be different than 1.0\n"); | |
133 | return 1; | |
134 | } | |
135 | if (!dist_val_set) | |
136 | dist_val = DEF_ZIPF_VAL; | |
137 | } | |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
444256ea JA |
142 | struct output_sum { |
143 | double output; | |
144 | unsigned int nranges; | |
145 | }; | |
146 | ||
24baa4c7 JA |
147 | static int node_cmp(const void *p1, const void *p2) |
148 | { | |
149 | const struct node *n1 = p1; | |
150 | const struct node *n2 = p2; | |
151 | ||
152 | return n2->hits - n1->hits; | |
153 | } | |
154 | ||
6ff38856 JA |
155 | int main(int argc, char *argv[]) |
156 | { | |
fd6f237d | 157 | unsigned long offset; |
24baa4c7 | 158 | unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes; |
444256ea JA |
159 | unsigned long long nranges; |
160 | struct output_sum *output_sums; | |
24baa4c7 | 161 | struct node *nodes; |
444256ea | 162 | double perc, perc_i; |
6ff38856 | 163 | struct zipf_state zs; |
6ff38856 | 164 | |
921d17ba | 165 | if (parse_options(argc, argv)) |
6ff38856 | 166 | return 1; |
6ff38856 | 167 | |
355934b7 VKF |
168 | if( !output_csv ) |
169 | printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); | |
444256ea JA |
170 | |
171 | nranges = gb_size * 1024 * 1024 * 1024ULL; | |
172 | nranges /= block_size; | |
f880b1f6 | 173 | |
921d17ba JA |
174 | if (dist_type == TYPE_ZIPF) |
175 | zipf_init(&zs, nranges, dist_val, 1); | |
6ff38856 | 176 | else |
921d17ba | 177 | pareto_init(&zs, nranges, dist_val, 1); |
6ff38856 | 178 | |
c71224e7 JA |
179 | hash_bits = 0; |
180 | hash_size = nranges; | |
181 | while ((hash_size >>= 1) != 0) | |
182 | hash_bits++; | |
183 | ||
184 | hash_size = 1 << hash_bits; | |
185 | ||
fd6f237d JA |
186 | hash = malloc(hash_size * sizeof(struct flist_head)); |
187 | for (i = 0; i < hash_size; i++) | |
188 | INIT_FLIST_HEAD(&hash[i]); | |
189 | ||
24baa4c7 JA |
190 | nodes = malloc(nranges * sizeof(struct node)); |
191 | ||
192 | for (nr_vals = i = j = 0; i < nranges; i++) { | |
fd6f237d | 193 | struct node *n; |
6ff38856 | 194 | |
921d17ba | 195 | if (dist_type == TYPE_ZIPF) |
fd6f237d | 196 | offset = zipf_next(&zs); |
6ff38856 | 197 | else |
fd6f237d JA |
198 | offset = pareto_next(&zs); |
199 | ||
200 | n = hash_lookup(offset); | |
201 | if (n) | |
202 | n->hits++; | |
24baa4c7 JA |
203 | else { |
204 | hash_insert(&nodes[j], offset); | |
205 | j++; | |
206 | } | |
6ff38856 | 207 | |
6ff38856 JA |
208 | nr_vals++; |
209 | } | |
210 | ||
24baa4c7 JA |
211 | qsort(nodes, j, sizeof(struct node), node_cmp); |
212 | nnodes = j; | |
213 | nr_vals = nnodes; | |
6ff38856 | 214 | |
355934b7 VKF |
215 | if (output_csv) { |
216 | printf("rank, count\n"); | |
217 | for (k = 0; k < nnodes; k++) | |
218 | printf("%lu, %lu\n", k, nodes[k].hits); | |
219 | } else { | |
220 | interval = (nr_vals + output_nranges - 1) / output_nranges; | |
221 | ||
222 | output_sums = malloc(output_nranges * sizeof(struct output_sum)); | |
223 | for (i = 0; i < output_nranges; i++) { | |
224 | output_sums[i].output = 0.0; | |
225 | output_sums[i].nranges = 1; | |
444256ea | 226 | } |
6ff38856 | 227 | |
355934b7 VKF |
228 | total_vals = i = j = cur_vals = 0; |
229 | ||
230 | for (k = 0; k < nnodes; k++) { | |
231 | struct output_sum *os = &output_sums[j]; | |
232 | struct node *node = &nodes[k]; | |
233 | ||
234 | if (i >= interval) { | |
235 | os->output = | |
236 | (double)(cur_vals + 1) / (double)nranges; | |
237 | os->output *= 100.0; | |
238 | j++; | |
239 | cur_vals = node->hits; | |
240 | interval += | |
241 | (nr_vals + output_nranges - | |
242 | 1) / output_nranges; | |
243 | } else { | |
244 | cur_vals += node->hits; | |
245 | os->nranges += node->hits; | |
246 | } | |
444256ea | 247 | |
355934b7 VKF |
248 | i++; |
249 | total_vals += node->hits; | |
250 | ||
251 | if (percentage) { | |
252 | unsigned long blocks = | |
253 | percentage * nranges / 100; | |
254 | ||
255 | if (total_vals >= blocks) { | |
256 | double cs = | |
257 | i * block_size / (1024 * 1024); | |
258 | char p = 'M'; | |
259 | ||
260 | if (cs > 1024.0) { | |
261 | cs /= 1024.0; | |
262 | p = 'G'; | |
263 | } | |
264 | if (cs > 1024.0) { | |
265 | cs /= 1024.0; | |
266 | p = 'T'; | |
267 | } | |
268 | ||
269 | printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); | |
270 | percentage = 0.0; | |
444256ea | 271 | } |
444256ea JA |
272 | } |
273 | } | |
6ff38856 | 274 | |
355934b7 VKF |
275 | perc_i = 100.0 / (double)output_nranges; |
276 | perc = 0.0; | |
921d17ba | 277 | |
355934b7 VKF |
278 | printf("\n Rows Hits No Hits Size\n"); |
279 | printf("--------------------------------------------------------\n"); | |
280 | for (i = 0; i < j; i++) { | |
281 | struct output_sum *os = &output_sums[i]; | |
282 | double gb = (double)os->nranges * block_size / 1024.0; | |
283 | char p = 'K'; | |
921d17ba | 284 | |
355934b7 VKF |
285 | if (gb > 1024.0) { |
286 | p = 'M'; | |
287 | gb /= 1024.0; | |
288 | } | |
289 | if (gb > 1024.0) { | |
290 | p = 'G'; | |
291 | gb /= 1024.0; | |
292 | } | |
293 | ||
294 | perc += perc_i; | |
295 | printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", | |
296 | i ? "|->" : "Top", perc, os->output, os->nranges, | |
297 | gb, p); | |
921d17ba JA |
298 | } |
299 | ||
355934b7 | 300 | free(output_sums); |
f880b1f6 | 301 | } |
6ff38856 | 302 | |
fd6f237d | 303 | free(hash); |
355934b7 | 304 | free(nodes); |
6ff38856 JA |
305 | return 0; |
306 | } |