Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
4870138d RE |
6 | * with theta 1.2, using 262144 (1 GiB / 4096) values and split the |
7 | * reporting into 20 buckets: | |
6ff38856 | 8 | * |
2c9b3ccd | 9 | * ./t/fio-genzipf -t zipf -i 1.2 -g 1 -b 4096 -o 20 |
6ff38856 | 10 | * |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
6ff38856 | 17 | #include <string.h> |
921d17ba | 18 | #include <unistd.h> |
6ff38856 JA |
19 | |
20 | #include "../lib/zipf.h" | |
c4ba4d4f | 21 | #include "../lib/gauss.h" |
fd6f237d JA |
22 | #include "../flist.h" |
23 | #include "../hash.h" | |
6ff38856 | 24 | |
2c9b3ccd | 25 | #define DEF_NR_OUTPUT 20 |
f880b1f6 | 26 | |
fd6f237d JA |
27 | struct node { |
28 | struct flist_head list; | |
fd6f237d JA |
29 | unsigned long long val; |
30 | unsigned long hits; | |
31 | }; | |
32 | ||
33 | static struct flist_head *hash; | |
34 | static unsigned long hash_bits = 24; | |
35 | static unsigned long hash_size = 1 << 24; | |
fd6f237d | 36 | |
921d17ba JA |
37 | enum { |
38 | TYPE_NONE = 0, | |
39 | TYPE_ZIPF, | |
40 | TYPE_PARETO, | |
c4ba4d4f | 41 | TYPE_NORMAL, |
921d17ba | 42 | }; |
c4ba4d4f | 43 | static const char *dist_types[] = { "None", "Zipf", "Pareto", "Normal" }; |
921d17ba | 44 | |
85629b7f JA |
45 | enum { |
46 | OUTPUT_NORMAL, | |
47 | OUTPUT_CSV, | |
48 | }; | |
49 | ||
921d17ba | 50 | static int dist_type = TYPE_ZIPF; |
4870138d | 51 | static unsigned long gib_size = 500; |
444256ea | 52 | static unsigned long block_size = 4096; |
921d17ba | 53 | static unsigned long output_nranges = DEF_NR_OUTPUT; |
444256ea | 54 | static double percentage; |
921d17ba | 55 | static double dist_val; |
85629b7f | 56 | static int output_type = OUTPUT_NORMAL; |
921d17ba JA |
57 | |
58 | #define DEF_ZIPF_VAL 1.2 | |
59 | #define DEF_PARETO_VAL 0.3 | |
60 | ||
87f5fce3 JA |
61 | static unsigned int hashv(unsigned long long val) |
62 | { | |
63 | return jhash(&val, sizeof(val), 0) & (hash_size - 1); | |
64 | } | |
65 | ||
fd6f237d JA |
66 | static struct node *hash_lookup(unsigned long long val) |
67 | { | |
87f5fce3 | 68 | struct flist_head *l = &hash[hashv(val)]; |
fd6f237d JA |
69 | struct flist_head *entry; |
70 | struct node *n; | |
71 | ||
72 | flist_for_each(entry, l) { | |
73 | n = flist_entry(entry, struct node, list); | |
74 | if (n->val == val) | |
75 | return n; | |
76 | } | |
77 | ||
78 | return NULL; | |
79 | } | |
80 | ||
87f5fce3 | 81 | static void hash_insert(struct node *n, unsigned long long val) |
fd6f237d | 82 | { |
87f5fce3 | 83 | struct flist_head *l = &hash[hashv(val)]; |
fd6f237d JA |
84 | |
85 | n->val = val; | |
86 | n->hits = 1; | |
87 | flist_add_tail(&n->list, l); | |
6ff38856 JA |
88 | } |
89 | ||
4e98a450 JA |
90 | static void usage(void) |
91 | { | |
92 | printf("genzipf: test zipf/pareto values for fio input\n"); | |
93 | printf("\t-h\tThis help screen\n"); | |
94 | printf("\t-p\tGenerate size of data set that are hit by this percentage\n"); | |
c4ba4d4f JA |
95 | printf("\t-t\tDistribution type (zipf, pareto, or normal)\n"); |
96 | printf("\t-i\tDistribution algorithm input (zipf theta, pareto power,\n" | |
97 | "\t\tor normal %% deviation)\n"); | |
4e98a450 JA |
98 | printf("\t-b\tBlock size of a given range (in bytes)\n"); |
99 | printf("\t-g\tSize of data set (in gigabytes)\n"); | |
2c9b3ccd | 100 | printf("\t-o\tNumber of output rows\n"); |
4e98a450 JA |
101 | printf("\t-c\tOutput ranges in CSV format\n"); |
102 | } | |
103 | ||
921d17ba JA |
104 | static int parse_options(int argc, char *argv[]) |
105 | { | |
4e98a450 | 106 | const char *optstring = "t:g:i:o:b:p:ch"; |
921d17ba JA |
107 | int c, dist_val_set = 0; |
108 | ||
109 | while ((c = getopt(argc, argv, optstring)) != -1) { | |
110 | switch (c) { | |
4e98a450 JA |
111 | case 'h': |
112 | usage(); | |
113 | return 1; | |
444256ea JA |
114 | case 'p': |
115 | percentage = atof(optarg); | |
116 | break; | |
117 | case 'b': | |
118 | block_size = strtoul(optarg, NULL, 10); | |
119 | break; | |
921d17ba JA |
120 | case 't': |
121 | if (!strncmp(optarg, "zipf", 4)) | |
122 | dist_type = TYPE_ZIPF; | |
123 | else if (!strncmp(optarg, "pareto", 6)) | |
124 | dist_type = TYPE_PARETO; | |
c4ba4d4f JA |
125 | else if (!strncmp(optarg, "normal", 6)) |
126 | dist_type = TYPE_NORMAL; | |
921d17ba JA |
127 | else { |
128 | printf("wrong dist type: %s\n", optarg); | |
129 | return 1; | |
130 | } | |
131 | break; | |
132 | case 'g': | |
4870138d | 133 | gib_size = strtoul(optarg, NULL, 10); |
921d17ba JA |
134 | break; |
135 | case 'i': | |
136 | dist_val = atof(optarg); | |
137 | dist_val_set = 1; | |
138 | break; | |
921d17ba JA |
139 | case 'o': |
140 | output_nranges = strtoul(optarg, NULL, 10); | |
141 | break; | |
355934b7 | 142 | case 'c': |
85629b7f | 143 | output_type = OUTPUT_CSV; |
355934b7 | 144 | break; |
921d17ba JA |
145 | default: |
146 | printf("bad option %c\n", c); | |
147 | return 1; | |
148 | } | |
149 | } | |
150 | ||
151 | if (dist_type == TYPE_PARETO) { | |
152 | if ((dist_val >= 1.00 || dist_val < 0.00)) { | |
153 | printf("pareto input must be > 0.00 and < 1.00\n"); | |
154 | return 1; | |
155 | } | |
156 | if (!dist_val_set) | |
157 | dist_val = DEF_PARETO_VAL; | |
158 | } else if (dist_type == TYPE_ZIPF) { | |
159 | if (dist_val == 1.0) { | |
160 | printf("zipf input must be different than 1.0\n"); | |
161 | return 1; | |
162 | } | |
163 | if (!dist_val_set) | |
164 | dist_val = DEF_ZIPF_VAL; | |
165 | } | |
166 | ||
167 | return 0; | |
168 | } | |
169 | ||
444256ea JA |
170 | struct output_sum { |
171 | double output; | |
172 | unsigned int nranges; | |
173 | }; | |
174 | ||
24baa4c7 JA |
175 | static int node_cmp(const void *p1, const void *p2) |
176 | { | |
177 | const struct node *n1 = p1; | |
178 | const struct node *n2 = p2; | |
179 | ||
180 | return n2->hits - n1->hits; | |
181 | } | |
182 | ||
85629b7f JA |
183 | static void output_csv(struct node *nodes, unsigned long nnodes) |
184 | { | |
185 | unsigned long i; | |
186 | ||
187 | printf("rank, count\n"); | |
188 | for (i = 0; i < nnodes; i++) | |
189 | printf("%lu, %lu\n", i, nodes[i].hits); | |
190 | } | |
191 | ||
192 | static void output_normal(struct node *nodes, unsigned long nnodes, | |
193 | unsigned long nranges) | |
194 | { | |
195 | unsigned long i, j, cur_vals, interval_step, next_interval, total_vals; | |
196 | unsigned long blocks = percentage * nnodes / 100; | |
197 | double hit_percent_sum = 0; | |
198 | unsigned long long hit_sum = 0; | |
199 | double perc, perc_i; | |
200 | struct output_sum *output_sums; | |
201 | ||
202 | interval_step = (nnodes - 1) / output_nranges + 1; | |
203 | next_interval = interval_step; | |
204 | output_sums = malloc(output_nranges * sizeof(struct output_sum)); | |
205 | ||
206 | for (i = 0; i < output_nranges; i++) { | |
207 | output_sums[i].output = 0.0; | |
208 | output_sums[i].nranges = 0; | |
209 | } | |
210 | ||
211 | j = total_vals = cur_vals = 0; | |
212 | ||
213 | for (i = 0; i < nnodes; i++) { | |
214 | struct output_sum *os = &output_sums[j]; | |
215 | struct node *node = &nodes[i]; | |
216 | cur_vals += node->hits; | |
217 | total_vals += node->hits; | |
218 | os->nranges += node->hits; | |
219 | if (i == (next_interval) -1 || i == nnodes - 1) { | |
220 | os->output = (double) cur_vals / (double) nranges; | |
221 | os->output *= 100.0; | |
222 | cur_vals = 0; | |
223 | next_interval += interval_step; | |
224 | j++; | |
225 | } | |
226 | ||
227 | if (percentage) { | |
228 | if (total_vals >= blocks) { | |
18ae9b09 | 229 | double cs = (double) i * block_size / (1024.0 * 1024.0); |
85629b7f JA |
230 | char p = 'M'; |
231 | ||
232 | if (cs > 1024.0) { | |
233 | cs /= 1024.0; | |
234 | p = 'G'; | |
235 | } | |
236 | if (cs > 1024.0) { | |
237 | cs /= 1024.0; | |
238 | p = 'T'; | |
239 | } | |
240 | ||
241 | printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); | |
242 | percentage = 0.0; | |
243 | } | |
244 | } | |
245 | } | |
246 | ||
247 | perc_i = 100.0 / (double)output_nranges; | |
248 | perc = 0.0; | |
249 | ||
250 | printf("\n Rows Hits %% Sum %% # Hits Size\n"); | |
251 | printf("-----------------------------------------------------------------------\n"); | |
252 | for (i = 0; i < output_nranges; i++) { | |
253 | struct output_sum *os = &output_sums[i]; | |
254 | double gb = (double)os->nranges * block_size / 1024.0; | |
255 | char p = 'K'; | |
256 | ||
257 | if (gb > 1024.0) { | |
258 | p = 'M'; | |
259 | gb /= 1024.0; | |
260 | } | |
261 | if (gb > 1024.0) { | |
262 | p = 'G'; | |
263 | gb /= 1024.0; | |
264 | } | |
265 | ||
266 | perc += perc_i; | |
267 | hit_percent_sum += os->output; | |
268 | hit_sum += os->nranges; | |
269 | printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%%\t\t%8u\t%6.2f%c\n", | |
270 | i ? "|->" : "Top", perc, os->output, hit_percent_sum, | |
271 | os->nranges, gb, p); | |
272 | } | |
273 | ||
274 | printf("-----------------------------------------------------------------------\n"); | |
275 | printf("Total\t\t\t\t\t\t%8llu\n", hit_sum); | |
276 | free(output_sums); | |
277 | } | |
278 | ||
6ff38856 JA |
279 | int main(int argc, char *argv[]) |
280 | { | |
fd6f237d | 281 | unsigned long offset; |
444256ea | 282 | unsigned long long nranges; |
85629b7f | 283 | unsigned long nnodes; |
24baa4c7 | 284 | struct node *nodes; |
6ff38856 | 285 | struct zipf_state zs; |
c4ba4d4f | 286 | struct gauss_state gs; |
85629b7f | 287 | int i, j; |
6ff38856 | 288 | |
921d17ba | 289 | if (parse_options(argc, argv)) |
6ff38856 | 290 | return 1; |
6ff38856 | 291 | |
85629b7f | 292 | if (output_type != OUTPUT_CSV) |
4870138d RE |
293 | printf("Generating %s distribution with %f input and %lu GiB size and %lu block_size.\n", |
294 | dist_types[dist_type], dist_val, gib_size, block_size); | |
444256ea | 295 | |
4870138d | 296 | nranges = gib_size * 1024 * 1024 * 1024ULL; |
444256ea | 297 | nranges /= block_size; |
f880b1f6 | 298 | |
921d17ba JA |
299 | if (dist_type == TYPE_ZIPF) |
300 | zipf_init(&zs, nranges, dist_val, 1); | |
c4ba4d4f | 301 | else if (dist_type == TYPE_PARETO) |
921d17ba | 302 | pareto_init(&zs, nranges, dist_val, 1); |
c4ba4d4f JA |
303 | else |
304 | gauss_init(&gs, nranges, dist_val, 1); | |
6ff38856 | 305 | |
c71224e7 JA |
306 | hash_bits = 0; |
307 | hash_size = nranges; | |
308 | while ((hash_size >>= 1) != 0) | |
309 | hash_bits++; | |
310 | ||
311 | hash_size = 1 << hash_bits; | |
312 | ||
87f5fce3 | 313 | hash = calloc(hash_size, sizeof(struct flist_head)); |
fd6f237d JA |
314 | for (i = 0; i < hash_size; i++) |
315 | INIT_FLIST_HEAD(&hash[i]); | |
316 | ||
24baa4c7 JA |
317 | nodes = malloc(nranges * sizeof(struct node)); |
318 | ||
2c9b3ccd | 319 | for (i = j = 0; i < nranges; i++) { |
fd6f237d | 320 | struct node *n; |
6ff38856 | 321 | |
921d17ba | 322 | if (dist_type == TYPE_ZIPF) |
fd6f237d | 323 | offset = zipf_next(&zs); |
c4ba4d4f | 324 | else if (dist_type == TYPE_PARETO) |
fd6f237d | 325 | offset = pareto_next(&zs); |
c4ba4d4f JA |
326 | else |
327 | offset = gauss_next(&gs); | |
fd6f237d JA |
328 | |
329 | n = hash_lookup(offset); | |
330 | if (n) | |
331 | n->hits++; | |
24baa4c7 JA |
332 | else { |
333 | hash_insert(&nodes[j], offset); | |
334 | j++; | |
335 | } | |
6ff38856 JA |
336 | } |
337 | ||
24baa4c7 JA |
338 | qsort(nodes, j, sizeof(struct node), node_cmp); |
339 | nnodes = j; | |
921d17ba | 340 | |
85629b7f JA |
341 | if (output_type == OUTPUT_CSV) |
342 | output_csv(nodes, nnodes); | |
343 | else | |
344 | output_normal(nodes, nnodes, nranges); | |
6ff38856 | 345 | |
fd6f237d | 346 | free(hash); |
355934b7 | 347 | free(nodes); |
6ff38856 JA |
348 | return 0; |
349 | } |