genzip: cleanups
[fio.git] / t / genzipf.c
CommitLineData
6ff38856
JA
1/*
2 * Generate/analyze pareto/zipf distributions to better understand
3 * what an access pattern would look like.
4 *
5 * For instance, the following would generate a zipf distribution
2c9b3ccd 6 * with theta 1.2, using 262144 (1 GB / 4096) values and split the reporting into
6ff38856
JA
7 * 20 buckets:
8 *
2c9b3ccd 9 * ./t/fio-genzipf -t zipf -i 1.2 -g 1 -b 4096 -o 20
6ff38856 10 *
f880b1f6
JA
11 * Only the distribution type (zipf or pareto) and spread input need
12 * to be given, if not given defaults are used.
13 *
6ff38856
JA
14 */
15#include <stdio.h>
16#include <stdlib.h>
17#include <fcntl.h>
18#include <string.h>
921d17ba 19#include <unistd.h>
6ff38856
JA
20
21#include "../lib/zipf.h"
c4ba4d4f 22#include "../lib/gauss.h"
fd6f237d
JA
23#include "../flist.h"
24#include "../hash.h"
6ff38856 25
2c9b3ccd 26#define DEF_NR_OUTPUT 20
f880b1f6 27
fd6f237d
JA
28struct node {
29 struct flist_head list;
fd6f237d
JA
30 unsigned long long val;
31 unsigned long hits;
32};
33
34static struct flist_head *hash;
35static unsigned long hash_bits = 24;
36static unsigned long hash_size = 1 << 24;
fd6f237d 37
921d17ba
JA
38enum {
39 TYPE_NONE = 0,
40 TYPE_ZIPF,
41 TYPE_PARETO,
c4ba4d4f 42 TYPE_NORMAL,
921d17ba 43};
c4ba4d4f 44static const char *dist_types[] = { "None", "Zipf", "Pareto", "Normal" };
921d17ba 45
85629b7f
JA
46enum {
47 OUTPUT_NORMAL,
48 OUTPUT_CSV,
49};
50
921d17ba
JA
51static int dist_type = TYPE_ZIPF;
52static unsigned long gb_size = 500;
444256ea 53static unsigned long block_size = 4096;
921d17ba 54static unsigned long output_nranges = DEF_NR_OUTPUT;
444256ea 55static double percentage;
921d17ba 56static double dist_val;
85629b7f 57static int output_type = OUTPUT_NORMAL;
921d17ba
JA
58
59#define DEF_ZIPF_VAL 1.2
60#define DEF_PARETO_VAL 0.3
61
fd6f237d
JA
62static struct node *hash_lookup(unsigned long long val)
63{
64 struct flist_head *l = &hash[hash_long(val, hash_bits)];
65 struct flist_head *entry;
66 struct node *n;
67
68 flist_for_each(entry, l) {
69 n = flist_entry(entry, struct node, list);
70 if (n->val == val)
71 return n;
72 }
73
74 return NULL;
75}
76
24baa4c7 77static struct node *hash_insert(struct node *n, unsigned long long val)
fd6f237d
JA
78{
79 struct flist_head *l = &hash[hash_long(val, hash_bits)];
fd6f237d
JA
80
81 n->val = val;
82 n->hits = 1;
83 flist_add_tail(&n->list, l);
24baa4c7 84 return n;
6ff38856
JA
85}
86
4e98a450
JA
87static void usage(void)
88{
89 printf("genzipf: test zipf/pareto values for fio input\n");
90 printf("\t-h\tThis help screen\n");
91 printf("\t-p\tGenerate size of data set that are hit by this percentage\n");
c4ba4d4f
JA
92 printf("\t-t\tDistribution type (zipf, pareto, or normal)\n");
93 printf("\t-i\tDistribution algorithm input (zipf theta, pareto power,\n"
94 "\t\tor normal %% deviation)\n");
4e98a450
JA
95 printf("\t-b\tBlock size of a given range (in bytes)\n");
96 printf("\t-g\tSize of data set (in gigabytes)\n");
2c9b3ccd 97 printf("\t-o\tNumber of output rows\n");
4e98a450
JA
98 printf("\t-c\tOutput ranges in CSV format\n");
99}
100
921d17ba
JA
101static int parse_options(int argc, char *argv[])
102{
4e98a450 103 const char *optstring = "t:g:i:o:b:p:ch";
921d17ba
JA
104 int c, dist_val_set = 0;
105
106 while ((c = getopt(argc, argv, optstring)) != -1) {
107 switch (c) {
4e98a450
JA
108 case 'h':
109 usage();
110 return 1;
444256ea
JA
111 case 'p':
112 percentage = atof(optarg);
113 break;
114 case 'b':
115 block_size = strtoul(optarg, NULL, 10);
116 break;
921d17ba
JA
117 case 't':
118 if (!strncmp(optarg, "zipf", 4))
119 dist_type = TYPE_ZIPF;
120 else if (!strncmp(optarg, "pareto", 6))
121 dist_type = TYPE_PARETO;
c4ba4d4f
JA
122 else if (!strncmp(optarg, "normal", 6))
123 dist_type = TYPE_NORMAL;
921d17ba
JA
124 else {
125 printf("wrong dist type: %s\n", optarg);
126 return 1;
127 }
128 break;
129 case 'g':
130 gb_size = strtoul(optarg, NULL, 10);
131 break;
132 case 'i':
133 dist_val = atof(optarg);
134 dist_val_set = 1;
135 break;
921d17ba
JA
136 case 'o':
137 output_nranges = strtoul(optarg, NULL, 10);
138 break;
355934b7 139 case 'c':
85629b7f 140 output_type = OUTPUT_CSV;
355934b7 141 break;
921d17ba
JA
142 default:
143 printf("bad option %c\n", c);
144 return 1;
145 }
146 }
147
148 if (dist_type == TYPE_PARETO) {
149 if ((dist_val >= 1.00 || dist_val < 0.00)) {
150 printf("pareto input must be > 0.00 and < 1.00\n");
151 return 1;
152 }
153 if (!dist_val_set)
154 dist_val = DEF_PARETO_VAL;
155 } else if (dist_type == TYPE_ZIPF) {
156 if (dist_val == 1.0) {
157 printf("zipf input must be different than 1.0\n");
158 return 1;
159 }
160 if (!dist_val_set)
161 dist_val = DEF_ZIPF_VAL;
162 }
163
164 return 0;
165}
166
444256ea
JA
167struct output_sum {
168 double output;
169 unsigned int nranges;
170};
171
24baa4c7
JA
172static int node_cmp(const void *p1, const void *p2)
173{
174 const struct node *n1 = p1;
175 const struct node *n2 = p2;
176
177 return n2->hits - n1->hits;
178}
179
85629b7f
JA
180static void output_csv(struct node *nodes, unsigned long nnodes)
181{
182 unsigned long i;
183
184 printf("rank, count\n");
185 for (i = 0; i < nnodes; i++)
186 printf("%lu, %lu\n", i, nodes[i].hits);
187}
188
189static void output_normal(struct node *nodes, unsigned long nnodes,
190 unsigned long nranges)
191{
192 unsigned long i, j, cur_vals, interval_step, next_interval, total_vals;
193 unsigned long blocks = percentage * nnodes / 100;
194 double hit_percent_sum = 0;
195 unsigned long long hit_sum = 0;
196 double perc, perc_i;
197 struct output_sum *output_sums;
198
199 interval_step = (nnodes - 1) / output_nranges + 1;
200 next_interval = interval_step;
201 output_sums = malloc(output_nranges * sizeof(struct output_sum));
202
203 for (i = 0; i < output_nranges; i++) {
204 output_sums[i].output = 0.0;
205 output_sums[i].nranges = 0;
206 }
207
208 j = total_vals = cur_vals = 0;
209
210 for (i = 0; i < nnodes; i++) {
211 struct output_sum *os = &output_sums[j];
212 struct node *node = &nodes[i];
213 cur_vals += node->hits;
214 total_vals += node->hits;
215 os->nranges += node->hits;
216 if (i == (next_interval) -1 || i == nnodes - 1) {
217 os->output = (double) cur_vals / (double) nranges;
218 os->output *= 100.0;
219 cur_vals = 0;
220 next_interval += interval_step;
221 j++;
222 }
223
224 if (percentage) {
225 if (total_vals >= blocks) {
226 double cs = i * block_size / (1024 * 1024);
227 char p = 'M';
228
229 if (cs > 1024.0) {
230 cs /= 1024.0;
231 p = 'G';
232 }
233 if (cs > 1024.0) {
234 cs /= 1024.0;
235 p = 'T';
236 }
237
238 printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p);
239 percentage = 0.0;
240 }
241 }
242 }
243
244 perc_i = 100.0 / (double)output_nranges;
245 perc = 0.0;
246
247 printf("\n Rows Hits %% Sum %% # Hits Size\n");
248 printf("-----------------------------------------------------------------------\n");
249 for (i = 0; i < output_nranges; i++) {
250 struct output_sum *os = &output_sums[i];
251 double gb = (double)os->nranges * block_size / 1024.0;
252 char p = 'K';
253
254 if (gb > 1024.0) {
255 p = 'M';
256 gb /= 1024.0;
257 }
258 if (gb > 1024.0) {
259 p = 'G';
260 gb /= 1024.0;
261 }
262
263 perc += perc_i;
264 hit_percent_sum += os->output;
265 hit_sum += os->nranges;
266 printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%%\t\t%8u\t%6.2f%c\n",
267 i ? "|->" : "Top", perc, os->output, hit_percent_sum,
268 os->nranges, gb, p);
269 }
270
271 printf("-----------------------------------------------------------------------\n");
272 printf("Total\t\t\t\t\t\t%8llu\n", hit_sum);
273 free(output_sums);
274}
275
6ff38856
JA
276int main(int argc, char *argv[])
277{
fd6f237d 278 unsigned long offset;
444256ea 279 unsigned long long nranges;
85629b7f 280 unsigned long nnodes;
24baa4c7 281 struct node *nodes;
6ff38856 282 struct zipf_state zs;
c4ba4d4f 283 struct gauss_state gs;
85629b7f 284 int i, j;
6ff38856 285
921d17ba 286 if (parse_options(argc, argv))
6ff38856 287 return 1;
6ff38856 288
85629b7f 289 if (output_type != OUTPUT_CSV)
355934b7 290 printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size);
444256ea
JA
291
292 nranges = gb_size * 1024 * 1024 * 1024ULL;
293 nranges /= block_size;
f880b1f6 294
921d17ba
JA
295 if (dist_type == TYPE_ZIPF)
296 zipf_init(&zs, nranges, dist_val, 1);
c4ba4d4f 297 else if (dist_type == TYPE_PARETO)
921d17ba 298 pareto_init(&zs, nranges, dist_val, 1);
c4ba4d4f
JA
299 else
300 gauss_init(&gs, nranges, dist_val, 1);
6ff38856 301
c71224e7
JA
302 hash_bits = 0;
303 hash_size = nranges;
304 while ((hash_size >>= 1) != 0)
305 hash_bits++;
306
307 hash_size = 1 << hash_bits;
308
fd6f237d
JA
309 hash = malloc(hash_size * sizeof(struct flist_head));
310 for (i = 0; i < hash_size; i++)
311 INIT_FLIST_HEAD(&hash[i]);
312
24baa4c7
JA
313 nodes = malloc(nranges * sizeof(struct node));
314
2c9b3ccd 315 for (i = j = 0; i < nranges; i++) {
fd6f237d 316 struct node *n;
6ff38856 317
921d17ba 318 if (dist_type == TYPE_ZIPF)
fd6f237d 319 offset = zipf_next(&zs);
c4ba4d4f 320 else if (dist_type == TYPE_PARETO)
fd6f237d 321 offset = pareto_next(&zs);
c4ba4d4f
JA
322 else
323 offset = gauss_next(&gs);
fd6f237d
JA
324
325 n = hash_lookup(offset);
326 if (n)
327 n->hits++;
24baa4c7
JA
328 else {
329 hash_insert(&nodes[j], offset);
330 j++;
331 }
6ff38856 332
6ff38856
JA
333 }
334
24baa4c7
JA
335 qsort(nodes, j, sizeof(struct node), node_cmp);
336 nnodes = j;
921d17ba 337
85629b7f
JA
338 if (output_type == OUTPUT_CSV)
339 output_csv(nodes, nnodes);
340 else
341 output_normal(nodes, nnodes, nranges);
6ff38856 342
fd6f237d 343 free(hash);
355934b7 344 free(nodes);
6ff38856
JA
345 return 0;
346}