Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
6 | * with theta 1.2, using 100,000 values and split the reporting into | |
7 | * 20 buckets: | |
8 | * | |
9 | * t/genzipf zipf 1.2 100000 20 | |
10 | * | |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <fcntl.h> | |
18 | #include <string.h> | |
921d17ba | 19 | #include <unistd.h> |
6ff38856 JA |
20 | |
21 | #include "../lib/zipf.h" | |
fd6f237d JA |
22 | #include "../flist.h" |
23 | #include "../hash.h" | |
24 | #include "../rbtree.h" | |
6ff38856 | 25 | |
f880b1f6 JA |
26 | #define DEF_NR 1000000 |
27 | #define DEF_NR_OUTPUT 23 | |
28 | ||
fd6f237d JA |
29 | struct node { |
30 | struct flist_head list; | |
31 | struct rb_node rb; | |
32 | unsigned long long val; | |
33 | unsigned long hits; | |
34 | }; | |
35 | ||
36 | static struct flist_head *hash; | |
37 | static unsigned long hash_bits = 24; | |
38 | static unsigned long hash_size = 1 << 24; | |
39 | static struct rb_root rb; | |
40 | ||
921d17ba JA |
41 | enum { |
42 | TYPE_NONE = 0, | |
43 | TYPE_ZIPF, | |
44 | TYPE_PARETO, | |
45 | }; | |
46 | static const char *dist_types[] = { "None", "Zipf", "Pareto" }; | |
47 | ||
48 | static int dist_type = TYPE_ZIPF; | |
49 | static unsigned long gb_size = 500; | |
50 | static unsigned long nranges = DEF_NR; | |
51 | static unsigned long output_nranges = DEF_NR_OUTPUT; | |
52 | static double dist_val; | |
53 | ||
54 | #define DEF_ZIPF_VAL 1.2 | |
55 | #define DEF_PARETO_VAL 0.3 | |
56 | ||
fd6f237d JA |
57 | static struct node *hash_lookup(unsigned long long val) |
58 | { | |
59 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
60 | struct flist_head *entry; | |
61 | struct node *n; | |
62 | ||
63 | flist_for_each(entry, l) { | |
64 | n = flist_entry(entry, struct node, list); | |
65 | if (n->val == val) | |
66 | return n; | |
67 | } | |
68 | ||
69 | return NULL; | |
70 | } | |
71 | ||
72 | static void hash_insert(unsigned long long val) | |
73 | { | |
74 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
75 | struct node *n = malloc(sizeof(*n)); | |
76 | ||
77 | n->val = val; | |
78 | n->hits = 1; | |
79 | flist_add_tail(&n->list, l); | |
80 | } | |
81 | ||
82 | static void rb_insert(struct node *n) | |
83 | { | |
84 | struct rb_node **p, *parent; | |
85 | ||
86 | memset(&n->rb, 0, sizeof(n->rb)); | |
87 | p = &rb.rb_node; | |
88 | parent = NULL; | |
89 | while (*p) { | |
90 | struct node *__n; | |
91 | ||
92 | parent = *p; | |
93 | __n = rb_entry(parent, struct node, rb); | |
94 | if (n->hits > __n->hits) | |
95 | p = &(*p)->rb_left; | |
96 | else | |
97 | p = &(*p)->rb_right; | |
98 | } | |
99 | ||
100 | rb_link_node(&n->rb, parent, p); | |
101 | rb_insert_color(&n->rb, &rb); | |
102 | } | |
103 | ||
104 | static unsigned long rb_add(struct flist_head *list) | |
6ff38856 | 105 | { |
fd6f237d JA |
106 | struct flist_head *entry; |
107 | unsigned long ret = 0; | |
108 | struct node *n; | |
109 | ||
110 | flist_for_each(entry, list) { | |
111 | n = flist_entry(entry, struct node, list); | |
112 | ||
113 | rb_insert(n); | |
114 | ret++; | |
115 | } | |
6ff38856 | 116 | |
fd6f237d JA |
117 | return ret; |
118 | } | |
119 | ||
120 | static unsigned long rb_gen(void) | |
121 | { | |
122 | unsigned long ret = 0; | |
123 | unsigned int i; | |
124 | ||
125 | for (i = 0; i < hash_size; i++) | |
126 | ret += rb_add(&hash[i]); | |
127 | ||
128 | return ret; | |
6ff38856 JA |
129 | } |
130 | ||
921d17ba JA |
131 | static int parse_options(int argc, char *argv[]) |
132 | { | |
133 | const char *optstring = "t:g:i:r:o:"; | |
134 | int c, dist_val_set = 0; | |
135 | ||
136 | while ((c = getopt(argc, argv, optstring)) != -1) { | |
137 | switch (c) { | |
138 | case 't': | |
139 | if (!strncmp(optarg, "zipf", 4)) | |
140 | dist_type = TYPE_ZIPF; | |
141 | else if (!strncmp(optarg, "pareto", 6)) | |
142 | dist_type = TYPE_PARETO; | |
143 | else { | |
144 | printf("wrong dist type: %s\n", optarg); | |
145 | return 1; | |
146 | } | |
147 | break; | |
148 | case 'g': | |
149 | gb_size = strtoul(optarg, NULL, 10); | |
150 | break; | |
151 | case 'i': | |
152 | dist_val = atof(optarg); | |
153 | dist_val_set = 1; | |
154 | break; | |
155 | case 'r': | |
156 | nranges = strtoul(optarg, NULL, 10); | |
157 | break; | |
158 | case 'o': | |
159 | output_nranges = strtoul(optarg, NULL, 10); | |
160 | break; | |
161 | default: | |
162 | printf("bad option %c\n", c); | |
163 | return 1; | |
164 | } | |
165 | } | |
166 | ||
167 | if (dist_type == TYPE_PARETO) { | |
168 | if ((dist_val >= 1.00 || dist_val < 0.00)) { | |
169 | printf("pareto input must be > 0.00 and < 1.00\n"); | |
170 | return 1; | |
171 | } | |
172 | if (!dist_val_set) | |
173 | dist_val = DEF_PARETO_VAL; | |
174 | } else if (dist_type == TYPE_ZIPF) { | |
175 | if (dist_val == 1.0) { | |
176 | printf("zipf input must be different than 1.0\n"); | |
177 | return 1; | |
178 | } | |
179 | if (!dist_val_set) | |
180 | dist_val = DEF_ZIPF_VAL; | |
181 | } | |
182 | ||
183 | return 0; | |
184 | } | |
185 | ||
6ff38856 JA |
186 | int main(int argc, char *argv[]) |
187 | { | |
fd6f237d JA |
188 | unsigned long offset; |
189 | unsigned long i, j, nr_vals, cur_vals, interval; | |
f880b1f6 | 190 | double *output, perc, perc_i; |
6ff38856 | 191 | struct zipf_state zs; |
fd6f237d | 192 | struct rb_node *n; |
6ff38856 | 193 | |
921d17ba | 194 | if (parse_options(argc, argv)) |
6ff38856 | 195 | return 1; |
6ff38856 | 196 | |
921d17ba JA |
197 | printf("Generating %s distribution with %f input and %lu ranges.\n", dist_types[dist_type], dist_val, nranges); |
198 | printf("Using device gb=%lu\n\n", gb_size); | |
f880b1f6 | 199 | |
921d17ba JA |
200 | if (dist_type == TYPE_ZIPF) |
201 | zipf_init(&zs, nranges, dist_val, 1); | |
6ff38856 | 202 | else |
921d17ba | 203 | pareto_init(&zs, nranges, dist_val, 1); |
6ff38856 | 204 | |
c71224e7 JA |
205 | hash_bits = 0; |
206 | hash_size = nranges; | |
207 | while ((hash_size >>= 1) != 0) | |
208 | hash_bits++; | |
209 | ||
210 | hash_size = 1 << hash_bits; | |
211 | ||
fd6f237d JA |
212 | hash = malloc(hash_size * sizeof(struct flist_head)); |
213 | for (i = 0; i < hash_size; i++) | |
214 | INIT_FLIST_HEAD(&hash[i]); | |
215 | ||
216 | for (nr_vals = 0, i = 0; i < nranges; i++) { | |
217 | struct node *n; | |
6ff38856 | 218 | |
921d17ba | 219 | if (dist_type == TYPE_ZIPF) |
fd6f237d | 220 | offset = zipf_next(&zs); |
6ff38856 | 221 | else |
fd6f237d JA |
222 | offset = pareto_next(&zs); |
223 | ||
224 | n = hash_lookup(offset); | |
225 | if (n) | |
226 | n->hits++; | |
227 | else | |
228 | hash_insert(offset); | |
6ff38856 | 229 | |
6ff38856 JA |
230 | nr_vals++; |
231 | } | |
232 | ||
fd6f237d | 233 | nr_vals = rb_gen(); |
6ff38856 | 234 | |
c71224e7 | 235 | interval = (nr_vals + output_nranges - 1) / output_nranges; |
6ff38856 JA |
236 | |
237 | output = malloc(output_nranges * sizeof(double)); | |
238 | ||
fd6f237d JA |
239 | i = j = cur_vals = 0; |
240 | ||
241 | n = rb_first(&rb); | |
242 | while (n) { | |
243 | struct node *node = rb_entry(n, struct node, rb); | |
244 | ||
245 | if (i >= interval) { | |
246 | output[j] = (double) (cur_vals + 1) / (double) nranges; | |
6ff38856 JA |
247 | output[j] *= 100.0; |
248 | j++; | |
fd6f237d | 249 | cur_vals = node->hits; |
c71224e7 | 250 | interval += (nr_vals + output_nranges - 1) / output_nranges; |
fd6f237d JA |
251 | } else |
252 | cur_vals += node->hits; | |
6ff38856 | 253 | |
fd6f237d JA |
254 | n = rb_next(n); |
255 | i++; | |
0779dfc8 | 256 | } |
6ff38856 | 257 | |
f880b1f6 JA |
258 | perc_i = 100.0 / (double) output_nranges; |
259 | perc = 0.0; | |
921d17ba JA |
260 | |
261 | printf(" Rows Hits Size\n"); | |
262 | printf("-------------------------------------------\n"); | |
f880b1f6 | 263 | for (i = 0; i < j; i++) { |
921d17ba JA |
264 | double gb = (double) gb_size * perc_i / 100.0; |
265 | char p = 'G'; | |
266 | ||
267 | if (gb < 1.0) { | |
268 | p = 'M'; | |
269 | gb *= 1024.0; | |
270 | } | |
271 | ||
f880b1f6 | 272 | perc += perc_i; |
921d17ba | 273 | printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%c\n", i ? "|->" : "Top", perc, output[i], gb, p); |
f880b1f6 | 274 | } |
6ff38856 JA |
275 | |
276 | free(output); | |
fd6f237d | 277 | free(hash); |
6ff38856 JA |
278 | return 0; |
279 | } |