Commit | Line | Data |
---|---|---|
6ff38856 JA |
1 | /* |
2 | * Generate/analyze pareto/zipf distributions to better understand | |
3 | * what an access pattern would look like. | |
4 | * | |
5 | * For instance, the following would generate a zipf distribution | |
6 | * with theta 1.2, using 100,000 values and split the reporting into | |
7 | * 20 buckets: | |
8 | * | |
9 | * t/genzipf zipf 1.2 100000 20 | |
10 | * | |
f880b1f6 JA |
11 | * Only the distribution type (zipf or pareto) and spread input need |
12 | * to be given, if not given defaults are used. | |
13 | * | |
6ff38856 JA |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <fcntl.h> | |
18 | #include <string.h> | |
19 | ||
20 | #include "../lib/zipf.h" | |
fd6f237d JA |
21 | #include "../flist.h" |
22 | #include "../hash.h" | |
23 | #include "../rbtree.h" | |
6ff38856 | 24 | |
f880b1f6 JA |
25 | #define DEF_NR 1000000 |
26 | #define DEF_NR_OUTPUT 23 | |
27 | ||
fd6f237d JA |
28 | struct node { |
29 | struct flist_head list; | |
30 | struct rb_node rb; | |
31 | unsigned long long val; | |
32 | unsigned long hits; | |
33 | }; | |
34 | ||
35 | static struct flist_head *hash; | |
36 | static unsigned long hash_bits = 24; | |
37 | static unsigned long hash_size = 1 << 24; | |
38 | static struct rb_root rb; | |
39 | ||
40 | static struct node *hash_lookup(unsigned long long val) | |
41 | { | |
42 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
43 | struct flist_head *entry; | |
44 | struct node *n; | |
45 | ||
46 | flist_for_each(entry, l) { | |
47 | n = flist_entry(entry, struct node, list); | |
48 | if (n->val == val) | |
49 | return n; | |
50 | } | |
51 | ||
52 | return NULL; | |
53 | } | |
54 | ||
55 | static void hash_insert(unsigned long long val) | |
56 | { | |
57 | struct flist_head *l = &hash[hash_long(val, hash_bits)]; | |
58 | struct node *n = malloc(sizeof(*n)); | |
59 | ||
60 | n->val = val; | |
61 | n->hits = 1; | |
62 | flist_add_tail(&n->list, l); | |
63 | } | |
64 | ||
65 | static void rb_insert(struct node *n) | |
66 | { | |
67 | struct rb_node **p, *parent; | |
68 | ||
69 | memset(&n->rb, 0, sizeof(n->rb)); | |
70 | p = &rb.rb_node; | |
71 | parent = NULL; | |
72 | while (*p) { | |
73 | struct node *__n; | |
74 | ||
75 | parent = *p; | |
76 | __n = rb_entry(parent, struct node, rb); | |
77 | if (n->hits > __n->hits) | |
78 | p = &(*p)->rb_left; | |
79 | else | |
80 | p = &(*p)->rb_right; | |
81 | } | |
82 | ||
83 | rb_link_node(&n->rb, parent, p); | |
84 | rb_insert_color(&n->rb, &rb); | |
85 | } | |
86 | ||
87 | static unsigned long rb_add(struct flist_head *list) | |
6ff38856 | 88 | { |
fd6f237d JA |
89 | struct flist_head *entry; |
90 | unsigned long ret = 0; | |
91 | struct node *n; | |
92 | ||
93 | flist_for_each(entry, list) { | |
94 | n = flist_entry(entry, struct node, list); | |
95 | ||
96 | rb_insert(n); | |
97 | ret++; | |
98 | } | |
6ff38856 | 99 | |
fd6f237d JA |
100 | return ret; |
101 | } | |
102 | ||
103 | static unsigned long rb_gen(void) | |
104 | { | |
105 | unsigned long ret = 0; | |
106 | unsigned int i; | |
107 | ||
108 | for (i = 0; i < hash_size; i++) | |
109 | ret += rb_add(&hash[i]); | |
110 | ||
111 | return ret; | |
6ff38856 JA |
112 | } |
113 | ||
114 | int main(int argc, char *argv[]) | |
115 | { | |
116 | unsigned long nranges, output_nranges; | |
fd6f237d JA |
117 | unsigned long offset; |
118 | unsigned long i, j, nr_vals, cur_vals, interval; | |
f880b1f6 | 119 | double *output, perc, perc_i; |
6ff38856 | 120 | struct zipf_state zs; |
fd6f237d | 121 | struct rb_node *n; |
6ff38856 JA |
122 | int use_zipf; |
123 | double val; | |
124 | ||
f880b1f6 | 125 | if (argc < 3) { |
6ff38856 JA |
126 | printf("%s: {zipf,pareto} val values [output ranges]\n", argv[0]); |
127 | return 1; | |
128 | } | |
129 | ||
130 | if (!strcmp(argv[1], "zipf")) | |
131 | use_zipf = 1; | |
132 | else if (!strcmp(argv[1], "pareto")) | |
133 | use_zipf = 0; | |
134 | else { | |
135 | printf("Bad distribution type <%s>\n", argv[1]); | |
136 | return 1; | |
137 | } | |
138 | ||
139 | val = atof(argv[2]); | |
f880b1f6 JA |
140 | |
141 | nranges = DEF_NR; | |
142 | output_nranges = DEF_NR_OUTPUT; | |
143 | ||
144 | if (argc >= 4) | |
145 | nranges = strtoul(argv[3], NULL, 10); | |
146 | if (argc >= 5) | |
6ff38856 | 147 | output_nranges = strtoul(argv[4], NULL, 10); |
6ff38856 | 148 | |
f880b1f6 | 149 | printf("Generating %s distribution with %f input and %lu ranges.\n", use_zipf ? "zipf" : "pareto", val, nranges); |
6ff38856 JA |
150 | |
151 | if (use_zipf) | |
152 | zipf_init(&zs, nranges, val); | |
153 | else | |
154 | pareto_init(&zs, nranges, val); | |
155 | ||
c71224e7 JA |
156 | hash_bits = 0; |
157 | hash_size = nranges; | |
158 | while ((hash_size >>= 1) != 0) | |
159 | hash_bits++; | |
160 | ||
161 | hash_size = 1 << hash_bits; | |
162 | ||
fd6f237d JA |
163 | hash = malloc(hash_size * sizeof(struct flist_head)); |
164 | for (i = 0; i < hash_size; i++) | |
165 | INIT_FLIST_HEAD(&hash[i]); | |
166 | ||
167 | for (nr_vals = 0, i = 0; i < nranges; i++) { | |
168 | struct node *n; | |
6ff38856 | 169 | |
6ff38856 | 170 | if (use_zipf) |
fd6f237d | 171 | offset = zipf_next(&zs); |
6ff38856 | 172 | else |
fd6f237d JA |
173 | offset = pareto_next(&zs); |
174 | ||
175 | n = hash_lookup(offset); | |
176 | if (n) | |
177 | n->hits++; | |
178 | else | |
179 | hash_insert(offset); | |
6ff38856 | 180 | |
6ff38856 JA |
181 | nr_vals++; |
182 | } | |
183 | ||
fd6f237d | 184 | nr_vals = rb_gen(); |
6ff38856 | 185 | |
c71224e7 | 186 | interval = (nr_vals + output_nranges - 1) / output_nranges; |
6ff38856 JA |
187 | |
188 | output = malloc(output_nranges * sizeof(double)); | |
189 | ||
fd6f237d JA |
190 | i = j = cur_vals = 0; |
191 | ||
192 | n = rb_first(&rb); | |
193 | while (n) { | |
194 | struct node *node = rb_entry(n, struct node, rb); | |
195 | ||
196 | if (i >= interval) { | |
197 | output[j] = (double) (cur_vals + 1) / (double) nranges; | |
6ff38856 JA |
198 | output[j] *= 100.0; |
199 | j++; | |
fd6f237d | 200 | cur_vals = node->hits; |
c71224e7 | 201 | interval += (nr_vals + output_nranges - 1) / output_nranges; |
fd6f237d JA |
202 | } else |
203 | cur_vals += node->hits; | |
6ff38856 | 204 | |
fd6f237d JA |
205 | n = rb_next(n); |
206 | i++; | |
0779dfc8 | 207 | } |
6ff38856 | 208 | |
f880b1f6 JA |
209 | perc_i = 100.0 / (double) output_nranges; |
210 | perc = 0.0; | |
211 | for (i = 0; i < j; i++) { | |
f880b1f6 | 212 | perc += perc_i; |
c71224e7 | 213 | printf("%s %6.2f%%:\t%6.2f%% of hits\n", i ? "|->" : "Top", perc, output[i]); |
f880b1f6 | 214 | } |
6ff38856 JA |
215 | |
216 | free(output); | |
fd6f237d | 217 | free(hash); |
6ff38856 JA |
218 | return 0; |
219 | } |