#include <string.h>
#include "../lib/zipf.h"
+#include "../flist.h"
+#include "../hash.h"
+#include "../rbtree.h"
#define DEF_NR 1000000
#define DEF_NR_OUTPUT 23
-static int val_cmp(const void *p1, const void *p2)
+struct node {
+ struct flist_head list;
+ struct rb_node rb;
+ unsigned long long val;
+ unsigned long hits;
+};
+
+static struct flist_head *hash;
+static unsigned long hash_bits = 24;
+static unsigned long hash_size = 1 << 24;
+static struct rb_root rb;
+
+static struct node *hash_lookup(unsigned long long val)
+{
+ struct flist_head *l = &hash[hash_long(val, hash_bits)];
+ struct flist_head *entry;
+ struct node *n;
+
+ flist_for_each(entry, l) {
+ n = flist_entry(entry, struct node, list);
+ if (n->val == val)
+ return n;
+ }
+
+ return NULL;
+}
+
+static void hash_insert(unsigned long long val)
+{
+ struct flist_head *l = &hash[hash_long(val, hash_bits)];
+ struct node *n = malloc(sizeof(*n));
+
+ n->val = val;
+ n->hits = 1;
+ flist_add_tail(&n->list, l);
+}
+
+static void rb_insert(struct node *n)
+{
+ struct rb_node **p, *parent;
+
+ memset(&n->rb, 0, sizeof(n->rb));
+ p = &rb.rb_node;
+ parent = NULL;
+ while (*p) {
+ struct node *__n;
+
+ parent = *p;
+ __n = rb_entry(parent, struct node, rb);
+ if (n->hits > __n->hits)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&n->rb, parent, p);
+ rb_insert_color(&n->rb, &rb);
+}
+
+static unsigned long rb_add(struct flist_head *list)
{
- const unsigned long *v1 = p1;
- const unsigned long *v2 = p2;
+ struct flist_head *entry;
+ unsigned long ret = 0;
+ struct node *n;
+
+ flist_for_each(entry, list) {
+ n = flist_entry(entry, struct node, list);
+
+ rb_insert(n);
+ ret++;
+ }
- return *v1 - *v2;
+ return ret;
+}
+
+static unsigned long rb_gen(void)
+{
+ unsigned long ret = 0;
+ unsigned int i;
+
+ for (i = 0; i < hash_size; i++)
+ ret += rb_add(&hash[i]);
+
+ return ret;
}
int main(int argc, char *argv[])
{
unsigned long nranges, output_nranges;
- unsigned long *vals;
- unsigned long i, j, nr_vals, cur_vals, max_val, interval;
+ unsigned long offset;
+ unsigned long i, j, nr_vals, cur_vals, interval;
double *output, perc, perc_i;
struct zipf_state zs;
+ struct rb_node *n;
int use_zipf;
double val;
else
pareto_init(&zs, nranges, val);
- vals = malloc(nranges * sizeof(unsigned long));
+ hash = malloc(hash_size * sizeof(struct flist_head));
+ for (i = 0; i < hash_size; i++)
+ INIT_FLIST_HEAD(&hash[i]);
+
+ for (nr_vals = 0, i = 0; i < nranges; i++) {
+ struct node *n;
- max_val = nr_vals = 0;
- for (i = 0; i < nranges; i++) {
if (use_zipf)
- vals[nr_vals] = zipf_next(&zs);
+ offset = zipf_next(&zs);
else
- vals[nr_vals] = pareto_next(&zs);
+ offset = pareto_next(&zs);
+
+ n = hash_lookup(offset);
+ if (n)
+ n->hits++;
+ else
+ hash_insert(offset);
- if (vals[nr_vals] > max_val)
- max_val = vals[nr_vals];
nr_vals++;
}
- qsort(vals, nr_vals, sizeof(unsigned long), val_cmp);
+ nr_vals = rb_gen();
- interval = (max_val + output_nranges - 1) / output_nranges;
+ interval = nr_vals / output_nranges;
output = malloc(output_nranges * sizeof(double));
- for (i = j = 0, cur_vals = 0; i < nr_vals; i++) {
- if (vals[i] > interval) {
- output[j] = (double) (cur_vals + 1) / (double) nr_vals;
+ i = j = cur_vals = 0;
+
+ n = rb_first(&rb);
+ while (n) {
+ struct node *node = rb_entry(n, struct node, rb);
+
+ if (i >= interval) {
+ output[j] = (double) (cur_vals + 1) / (double) nranges;
output[j] *= 100.0;
j++;
- cur_vals = 0;
- interval += (max_val + output_nranges - 1) / output_nranges;
- continue;
- }
- cur_vals++;
- }
+ cur_vals = node->hits;
+ interval += nr_vals / output_nranges;
+ } else
+ cur_vals += node->hits;
- if (cur_vals) {
- output[j] = (double) (cur_vals + 1) / (double) nr_vals;
- output[j] *= 100.0;
- j++;
+ n = rb_next(n);
+ i++;
}
perc_i = 100.0 / (double) output_nranges;
perc = 0.0;
for (i = 0; i < j; i++) {
- printf("%6.2f%% -> %6.2f%%:\t%6.2f%%\n", perc, perc + perc_i, output[i]);
perc += perc_i;
+ printf("Top %6.2f%%:\t%6.2f%% of hits\n", perc, output[i]);
}
free(output);
- free(vals);
+ free(hash);
return 0;
}