X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=t%2Fgenzipf.c;h=c5f098c4c606eceb26072048fa398cfb31bfedda;hp=e625defc60cecd2c0f82c481175b7ebe1b6bda6b;hb=ad1f90aa84ba96916d02043958ee416a499f3f25;hpb=18ded917799ea71ff950360fab7eebebe3c2f406 diff --git a/t/genzipf.c b/t/genzipf.c index e625defc..c5f098c4 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -16,18 +16,17 @@ #include #include #include +#include #include "../lib/zipf.h" #include "../flist.h" #include "../hash.h" -#include "../rbtree.h" #define DEF_NR 1000000 #define DEF_NR_OUTPUT 23 struct node { struct flist_head list; - struct rb_node rb; unsigned long long val; unsigned long hits; }; @@ -35,7 +34,24 @@ struct node { static struct flist_head *hash; static unsigned long hash_bits = 24; static unsigned long hash_size = 1 << 24; -static struct rb_root rb; + +enum { + TYPE_NONE = 0, + TYPE_ZIPF, + TYPE_PARETO, +}; +static const char *dist_types[] = { "None", "Zipf", "Pareto" }; + +static int dist_type = TYPE_ZIPF; +static unsigned long gb_size = 500; +static unsigned long block_size = 4096; +static unsigned long output_nranges = DEF_NR_OUTPUT; +static double percentage; +static double dist_val; +static int output_csv = 0; + +#define DEF_ZIPF_VAL 1.2 +#define DEF_PARETO_VAL 0.3 static struct node *hash_lookup(unsigned long long val) { @@ -52,114 +68,129 @@ static struct node *hash_lookup(unsigned long long val) return NULL; } -static void hash_insert(unsigned long long val) +static struct node *hash_insert(struct node *n, unsigned long long val) { struct flist_head *l = &hash[hash_long(val, hash_bits)]; - struct node *n = malloc(sizeof(*n)); n->val = val; n->hits = 1; flist_add_tail(&n->list, l); + return n; } -static void rb_insert(struct node *n) +static void usage(void) { - struct rb_node **p, *parent; - - memset(&n->rb, 0, sizeof(n->rb)); - p = &rb.rb_node; - parent = NULL; - while (*p) { - struct node *__n; - - parent = *p; - __n = rb_entry(parent, struct node, rb); - if (n->hits > __n->hits) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&n->rb, parent, p); - rb_insert_color(&n->rb, &rb); + printf("genzipf: test zipf/pareto values for fio input\n"); + printf("\t-h\tThis help screen\n"); + printf("\t-p\tGenerate size of data set that are hit by this percentage\n"); + printf("\t-t\tDistribution type (zipf or pareto)\n"); + printf("\t-i\tDistribution algorithm input (zipf theta or pareto power)\n"); + printf("\t-b\tBlock size of a given range (in bytes)\n"); + printf("\t-g\tSize of data set (in gigabytes)\n"); + printf("\t-o\tNumber of output columns\n"); + printf("\t-c\tOutput ranges in CSV format\n"); } -static unsigned long rb_add(struct flist_head *list) +static int parse_options(int argc, char *argv[]) { - struct flist_head *entry; - unsigned long ret = 0; - struct node *n; - - flist_for_each(entry, list) { - n = flist_entry(entry, struct node, list); + const char *optstring = "t:g:i:o:b:p:ch"; + int c, dist_val_set = 0; + + while ((c = getopt(argc, argv, optstring)) != -1) { + switch (c) { + case 'h': + usage(); + return 1; + case 'p': + percentage = atof(optarg); + break; + case 'b': + block_size = strtoul(optarg, NULL, 10); + break; + case 't': + if (!strncmp(optarg, "zipf", 4)) + dist_type = TYPE_ZIPF; + else if (!strncmp(optarg, "pareto", 6)) + dist_type = TYPE_PARETO; + else { + printf("wrong dist type: %s\n", optarg); + return 1; + } + break; + case 'g': + gb_size = strtoul(optarg, NULL, 10); + break; + case 'i': + dist_val = atof(optarg); + dist_val_set = 1; + break; + case 'o': + output_nranges = strtoul(optarg, NULL, 10); + break; + case 'c': + output_csv = 1; + break; + default: + printf("bad option %c\n", c); + return 1; + } + } - rb_insert(n); - ret++; + if (dist_type == TYPE_PARETO) { + if ((dist_val >= 1.00 || dist_val < 0.00)) { + printf("pareto input must be > 0.00 and < 1.00\n"); + return 1; + } + if (!dist_val_set) + dist_val = DEF_PARETO_VAL; + } else if (dist_type == TYPE_ZIPF) { + if (dist_val == 1.0) { + printf("zipf input must be different than 1.0\n"); + return 1; + } + if (!dist_val_set) + dist_val = DEF_ZIPF_VAL; } - return ret; + return 0; } -static unsigned long rb_gen(void) -{ - unsigned long ret = 0; - unsigned int i; +struct output_sum { + double output; + unsigned int nranges; +}; - for (i = 0; i < hash_size; i++) - ret += rb_add(&hash[i]); +static int node_cmp(const void *p1, const void *p2) +{ + const struct node *n1 = p1; + const struct node *n2 = p2; - return ret; + return n2->hits - n1->hits; } int main(int argc, char *argv[]) { - unsigned long nranges, output_nranges; unsigned long offset; - unsigned long i, j, nr_vals, cur_vals, interval; - double *output, perc, perc_i; + unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes; + unsigned long long nranges; + struct output_sum *output_sums; + struct node *nodes; + double perc, perc_i; struct zipf_state zs; - struct rb_node *n; - int use_zipf; - double val; - - if (argc < 3) { - printf("%s: {zipf,pareto} val values [output ranges]\n", argv[0]); - return 1; - } - if (!strcmp(argv[1], "zipf")) - use_zipf = 1; - else if (!strcmp(argv[1], "pareto")) - use_zipf = 0; - else { - printf("Bad distribution type <%s>\n", argv[1]); - return 1; - } - - val = atof(argv[2]); - if ((val >= 1.00 || val < 0.00) && !use_zipf) { - printf("pareto input must be > 0.00 and < 1.00\n"); - return 1; - } - if (val == 1.0 && use_zipf) { - printf("zipf input must be different than 1.0\n"); + if (parse_options(argc, argv)) return 1; - } - - nranges = DEF_NR; - output_nranges = DEF_NR_OUTPUT; - if (argc >= 4) - nranges = strtoul(argv[3], NULL, 10); - if (argc >= 5) - output_nranges = strtoul(argv[4], NULL, 10); + if( !output_csv ) + printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); - printf("Generating %s distribution with %f input and %lu ranges.\n", use_zipf ? "zipf" : "pareto", val, nranges); + nranges = gb_size * 1024 * 1024 * 1024ULL; + nranges /= block_size; - if (use_zipf) - zipf_init(&zs, nranges, val, 1); + if (dist_type == TYPE_ZIPF) + zipf_init(&zs, nranges, dist_val, 1); else - pareto_init(&zs, nranges, val, 1); + pareto_init(&zs, nranges, dist_val, 1); hash_bits = 0; hash_size = nranges; @@ -172,10 +203,12 @@ int main(int argc, char *argv[]) for (i = 0; i < hash_size; i++) INIT_FLIST_HEAD(&hash[i]); - for (nr_vals = 0, i = 0; i < nranges; i++) { + nodes = malloc(nranges * sizeof(struct node)); + + for (nr_vals = i = j = 0; i < nranges; i++) { struct node *n; - if (use_zipf) + if (dist_type == TYPE_ZIPF) offset = zipf_next(&zs); else offset = pareto_next(&zs); @@ -183,45 +216,107 @@ int main(int argc, char *argv[]) n = hash_lookup(offset); if (n) n->hits++; - else - hash_insert(offset); - - nr_vals++; - } - - nr_vals = rb_gen(); - - interval = (nr_vals + output_nranges - 1) / output_nranges; - - output = malloc(output_nranges * sizeof(double)); - - i = j = cur_vals = 0; - - n = rb_first(&rb); - while (n) { - struct node *node = rb_entry(n, struct node, rb); - - if (i >= interval) { - output[j] = (double) (cur_vals + 1) / (double) nranges; - output[j] *= 100.0; + else { + hash_insert(&nodes[j], offset); j++; - cur_vals = node->hits; - interval += (nr_vals + output_nranges - 1) / output_nranges; - } else - cur_vals += node->hits; + } - n = rb_next(n); - i++; + nr_vals++; } - perc_i = 100.0 / (double) output_nranges; - perc = 0.0; - for (i = 0; i < j; i++) { - perc += perc_i; - printf("%s %6.2f%%:\t%6.2f%% of hits\n", i ? "|->" : "Top", perc, output[i]); + qsort(nodes, j, sizeof(struct node), node_cmp); + nnodes = j; + nr_vals = nnodes; + + if (output_csv) { + printf("rank, count\n"); + for (k = 0; k < nnodes; k++) + printf("%lu, %lu\n", k, nodes[k].hits); + } else { + interval = (nr_vals + output_nranges - 1) / output_nranges; + + output_sums = malloc(output_nranges * sizeof(struct output_sum)); + for (i = 0; i < output_nranges; i++) { + output_sums[i].output = 0.0; + output_sums[i].nranges = 1; + } + + total_vals = i = j = cur_vals = 0; + + for (k = 0; k < nnodes; k++) { + struct output_sum *os = &output_sums[j]; + struct node *node = &nodes[k]; + + if (i >= interval) { + os->output = + (double)(cur_vals + 1) / (double)nranges; + os->output *= 100.0; + j++; + cur_vals = node->hits; + interval += + (nr_vals + output_nranges - + 1) / output_nranges; + } else { + cur_vals += node->hits; + os->nranges += node->hits; + } + + i++; + total_vals += node->hits; + + if (percentage) { + unsigned long blocks = + percentage * nranges / 100; + + if (total_vals >= blocks) { + double cs = + i * block_size / (1024 * 1024); + char p = 'M'; + + if (cs > 1024.0) { + cs /= 1024.0; + p = 'G'; + } + if (cs > 1024.0) { + cs /= 1024.0; + p = 'T'; + } + + printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); + percentage = 0.0; + } + } + } + + perc_i = 100.0 / (double)output_nranges; + perc = 0.0; + + printf("\n Rows Hits No Hits Size\n"); + printf("--------------------------------------------------------\n"); + for (i = 0; i < j; i++) { + struct output_sum *os = &output_sums[i]; + double gb = (double)os->nranges * block_size / 1024.0; + char p = 'K'; + + if (gb > 1024.0) { + p = 'M'; + gb /= 1024.0; + } + if (gb > 1024.0) { + p = 'G'; + gb /= 1024.0; + } + + perc += perc_i; + printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", + i ? "|->" : "Top", perc, os->output, os->nranges, + gb, p); + } + + free(output_sums); } - free(output); free(hash); + free(nodes); return 0; }