X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=t%2Fgenzipf.c;h=4fc10ae72d8421e3407d0239d0f96c2257529d59;hb=c7b42e0a8c3d6fb179cc7c681fcee58623de8b2f;hp=4ba940cfefeb60f69045ca9f14f8ea20e1fc4a5b;hpb=444256eaf4f0ffdc4cacef37873d58f2a65bf8e6;p=fio.git diff --git a/t/genzipf.c b/t/genzipf.c index 4ba940cf..4fc10ae7 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -3,10 +3,10 @@ * what an access pattern would look like. * * For instance, the following would generate a zipf distribution - * with theta 1.2, using 100,000 values and split the reporting into - * 20 buckets: + * with theta 1.2, using 262144 (1 GiB / 4096) values and split the + * reporting into 20 buckets: * - * t/genzipf zipf 1.2 100000 20 + * ./t/fio-genzipf -t zipf -i 1.2 -g 1 -b 4096 -o 20 * * Only the distribution type (zipf or pareto) and spread input need * to be given, if not given defaults are used. @@ -14,21 +14,18 @@ */ #include #include -#include #include #include #include "../lib/zipf.h" +#include "../lib/gauss.h" #include "../flist.h" #include "../hash.h" -#include "../rbtree.h" -#define DEF_NR 1000000 -#define DEF_NR_OUTPUT 23 +#define DEF_NR_OUTPUT 20 struct node { struct flist_head list; - struct rb_node rb; unsigned long long val; unsigned long hits; }; @@ -36,28 +33,39 @@ struct node { static struct flist_head *hash; static unsigned long hash_bits = 24; static unsigned long hash_size = 1 << 24; -static struct rb_root rb; enum { TYPE_NONE = 0, TYPE_ZIPF, TYPE_PARETO, + TYPE_NORMAL, +}; +static const char *dist_types[] = { "None", "Zipf", "Pareto", "Normal" }; + +enum { + OUTPUT_NORMAL, + OUTPUT_CSV, }; -static const char *dist_types[] = { "None", "Zipf", "Pareto" }; static int dist_type = TYPE_ZIPF; -static unsigned long gb_size = 500; +static unsigned long gib_size = 500; static unsigned long block_size = 4096; static unsigned long output_nranges = DEF_NR_OUTPUT; static double percentage; static double dist_val; +static int output_type = OUTPUT_NORMAL; #define DEF_ZIPF_VAL 1.2 #define DEF_PARETO_VAL 0.3 +static unsigned int hashv(unsigned long long val) +{ + return jhash(&val, sizeof(val), 0) & (hash_size - 1); +} + static struct node *hash_lookup(unsigned long long val) { - struct flist_head *l = &hash[hash_long(val, hash_bits)]; + struct flist_head *l = &hash[hashv(val)]; struct flist_head *entry; struct node *n; @@ -70,72 +78,39 @@ static struct node *hash_lookup(unsigned long long val) return NULL; } -static void hash_insert(unsigned long long val) +static void hash_insert(struct node *n, unsigned long long val) { - struct flist_head *l = &hash[hash_long(val, hash_bits)]; - struct node *n = malloc(sizeof(*n)); + struct flist_head *l = &hash[hashv(val)]; n->val = val; n->hits = 1; flist_add_tail(&n->list, l); } -static void rb_insert(struct node *n) -{ - struct rb_node **p, *parent; - - memset(&n->rb, 0, sizeof(n->rb)); - p = &rb.rb_node; - parent = NULL; - while (*p) { - struct node *__n; - - parent = *p; - __n = rb_entry(parent, struct node, rb); - if (n->hits > __n->hits) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&n->rb, parent, p); - rb_insert_color(&n->rb, &rb); -} - -static unsigned long rb_add(struct flist_head *list) -{ - struct flist_head *entry; - unsigned long ret = 0; - struct node *n; - - flist_for_each(entry, list) { - n = flist_entry(entry, struct node, list); - - rb_insert(n); - ret++; - } - - return ret; -} - -static unsigned long rb_gen(void) +static void usage(void) { - unsigned long ret = 0; - unsigned int i; - - for (i = 0; i < hash_size; i++) - ret += rb_add(&hash[i]); - - return ret; + printf("genzipf: test zipf/pareto values for fio input\n"); + printf("\t-h\tThis help screen\n"); + printf("\t-p\tGenerate size of data set that are hit by this percentage\n"); + printf("\t-t\tDistribution type (zipf, pareto, or normal)\n"); + printf("\t-i\tDistribution algorithm input (zipf theta, pareto power,\n" + "\t\tor normal %% deviation)\n"); + printf("\t-b\tBlock size of a given range (in bytes)\n"); + printf("\t-g\tSize of data set (in gigabytes)\n"); + printf("\t-o\tNumber of output rows\n"); + printf("\t-c\tOutput ranges in CSV format\n"); } static int parse_options(int argc, char *argv[]) { - const char *optstring = "t:g:i:o:b:p:"; + const char *optstring = "t:g:i:o:b:p:ch"; int c, dist_val_set = 0; while ((c = getopt(argc, argv, optstring)) != -1) { switch (c) { + case 'h': + usage(); + return 1; case 'p': percentage = atof(optarg); break; @@ -147,13 +122,15 @@ static int parse_options(int argc, char *argv[]) dist_type = TYPE_ZIPF; else if (!strncmp(optarg, "pareto", 6)) dist_type = TYPE_PARETO; + else if (!strncmp(optarg, "normal", 6)) + dist_type = TYPE_NORMAL; else { printf("wrong dist type: %s\n", optarg); return 1; } break; case 'g': - gb_size = strtoul(optarg, NULL, 10); + gib_size = strtoul(optarg, NULL, 10); break; case 'i': dist_val = atof(optarg); @@ -162,6 +139,9 @@ static int parse_options(int argc, char *argv[]) case 'o': output_nranges = strtoul(optarg, NULL, 10); break; + case 'c': + output_type = OUTPUT_CSV; + break; default: printf("bad option %c\n", c); return 1; @@ -192,93 +172,61 @@ struct output_sum { unsigned int nranges; }; -int main(int argc, char *argv[]) +static int node_cmp(const void *p1, const void *p2) { - unsigned long offset; - unsigned long i, j, nr_vals, cur_vals, interval, total_vals; - unsigned long long nranges; - struct output_sum *output_sums; - double perc, perc_i; - struct zipf_state zs; - struct rb_node *n; - - if (parse_options(argc, argv)) - return 1; - - printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); - - nranges = gb_size * 1024 * 1024 * 1024ULL; - nranges /= block_size; - - if (dist_type == TYPE_ZIPF) - zipf_init(&zs, nranges, dist_val, 1); - else - pareto_init(&zs, nranges, dist_val, 1); - - hash_bits = 0; - hash_size = nranges; - while ((hash_size >>= 1) != 0) - hash_bits++; - - hash_size = 1 << hash_bits; - - hash = malloc(hash_size * sizeof(struct flist_head)); - for (i = 0; i < hash_size; i++) - INIT_FLIST_HEAD(&hash[i]); - - for (nr_vals = i = 0; i < nranges; i++) { - struct node *n; + const struct node *n1 = p1; + const struct node *n2 = p2; - if (dist_type == TYPE_ZIPF) - offset = zipf_next(&zs); - else - offset = pareto_next(&zs); - - n = hash_lookup(offset); - if (n) - n->hits++; - else - hash_insert(offset); + return n2->hits - n1->hits; +} - nr_vals++; - } +static void output_csv(struct node *nodes, unsigned long nnodes) +{ + unsigned long i; - nr_vals = rb_gen(); + printf("rank, count\n"); + for (i = 0; i < nnodes; i++) + printf("%lu, %lu\n", i, nodes[i].hits); +} - interval = (nr_vals + output_nranges - 1) / output_nranges; +static void output_normal(struct node *nodes, unsigned long nnodes, + unsigned long nranges) +{ + unsigned long i, j, cur_vals, interval_step, next_interval, total_vals; + unsigned long blocks = percentage * nnodes / 100; + double hit_percent_sum = 0; + unsigned long long hit_sum = 0; + double perc, perc_i; + struct output_sum *output_sums; + interval_step = (nnodes - 1) / output_nranges + 1; + next_interval = interval_step; output_sums = malloc(output_nranges * sizeof(struct output_sum)); + for (i = 0; i < output_nranges; i++) { output_sums[i].output = 0.0; - output_sums[i].nranges = 1; + output_sums[i].nranges = 0; } - total_vals = i = j = cur_vals = 0; - - n = rb_first(&rb); - while (n) { - struct node *node = rb_entry(n, struct node, rb); - struct output_sum *os = &output_sums[j]; + j = total_vals = cur_vals = 0; - if (i >= interval) { - os->output = (double) (cur_vals + 1) / (double) nranges; + for (i = 0; i < nnodes; i++) { + struct output_sum *os = &output_sums[j]; + struct node *node = &nodes[i]; + cur_vals += node->hits; + total_vals += node->hits; + os->nranges += node->hits; + if (i == (next_interval) -1 || i == nnodes - 1) { + os->output = (double) cur_vals / (double) nranges; os->output *= 100.0; + cur_vals = 0; + next_interval += interval_step; j++; - cur_vals = node->hits; - interval += (nr_vals + output_nranges - 1) / output_nranges; - } else { - cur_vals += node->hits; - os->nranges += node->hits; } - i++; - total_vals += node->hits; - if (percentage) { - unsigned long blocks = percentage * nranges / 100; - if (total_vals >= blocks) { - double cs = i * block_size / (1024 * 1024); + double cs = (double) i * block_size / (1024.0 * 1024.0); char p = 'M'; if (cs > 1024.0) { @@ -294,18 +242,16 @@ int main(int argc, char *argv[]) percentage = 0.0; } } - - n = rb_next(n); } - perc_i = 100.0 / (double) output_nranges; + perc_i = 100.0 / (double)output_nranges; perc = 0.0; - printf("\n Rows Hits No Hits Size\n"); - printf("--------------------------------------------------------\n"); - for (i = 0; i < j; i++) { + printf("\n Rows Hits %% Sum %% # Hits Size\n"); + printf("-----------------------------------------------------------------------\n"); + for (i = 0; i < output_nranges; i++) { struct output_sum *os = &output_sums[i]; - double gb = (double) os->nranges * block_size / 1024.0; + double gb = (double)os->nranges * block_size / 1024.0; char p = 'K'; if (gb > 1024.0) { @@ -318,10 +264,86 @@ int main(int argc, char *argv[]) } perc += perc_i; - printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p); + hit_percent_sum += os->output; + hit_sum += os->nranges; + printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%%\t\t%8u\t%6.2f%c\n", + i ? "|->" : "Top", perc, os->output, hit_percent_sum, + os->nranges, gb, p); } + printf("-----------------------------------------------------------------------\n"); + printf("Total\t\t\t\t\t\t%8llu\n", hit_sum); free(output_sums); +} + +int main(int argc, char *argv[]) +{ + unsigned long offset; + unsigned long long nranges; + unsigned long nnodes; + struct node *nodes; + struct zipf_state zs; + struct gauss_state gs; + int i, j; + + if (parse_options(argc, argv)) + return 1; + + if (output_type != OUTPUT_CSV) + printf("Generating %s distribution with %f input and %lu GiB size and %lu block_size.\n", + dist_types[dist_type], dist_val, gib_size, block_size); + + nranges = gib_size * 1024 * 1024 * 1024ULL; + nranges /= block_size; + + if (dist_type == TYPE_ZIPF) + zipf_init(&zs, nranges, dist_val, 1); + else if (dist_type == TYPE_PARETO) + pareto_init(&zs, nranges, dist_val, 1); + else + gauss_init(&gs, nranges, dist_val, 1); + + hash_bits = 0; + hash_size = nranges; + while ((hash_size >>= 1) != 0) + hash_bits++; + + hash_size = 1 << hash_bits; + + hash = calloc(hash_size, sizeof(struct flist_head)); + for (i = 0; i < hash_size; i++) + INIT_FLIST_HEAD(&hash[i]); + + nodes = malloc(nranges * sizeof(struct node)); + + for (i = j = 0; i < nranges; i++) { + struct node *n; + + if (dist_type == TYPE_ZIPF) + offset = zipf_next(&zs); + else if (dist_type == TYPE_PARETO) + offset = pareto_next(&zs); + else + offset = gauss_next(&gs); + + n = hash_lookup(offset); + if (n) + n->hits++; + else { + hash_insert(&nodes[j], offset); + j++; + } + } + + qsort(nodes, j, sizeof(struct node), node_cmp); + nnodes = j; + + if (output_type == OUTPUT_CSV) + output_csv(nodes, nnodes); + else + output_normal(nodes, nnodes, nranges); + free(hash); + free(nodes); return 0; }