X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=t%2Fgenzipf.c;h=3cc93e6d7f080624a3749df162f7aaefc50908ef;hp=f28674424df13fdf15b16aed76ec488dff1e266a;hb=355934b7ed82d13a5bfc043e2243013fd1e4e5bd;hpb=f880b1f60dcfb278ab00e6b20994072b04dfd5af diff --git a/t/genzipf.c b/t/genzipf.c index f2867442..3cc93e6d 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -16,107 +16,291 @@ #include #include #include +#include #include "../lib/zipf.h" +#include "../flist.h" +#include "../hash.h" #define DEF_NR 1000000 #define DEF_NR_OUTPUT 23 -static int val_cmp(const void *p1, const void *p2) +struct node { + struct flist_head list; + unsigned long long val; + unsigned long hits; +}; + +static struct flist_head *hash; +static unsigned long hash_bits = 24; +static unsigned long hash_size = 1 << 24; + +enum { + TYPE_NONE = 0, + TYPE_ZIPF, + TYPE_PARETO, +}; +static const char *dist_types[] = { "None", "Zipf", "Pareto" }; + +static int dist_type = TYPE_ZIPF; +static unsigned long gb_size = 500; +static unsigned long block_size = 4096; +static unsigned long output_nranges = DEF_NR_OUTPUT; +static double percentage; +static double dist_val; +static int output_csv = 0; + +#define DEF_ZIPF_VAL 1.2 +#define DEF_PARETO_VAL 0.3 + +static struct node *hash_lookup(unsigned long long val) { - const unsigned long *v1 = p1; - const unsigned long *v2 = p2; + struct flist_head *l = &hash[hash_long(val, hash_bits)]; + struct flist_head *entry; + struct node *n; + + flist_for_each(entry, l) { + n = flist_entry(entry, struct node, list); + if (n->val == val) + return n; + } - return *v1 - *v2; + return NULL; } -int main(int argc, char *argv[]) +static struct node *hash_insert(struct node *n, unsigned long long val) { - unsigned long nranges, output_nranges; - unsigned long *vals; - unsigned long i, j, nr_vals, cur_vals, max_val, interval; - double *output, perc, perc_i; - struct zipf_state zs; - int use_zipf; - double val; + struct flist_head *l = &hash[hash_long(val, hash_bits)]; - if (argc < 3) { - printf("%s: {zipf,pareto} val values [output ranges]\n", argv[0]); - return 1; + n->val = val; + n->hits = 1; + flist_add_tail(&n->list, l); + return n; +} + +static int parse_options(int argc, char *argv[]) +{ + const char *optstring = "t:g:i:o:b:p:c"; + int c, dist_val_set = 0; + + while ((c = getopt(argc, argv, optstring)) != -1) { + switch (c) { + case 'p': + percentage = atof(optarg); + break; + case 'b': + block_size = strtoul(optarg, NULL, 10); + break; + case 't': + if (!strncmp(optarg, "zipf", 4)) + dist_type = TYPE_ZIPF; + else if (!strncmp(optarg, "pareto", 6)) + dist_type = TYPE_PARETO; + else { + printf("wrong dist type: %s\n", optarg); + return 1; + } + break; + case 'g': + gb_size = strtoul(optarg, NULL, 10); + break; + case 'i': + dist_val = atof(optarg); + dist_val_set = 1; + break; + case 'o': + output_nranges = strtoul(optarg, NULL, 10); + break; + case 'c': + output_csv = 1; + break; + default: + printf("bad option %c\n", c); + return 1; + } } - if (!strcmp(argv[1], "zipf")) - use_zipf = 1; - else if (!strcmp(argv[1], "pareto")) - use_zipf = 0; - else { - printf("Bad distribution type <%s>\n", argv[1]); - return 1; + if (dist_type == TYPE_PARETO) { + if ((dist_val >= 1.00 || dist_val < 0.00)) { + printf("pareto input must be > 0.00 and < 1.00\n"); + return 1; + } + if (!dist_val_set) + dist_val = DEF_PARETO_VAL; + } else if (dist_type == TYPE_ZIPF) { + if (dist_val == 1.0) { + printf("zipf input must be different than 1.0\n"); + return 1; + } + if (!dist_val_set) + dist_val = DEF_ZIPF_VAL; } - val = atof(argv[2]); + return 0; +} + +struct output_sum { + double output; + unsigned int nranges; +}; + +static int node_cmp(const void *p1, const void *p2) +{ + const struct node *n1 = p1; + const struct node *n2 = p2; + + return n2->hits - n1->hits; +} + +int main(int argc, char *argv[]) +{ + unsigned long offset; + unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes; + unsigned long long nranges; + struct output_sum *output_sums; + struct node *nodes; + double perc, perc_i; + struct zipf_state zs; - nranges = DEF_NR; - output_nranges = DEF_NR_OUTPUT; + if (parse_options(argc, argv)) + return 1; - if (argc >= 4) - nranges = strtoul(argv[3], NULL, 10); - if (argc >= 5) - output_nranges = strtoul(argv[4], NULL, 10); + if( !output_csv ) + printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); - printf("Generating %s distribution with %f input and %lu ranges.\n", use_zipf ? "zipf" : "pareto", val, nranges); + nranges = gb_size * 1024 * 1024 * 1024ULL; + nranges /= block_size; - if (use_zipf) - zipf_init(&zs, nranges, val); + if (dist_type == TYPE_ZIPF) + zipf_init(&zs, nranges, dist_val, 1); else - pareto_init(&zs, nranges, val); + pareto_init(&zs, nranges, dist_val, 1); + + hash_bits = 0; + hash_size = nranges; + while ((hash_size >>= 1) != 0) + hash_bits++; + + hash_size = 1 << hash_bits; + + hash = malloc(hash_size * sizeof(struct flist_head)); + for (i = 0; i < hash_size; i++) + INIT_FLIST_HEAD(&hash[i]); - vals = malloc(nranges * sizeof(unsigned long)); + nodes = malloc(nranges * sizeof(struct node)); - max_val = nr_vals = 0; - for (i = 0; i < nranges; i++) { - if (use_zipf) - vals[nr_vals] = zipf_next(&zs); + for (nr_vals = i = j = 0; i < nranges; i++) { + struct node *n; + + if (dist_type == TYPE_ZIPF) + offset = zipf_next(&zs); else - vals[nr_vals] = pareto_next(&zs); + offset = pareto_next(&zs); + + n = hash_lookup(offset); + if (n) + n->hits++; + else { + hash_insert(&nodes[j], offset); + j++; + } - if (vals[nr_vals] > max_val) - max_val = vals[nr_vals]; nr_vals++; } - qsort(vals, nr_vals, sizeof(unsigned long), val_cmp); + qsort(nodes, j, sizeof(struct node), node_cmp); + nnodes = j; + nr_vals = nnodes; - interval = (max_val + output_nranges - 1) / output_nranges; + if (output_csv) { + printf("rank, count\n"); + for (k = 0; k < nnodes; k++) + printf("%lu, %lu\n", k, nodes[k].hits); + } else { + interval = (nr_vals + output_nranges - 1) / output_nranges; - output = malloc(output_nranges * sizeof(double)); + output_sums = malloc(output_nranges * sizeof(struct output_sum)); + for (i = 0; i < output_nranges; i++) { + output_sums[i].output = 0.0; + output_sums[i].nranges = 1; + } - for (i = j = 0, cur_vals = 0; i < nr_vals; i++) { - if (vals[i] > interval) { - output[j] = (double) (cur_vals + 1) / (double) nr_vals; - output[j] *= 100.0; - j++; - cur_vals = 0; - interval += (max_val + output_nranges - 1) / output_nranges; - continue; + total_vals = i = j = cur_vals = 0; + + for (k = 0; k < nnodes; k++) { + struct output_sum *os = &output_sums[j]; + struct node *node = &nodes[k]; + + if (i >= interval) { + os->output = + (double)(cur_vals + 1) / (double)nranges; + os->output *= 100.0; + j++; + cur_vals = node->hits; + interval += + (nr_vals + output_nranges - + 1) / output_nranges; + } else { + cur_vals += node->hits; + os->nranges += node->hits; + } + + i++; + total_vals += node->hits; + + if (percentage) { + unsigned long blocks = + percentage * nranges / 100; + + if (total_vals >= blocks) { + double cs = + i * block_size / (1024 * 1024); + char p = 'M'; + + if (cs > 1024.0) { + cs /= 1024.0; + p = 'G'; + } + if (cs > 1024.0) { + cs /= 1024.0; + p = 'T'; + } + + printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); + percentage = 0.0; + } + } } - cur_vals++; - } - if (cur_vals) { - output[j] = (double) (cur_vals + 1) / (double) nr_vals; - output[j] *= 100.0; - j++; - } + perc_i = 100.0 / (double)output_nranges; + perc = 0.0; + + printf("\n Rows Hits No Hits Size\n"); + printf("--------------------------------------------------------\n"); + for (i = 0; i < j; i++) { + struct output_sum *os = &output_sums[i]; + double gb = (double)os->nranges * block_size / 1024.0; + char p = 'K'; + + if (gb > 1024.0) { + p = 'M'; + gb /= 1024.0; + } + if (gb > 1024.0) { + p = 'G'; + gb /= 1024.0; + } + + perc += perc_i; + printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", + i ? "|->" : "Top", perc, os->output, os->nranges, + gb, p); + } - perc_i = 100.0 / (double) output_nranges; - perc = 0.0; - for (i = 0; i < j; i++) { - printf("%6.2f%% -> %6.2f%%:\t%6.2f%%\n", perc, perc + perc_i, output[i]); - perc += perc_i; + free(output_sums); } - free(output); - free(vals); + free(hash); + free(nodes); return 0; }