X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=t%2Fbtrace2fio.c;h=04d4a28dd2ccfd8ca85496e6a17dd6724573082c;hp=d09f1be9037128a335924ba66b4533f68e8d3eaf;hb=dd9bd2b2e0ed9665325240fdd5a40253fe23bca6;hpb=35661615f8c42ef078ae0b7d23f632060d723826 diff --git a/t/btrace2fio.c b/t/btrace2fio.c index d09f1be9..04d4a28d 100644 --- a/t/btrace2fio.c +++ b/t/btrace2fio.c @@ -17,6 +17,9 @@ static unsigned int rt_threshold = 1000000; static unsigned int ios_threshold = 10; +static unsigned int rate_threshold; +static unsigned int set_rate; +static unsigned int max_depth = 256; static int output_ascii = 1; static char *filename; @@ -33,7 +36,6 @@ struct trace_file { struct btrace_out { unsigned long ios[DDIR_RWDIR_CNT]; - unsigned long rw_bs[DDIR_RWDIR_CNT]; unsigned long merges[DDIR_RWDIR_CNT]; uint64_t last_end[DDIR_RWDIR_CNT]; @@ -44,12 +46,12 @@ struct btrace_out { int inflight; unsigned int depth; - uint64_t first_ttime; - uint64_t last_ttime; + int depth_disabled; + int complete_seen; - struct trace_file *files; - int nr_files; - unsigned int last_major, last_minor; + uint64_t first_ttime[DDIR_RWDIR_CNT]; + uint64_t last_ttime[DDIR_RWDIR_CNT]; + uint64_t kb[DDIR_RWDIR_CNT]; uint64_t start_delay; }; @@ -58,6 +60,11 @@ struct btrace_pid { struct flist_head hash_list; struct flist_head pid_list; pid_t pid; + + struct trace_file *files; + int nr_files; + unsigned int last_major, last_minor; + struct btrace_out o; }; @@ -73,15 +80,20 @@ struct inflight { static struct flist_head pid_hash[PID_HASH_SIZE]; static FLIST_HEAD(pid_list); -static FLIST_HEAD(inflight_list); +#define INFLIGHT_HASH_BITS 8 +#define INFLIGHT_HASH_SIZE (1U << INFLIGHT_HASH_BITS) +static struct flist_head inflight_hash[INFLIGHT_HASH_SIZE]; static uint64_t first_ttime = -1ULL; static struct inflight *inflight_find(uint64_t sector) { + struct flist_head *inflight_list; struct flist_head *e; - flist_for_each(e, &inflight_list) { + inflight_list = &inflight_hash[hash_long(sector, INFLIGHT_HASH_BITS)]; + + flist_for_each(e, inflight_list) { struct inflight *i = flist_entry(e, struct inflight, list); if (i->end_sector == sector) @@ -101,11 +113,12 @@ static void inflight_remove(struct inflight *i) free(i); } -static void inflight_merge(struct inflight *i, int rw, unsigned int size) +static void __inflight_add(struct inflight *i) { - i->p->o.merges[rw]++; - if (size) - i->end_sector += (size >> 9); + struct flist_head *list; + + list = &inflight_hash[hash_long(i->end_sector, INFLIGHT_HASH_BITS)]; + flist_add_tail(&i->list, list); } static void inflight_add(struct btrace_pid *p, uint64_t sector, uint32_t len) @@ -116,9 +129,25 @@ static void inflight_add(struct btrace_pid *p, uint64_t sector, uint32_t len) i = calloc(1, sizeof(*i)); i->p = p; o->inflight++; - o->depth = max((int) o->depth, o->inflight); + if (!o->depth_disabled) { + o->depth = max((int) o->depth, o->inflight); + if (o->depth >= max_depth && !o->complete_seen) { + o->depth_disabled = 1; + o->depth = max_depth; + } + } i->end_sector = sector + (len >> 9); - flist_add_tail(&i->list, &inflight_list); + __inflight_add(i); +} + +static void inflight_merge(struct inflight *i, int rw, unsigned int size) +{ + i->p->o.merges[rw]++; + if (size) { + i->end_sector += (size >> 9); + flist_del(&i->list); + __inflight_add(i); + } } /* @@ -173,7 +202,7 @@ static int discard_pdu(struct fifo *fifo, int fd, struct blk_io_trace *t) return trace_fifo_get(fifo, fd, NULL, t->pdu_len); } -static void handle_trace_notify(struct blk_io_trace *t) +static int handle_trace_notify(struct blk_io_trace *t) { switch (t->action) { case BLK_TN_PROCESS: @@ -185,9 +214,11 @@ static void handle_trace_notify(struct blk_io_trace *t) case BLK_TN_MESSAGE: break; default: - fprintf(stderr, "unknown trace act %x\n", t->action); - break; + log_err("unknown trace act %x\n", t->action); + return 1; } + + return 0; } static void __add_bs(struct btrace_out *o, unsigned int len, int rw) @@ -223,7 +254,7 @@ static void add_bs(struct btrace_out *o, unsigned int len, int rw) #define FMAJOR(dev) ((unsigned int) ((dev) >> FMINORBITS)) #define FMINOR(dev) ((unsigned int) ((dev) & FMINORMASK)) -static void btrace_add_file(struct btrace_out *o, uint32_t devno) +static int btrace_add_file(struct btrace_pid *p, uint32_t devno) { unsigned int maj = FMAJOR(devno); unsigned int min = FMINOR(devno); @@ -232,61 +263,80 @@ static void btrace_add_file(struct btrace_out *o, uint32_t devno) char dev[256]; if (filename) - return; - if (o->last_major == maj && o->last_minor == min) - return; + return 0; + if (p->last_major == maj && p->last_minor == min) + return 0; - o->last_major = maj; - o->last_minor = min; + p->last_major = maj; + p->last_minor = min; /* * check for this file in our list */ - for (i = 0; i < o->nr_files; i++) { - f = &o->files[i]; + for (i = 0; i < p->nr_files; i++) { + f = &p->files[i]; if (f->major == maj && f->minor == min) - return; + return 0; } strcpy(dev, "/dev"); if (!blktrace_lookup_device(NULL, dev, maj, min)) { log_err("fio: failed to find device %u/%u\n", maj, min); - return; + if (!output_ascii) { + log_err("fio: use -d to specify device\n"); + return 1; + } + return 0; } - o->files = realloc(o->files, (o->nr_files + 1) * sizeof(*f)); - f = &o->files[o->nr_files]; + p->files = realloc(p->files, (p->nr_files + 1) * sizeof(*f)); + f = &p->files[p->nr_files]; f->name = strdup(dev); f->major = maj; f->minor = min; - o->nr_files++; + p->nr_files++; + return 0; } -static void handle_trace_discard(struct blk_io_trace *t, struct btrace_out *o) +static int t_to_rwdir(struct blk_io_trace *t) { - btrace_add_file(o, t->device); + if (t->action & BLK_TC_ACT(BLK_TC_DISCARD)) + return DDIR_TRIM; - if (o->first_ttime == -1ULL) - o->first_ttime = t->time; + return (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; +} + +static int handle_trace_discard(struct blk_io_trace *t, struct btrace_pid *p) +{ + struct btrace_out *o = &p->o; + + if (btrace_add_file(p, t->device)) + return 1; + + if (o->first_ttime[2] == -1ULL) + o->first_ttime[2] = t->time; o->ios[DDIR_TRIM]++; add_bs(o, t->bytes, DDIR_TRIM); + return 0; } -static void handle_trace_fs(struct blk_io_trace *t, struct btrace_out *o) +static int handle_trace_fs(struct blk_io_trace *t, struct btrace_pid *p) { + struct btrace_out *o = &p->o; int rw; - btrace_add_file(o, t->device); + if (btrace_add_file(p, t->device)) + return 1; first_ttime = min(first_ttime, (uint64_t) t->time); - if (o->first_ttime == -1ULL) - o->first_ttime = t->time; - rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; + if (o->first_ttime[rw] == -1ULL) + o->first_ttime[rw] = t->time; + add_bs(o, t->bytes, rw); o->ios[rw]++; @@ -294,27 +344,27 @@ static void handle_trace_fs(struct blk_io_trace *t, struct btrace_out *o) o->seq[rw]++; o->last_end[rw] = t->sector + (t->bytes >> 9); + return 0; } -static void handle_queue_trace(struct blk_io_trace *t, struct btrace_out *o) +static int handle_queue_trace(struct blk_io_trace *t, struct btrace_pid *p) { if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY)) - handle_trace_notify(t); + return handle_trace_notify(t); else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD)) - handle_trace_discard(t, o); + return handle_trace_discard(t, p); else - handle_trace_fs(t, o); + return handle_trace_fs(t, p); } -static void handle_trace(struct blk_io_trace *t, struct btrace_pid *p) +static int handle_trace(struct blk_io_trace *t, struct btrace_pid *p) { unsigned int act = t->action & 0xffff; + int ret = 0; if (act == __BLK_TA_QUEUE) { inflight_add(p, t->sector, t->bytes); - handle_queue_trace(t, &p->o); - } else if (act == __BLK_TA_REQUEUE) { - p->o.inflight--; + ret = handle_queue_trace(t, p); } else if (act == __BLK_TA_BACKMERGE) { struct inflight *i; @@ -323,11 +373,8 @@ static void handle_trace(struct blk_io_trace *t, struct btrace_pid *p) inflight_remove(i); i = inflight_find(t->sector); - if (i) { - int rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; - - inflight_merge(i, rw, t->bytes); - } + if (i) + inflight_merge(i, t_to_rwdir(t), t->bytes); } else if (act == __BLK_TA_FRONTMERGE) { struct inflight *i; @@ -336,18 +383,20 @@ static void handle_trace(struct blk_io_trace *t, struct btrace_pid *p) inflight_remove(i); i = inflight_find(t->sector); - if (i) { - int rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; - - inflight_merge(i, rw, 0); - } + if (i) + inflight_merge(i, t_to_rwdir(t), 0); } else if (act == __BLK_TA_COMPLETE) { struct inflight *i; i = inflight_find(t->sector + (t->bytes >> 9)); - if (i) + if (i) { + i->p->o.kb[t_to_rwdir(t)] += (t->bytes >> 10); + i->p->o.complete_seen = 1; inflight_remove(i); + } } + + return ret; } static void byteswap_trace(struct blk_io_trace *t) @@ -391,11 +440,12 @@ static struct btrace_pid *pid_hash_get(pid_t pid) int i; p = calloc(1, sizeof(*p)); - p->o.first_ttime = -1ULL; - p->o.last_ttime = -1ULL; - for (i = 0; i < DDIR_RWDIR_CNT; i++) + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + p->o.first_ttime[i] = -1ULL; + p->o.last_ttime[i] = -1ULL; p->o.last_end[i] = -1ULL; + } p->pid = pid; flist_add_tail(&p->hash_list, hash_list); @@ -409,15 +459,15 @@ static struct btrace_pid *pid_hash_get(pid_t pid) * Load a blktrace file by reading all the blk_io_trace entries, and storing * them as io_pieces like the fio text version would do. */ -static int load_blktrace(const char *filename, int need_swap) +static int load_blktrace(const char *fname, int need_swap) { struct btrace_pid *p; unsigned long traces; struct blk_io_trace t; struct fifo *fifo; - int fd; + int fd, ret = 0; - fd = open(filename, O_RDONLY); + fd = open(fname, O_RDONLY); if (fd < 0) { perror("open trace file\n"); return 1; @@ -427,14 +477,13 @@ static int load_blktrace(const char *filename, int need_swap) traces = 0; do { - int ret = trace_fifo_get(fifo, fd, &t, sizeof(t)); - + ret = trace_fifo_get(fifo, fd, &t, sizeof(t)); if (ret < 0) goto err; else if (!ret) break; else if (ret < (int) sizeof(t)) { - fprintf(stderr, "fio: short fifo get\n"); + log_err("fio: short fifo get\n"); break; } @@ -442,33 +491,36 @@ static int load_blktrace(const char *filename, int need_swap) byteswap_trace(&t); if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) { - fprintf(stderr, "fio: bad magic in blktrace data: %x\n", - t.magic); + log_err("fio: bad magic in blktrace data: %x\n", t.magic); goto err; } if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) { - fprintf(stderr, "fio: bad blktrace version %d\n", - t.magic & 0xff); + log_err("fio: bad blktrace version %d\n", t.magic & 0xff); goto err; } ret = discard_pdu(fifo, fd, &t); if (ret < 0) { - fprintf(stderr, "blktrace lseek\n"); + log_err("blktrace lseek\n"); goto err; } else if (t.pdu_len != ret) { - fprintf(stderr, "fio: discarded %d of %d\n", ret, t.pdu_len); + log_err("fio: discarded %d of %d\n", ret, t.pdu_len); goto err; } p = pid_hash_get(t.pid); - handle_trace(&t, p); - p->o.last_ttime = t.time; + ret = handle_trace(&t, p); + if (ret) + break; + p->o.last_ttime[t_to_rwdir(&t)] = t.time; traces++; } while (1); fifo_free(fifo); close(fd); + if (ret) + return ret; + if (output_ascii) printf("Traces loaded: %lu\n", traces); @@ -487,11 +539,50 @@ static int bs_cmp(const void *ba, const void *bb) return bsb->nr - bsa->nr; } +static unsigned long o_to_kb_rate(struct btrace_out *o, int rw) +{ + uint64_t usec = (o->last_ttime[rw] - o->first_ttime[rw]) / 1000ULL; + uint64_t val; + + if (!usec) + return 0; + + usec /= 1000; + if (!usec) + return 0; + + val = o->kb[rw] * 1000ULL; + return val / usec; +} + +static uint64_t o_first_ttime(struct btrace_out *o) +{ + uint64_t first; + + first = min(o->first_ttime[0], o->first_ttime[1]); + return min(first, o->first_ttime[2]); +} + +static uint64_t o_longest_ttime(struct btrace_out *o) +{ + uint64_t ret = 0; + int i; + + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + uint64_t diff; + + diff = o->last_ttime[i] - o->first_ttime[i]; + ret = max(diff, ret); + } + + return ret; +} + static void __output_p_ascii(struct btrace_pid *p, unsigned long *ios) { const char *msg[] = { "reads", "writes", "trims" }; struct btrace_out *o = &p->o; - unsigned long total; + unsigned long total, usec; int i, j; printf("[pid:\t%u]\n", p->pid); @@ -510,7 +601,8 @@ static void __output_p_ascii(struct btrace_pid *p, unsigned long *ios) perc = ((float) o->merges[i] * 100.0) / (float) total; printf("\tmerges: %lu (perc=%3.2f%%)\n", o->merges[i], perc); perc = ((float) o->seq[i] * 100.0) / (float) o->ios[i]; - printf("\tseq: %lu (perc=%3.2f%%)\n", o->seq[i], perc); + printf("\tseq: %lu (perc=%3.2f%%)\n", (unsigned long) o->seq[i], perc); + printf("\trate: %lu KB/sec\n", o_to_kb_rate(o, i)); for (j = 0; j < o->nr_bs[i]; j++) { struct bs *bs = &o->bs[i][j]; @@ -521,11 +613,12 @@ static void __output_p_ascii(struct btrace_pid *p, unsigned long *ios) } printf("depth:\t%u\n", o->depth); - printf("usec:\t%llu (delay=%llu)\n", (o->last_ttime - o->first_ttime) / 1000ULL, (unsigned long long) o->start_delay); + usec = o_longest_ttime(o) / 1000ULL; + printf("usec:\t%lu (delay=%llu)\n", usec, (unsigned long long) o->start_delay); printf("files:\t"); - for (i = 0; i < o->nr_files; i++) - printf("%s,", o->files[i].name); + for (i = 0; i < p->nr_files; i++) + printf("%s,", p->files[i].name); printf("\n"); printf("\n"); @@ -543,6 +636,10 @@ static int __output_p_fio(struct btrace_pid *p, unsigned long *ios) log_err("fio: trace has both read/write and trim\n"); return 1; } + if (!p->nr_files) { + log_err("fio: no devices found\n"); + return 1; + } printf("[pid%u]\n", p->pid); printf("direct=1\n"); @@ -581,16 +678,16 @@ static int __output_p_fio(struct btrace_pid *p, unsigned long *ios) printf("\n"); printf("filename="); - for (i = 0; i < o->nr_files; i++) { + for (i = 0; i < p->nr_files; i++) { if (i) printf(":"); - printf("%s", o->files[i].name); + printf("%s", p->files[i].name); } printf("\n"); printf("startdelay=%llus\n", o->start_delay / 1000000ULL); - time = o->last_ttime - o->first_ttime; + time = o_longest_ttime(o); time = (time + 1000000000ULL - 1) / 1000000000ULL; printf("runtime=%llus\n", time); @@ -614,8 +711,23 @@ static int __output_p_fio(struct btrace_pid *p, unsigned long *ios) printf("%u/%u", bs->bs, (int) perc); } } - printf("\n\n"); + printf("\n"); + + if (set_rate) { + printf("rate="); + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + unsigned long rate; + + rate = o_to_kb_rate(o, i); + if (i) + printf(","); + if (rate) + printf("%luk", rate); + } + printf("\n"); + } + printf("\n"); return 0; } @@ -631,9 +743,9 @@ static int __output_p(struct btrace_pid *p, unsigned long *ios) } if (filename) { - o->files = malloc(sizeof(struct trace_file)); - o->nr_files++; - o->files[0].name = filename; + p->files = malloc(sizeof(struct trace_file)); + p->nr_files++; + p->files[0].name = filename; } if (output_ascii) @@ -644,17 +756,39 @@ static int __output_p(struct btrace_pid *p, unsigned long *ios) return ret; } +static void remove_ddir(struct btrace_out *o, int rw) +{ + o->ios[rw] = 0; +} + static int prune_entry(struct btrace_out *o) { + unsigned long rate; uint64_t time; + int i; if (ddir_rw_sum(o->ios) < ios_threshold) return 1; - time = (o->last_ttime - o->first_ttime) / 1000ULL; + time = o_longest_ttime(o) / 1000ULL; if (time < rt_threshold) return 1; + rate = 0; + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + unsigned long this_rate; + + this_rate = o_to_kb_rate(o, i); + if (this_rate < rate_threshold) { + remove_ddir(o, i); + this_rate = 0; + } + rate += this_rate; + } + + if (rate < rate_threshold) + return 1; + return 0; } @@ -666,10 +800,30 @@ static int entry_cmp(void *priv, struct flist_head *a, struct flist_head *b) return ddir_rw_sum(pb->o.ios) - ddir_rw_sum(pa->o.ios); } +static void free_p(struct btrace_pid *p) +{ + struct btrace_out *o = &p->o; + int i; + + for (i = 0; i < p->nr_files; i++) { + if (p->files[i].name && p->files[i].name != filename) + free(p->files[i].name); + } + + for (i = 0; i < DDIR_RWDIR_CNT; i++) + free(o->bs[i]); + + free(p->files); + flist_del(&p->pid_list); + flist_del(&p->hash_list); + free(p); +} + static int output_p(void) { unsigned long ios[DDIR_RWDIR_CNT]; struct flist_head *e, *tmp; + int depth_disabled = 0; int ret = 0; flist_for_each_safe(e, tmp, &pid_list) { @@ -677,14 +831,16 @@ static int output_p(void) p = flist_entry(e, struct btrace_pid, pid_list); if (prune_entry(&p->o)) { - flist_del(&p->pid_list); - flist_del(&p->hash_list); - free(p); + free_p(p); continue; } - p->o.start_delay = (p->o.first_ttime / 1000ULL) - first_ttime; + p->o.start_delay = (o_first_ttime(&p->o) / 1000ULL) - first_ttime; + depth_disabled += p->o.depth_disabled; } + if (depth_disabled) + log_err("fio: missing completion traces, depths capped at %u\n", max_depth); + memset(ios, 0, sizeof(ios)); flist_sort(NULL, &pid_list, entry_cmp); @@ -694,6 +850,8 @@ static int output_p(void) p = flist_entry(e, struct btrace_pid, pid_list); ret |= __output_p(p, ios); + if (ret && !output_ascii) + break; } if (output_ascii) @@ -704,47 +862,25 @@ static int output_p(void) static int usage(char *argv[]) { - fprintf(stderr, "%s: \n", argv[0]); - fprintf(stderr, "\t-t\tUsec threshold to ignore task\n"); - fprintf(stderr, "\t-n\tNumber IOS threshold to ignore task\n"); - fprintf(stderr, "\t-f\tFio job file output\n"); - fprintf(stderr, "\t-d\tUse this file/device for replay\n"); + log_err("%s: \n", argv[0]); + log_err("\t-t\tUsec threshold to ignore task\n"); + log_err("\t-n\tNumber IOS threshold to ignore task\n"); + log_err("\t-f\tFio job file output\n"); + log_err("\t-d\tUse this file/device for replay\n"); + log_err("\t-r\tIgnore jobs with less than this KB/sec rate\n"); + log_err("\t-R\tSet rate in fio job\n"); + log_err("\t-D\tCap queue depth at this value (def=%u)\n", max_depth); return 1; } -int main(int argc, char *argv[]) +static int trace_needs_swap(const char *trace_file, int *swap) { - int fd, ret, need_swap = -1; struct blk_io_trace t; - int i, c; - - if (argc < 2) - return usage(argv); - - while ((c = getopt(argc, argv, "t:n:fd:")) != -1) { - switch (c) { - case 't': - rt_threshold = atoi(optarg); - break; - case 'n': - ios_threshold = atoi(optarg); - break; - case 'f': - output_ascii = 0; - break; - case 'd': - filename = strdup(optarg); - break; - case '?': - default: - return usage(argv); - } - } + int fd, ret; - if (argc == optind) - return usage(argv); - - fd = open(argv[optind], O_RDONLY); + *swap = -1; + + fd = open(trace_file, O_RDONLY); if (fd < 0) { perror("open"); return 1; @@ -752,33 +888,82 @@ int main(int argc, char *argv[]) ret = read(fd, &t, sizeof(t)); if (ret < 0) { + close(fd); perror("read"); return 1; } else if (ret != sizeof(t)) { - fprintf(stderr, "fio: short read on trace file\n"); + close(fd); + log_err("fio: short read on trace file\n"); return 1; } close(fd); if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) - need_swap = 0; + *swap = 0; else { /* * Maybe it needs to be endian swapped... */ t.magic = fio_swap32(t.magic); if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) - need_swap = 1; + *swap = 1; } - if (need_swap == -1) { - fprintf(stderr, "fio: blktrace appears corrupt\n"); + if (*swap == -1) { + log_err("fio: blktrace appears corrupt\n"); return 1; } + return 0; +} + +int main(int argc, char *argv[]) +{ + int need_swap, i, c; + + if (argc < 2) + return usage(argv); + + while ((c = getopt(argc, argv, "t:n:fd:r:RD:")) != -1) { + switch (c) { + case 'R': + set_rate = 1; + break; + case 'r': + rate_threshold = atoi(optarg); + break; + case 't': + rt_threshold = atoi(optarg); + break; + case 'n': + ios_threshold = atoi(optarg); + break; + case 'f': + output_ascii = 0; + break; + case 'd': + filename = strdup(optarg); + break; + case 'D': + max_depth = atoi(optarg); + break; + case '?': + default: + return usage(argv); + } + } + + if (argc == optind) + return usage(argv); + + if (trace_needs_swap(argv[optind], &need_swap)) + return 1; + for (i = 0; i < PID_HASH_SIZE; i++) INIT_FLIST_HEAD(&pid_hash[i]); + for (i = 0; i < INFLIGHT_HASH_SIZE; i++) + INIT_FLIST_HEAD(&inflight_hash[i]); load_blktrace(argv[optind], need_swap); first_ttime /= 1000ULL;