From 8cb99eeb9fbc557c9387cc3bf23dd2a420cec533 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 10 Nov 2015 14:52:24 -0700
Subject: [PATCH] blk-mq: scalable per-ctx/hctx request completion stats

Tracked on a per cpu/ctx basis, but summed on a per hctx basis. Could
trivially be extended to per queue, which would just sum the per hctx
stats. Not done yet. The stats should work, and there's the beginnings
of doing stat windows. Additionally, might be worth tracking depths
with the stats.

Signed-off-by: Jens Axboe
---
 block/blk-mq-sysfs.c   | 35 ++++++++++++++++++
 block/blk-mq.c         | 84 ++++++++++++++++++++++++++++++++++++++++++
 block/blk-mq.h         | 21 +++++++++++
 include/linux/blkdev.h |  1 +
 4 files changed, 141 insertions(+)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1cf18784c5cf..a8ecb233b718 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -247,6 +247,35 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return ret;
 }
 
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+					  const char *page, size_t count)
+{
+	blk_mq_hctx_clear_stat(hctx);
+	return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+			pre, (long long) stat->nr_samples,
+			(long long) stat->mean, (long long) stat->min,
+			(long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	struct blk_rq_stat stat[2];
+	ssize_t ret;
+
+	blk_mq_init_stat(&stat[0]);
+	blk_mq_init_stat(&stat[1]);
+
+	blk_mq_hctx_get_stat(hctx, stat);
+	ret = print_stat(page, &stat[0], "read :");
+	ret += print_stat(page + ret, &stat[1], "write:");
+	return ret;
+}
+
 static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_sysfs_dispatched_show,
@@ -304,6 +333,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
 	.attr = {.name = "io_poll", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_poll_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+	.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+	.show = blk_mq_hw_sysfs_stat_show,
+	.store = blk_mq_hw_sysfs_stat_store,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_queued.attr,
@@ -314,6 +348,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_cpus.attr,
 	&blk_mq_hw_sysfs_active.attr,
 	&blk_mq_hw_sysfs_poll.attr,
+	&blk_mq_hw_sysfs_stat.attr,
 	NULL,
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 86bd5b25288e..7c8937b02f5a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -158,6 +158,84 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
+void blk_mq_hctx_clear_stat(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_ctx *ctx;
+	unsigned int i;
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		blk_mq_init_stat(&ctx->stat[0]);
+		blk_mq_init_stat(&ctx->stat[1]);
+	}
+}
+
+static void sum_stat(struct blk_rq_stat *dst, struct blk_rq_stat *src)
+{
+	if (!src->nr_samples)
+		return;
+
+	dst->min = min(dst->min, src->min);
+	dst->max = max(dst->max, src->max);
+
+	if (!dst->nr_samples)
+		dst->mean = src->mean;
+	else {
+		dst->mean = div64_s64((src->mean * src->nr_samples) +
+					(dst->mean * dst->nr_samples),
+					dst->nr_samples + src->nr_samples);
+	}
+	dst->nr_samples += src->nr_samples;
+}
+
+void blk_mq_hctx_get_stat(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+{
+	struct blk_mq_ctx *ctx;
+	unsigned int i;
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		sum_stat(&dst[0], &ctx->stat[0]);
+		sum_stat(&dst[1], &ctx->stat[1]);
+	}
+}
+
+static void __blk_mq_init_stat(struct blk_rq_stat *stat, s64 time_now)
+{
+	memset(stat, 0, sizeof(*stat));
+	stat->min = -1ULL;
+	stat->time = time_now;
+}
+
+void blk_mq_init_stat(struct blk_rq_stat *stat)
+{
+	__blk_mq_init_stat(stat, ktime_to_ns(ktime_get()));
+}
+
+static void blk_mq_add_stat(struct request *rq)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_rq_stat *stat = &ctx->stat[rq_data_dir(rq)];
+	s64 delta, now, value;
+
+	now = ktime_to_ns(ktime_get());
+	if (now < rq->issue_time)
+		return;
+
+	if (now - stat->time >= BLK_MQ_STAT_NSEC)
+		__blk_mq_init_stat(stat, now);
+
+	value = now - rq->issue_time;
+	if (value > stat->max)
+		stat->max = value;
+	if (value < stat->min)
+		stat->min = value;
+
+	delta = value - stat->mean;
+	if (delta)
+		stat->mean += div64_s64(delta, stat->nr_samples + 1);
+
+	stat->nr_samples++;
+}
+
 static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 			       struct request *rq, unsigned int rw_flags)
 {
@@ -362,6 +440,8 @@ void __blk_mq_complete_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
+	blk_mq_add_stat(rq);
+
 	if (!q->softirq_done_fn)
 		blk_mq_end_request(rq, rq->errors);
 	else
@@ -405,6 +485,8 @@ void blk_mq_start_request(struct request *rq)
 	if (unlikely(blk_bidi_rq(rq)))
 		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
 
+	rq->issue_time = ktime_to_ns(ktime_get());
+
 	blk_add_timer(rq);
 
 	/*
@@ -1778,6 +1860,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		spin_lock_init(&__ctx->lock);
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
+		blk_mq_init_stat(&__ctx->stat[0]);
+		blk_mq_init_stat(&__ctx->stat[1]);
 
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
 		if (!cpu_online(i))
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b44dce165761..472d80c9c912 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -3,10 +3,25 @@
 
 struct blk_mq_tag_set;
 
+/*
+ * 0.5s window
+ */
+#define BLK_MQ_STAT_NSEC 500000000ULL
+
+struct blk_rq_stat {
+	s64 mean;
+	u64 min;
+	u64 max;
+	s64 nr_samples;
+	s64 time;
+};
+
 struct blk_mq_ctx {
 	struct {
 		spinlock_t lock;
 		struct list_head rq_list;
+		unsigned int rq_list_cnt;
+		unsigned int in_flight;
 	} ____cacheline_aligned_in_smp;
 
 	unsigned int cpu;
@@ -20,6 +35,7 @@ struct blk_mq_ctx {
 
 	/* incremented at completion time */
 	unsigned long ____cacheline_aligned_in_smp rq_completed[2];
+	struct blk_rq_stat stat[2];
 
 	struct request_queue *queue;
 	struct kobject kobj;
@@ -122,4 +138,9 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
 	return hctx->nr_ctx && hctx->tags;
 }
 
+
+void blk_mq_hctx_get_stat(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
+void blk_mq_hctx_clear_stat(struct blk_mq_hw_ctx *);
+void blk_mq_init_stat(struct blk_rq_stat *);
+
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3fe27f8d91f0..709344b7b127 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -152,6 +152,7 @@ struct request {
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
 	unsigned long start_time;
+	s64 issue_time;
 #ifdef CONFIG_BLK_CGROUP
 	struct request_list *rl;		/* rl this rq is alloced from */
 	unsigned long long start_time_ns;
-- 
2.25.1