Stats are tracked on a per-cpu/ctx basis, but summed on a per-hctx basis
when read. This could trivially be extended to per-queue stats, which
would just sum the per-hctx stats (a rough sketch of that is below).
That part isn't done yet.

The stats themselves should work, and there are the beginnings of stat
windows. Additionally, it might be worth tracking queue depths along
with the stats.
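As a rough sketch of that per-queue extension (blk_mq_queue_get_stat() is a
hypothetical name, not part of this patch), it would just walk the hardware
queues and fold their stats into one pair of blk_rq_stat:

static void blk_mq_queue_get_stat(struct request_queue *q,
                                  struct blk_rq_stat *dst)
{
        struct blk_mq_hw_ctx *hctx;
        unsigned int i;

        /* dst[0] holds reads, dst[1] holds writes, as elsewhere */
        blk_mq_init_stat(&dst[0]);
        blk_mq_init_stat(&dst[1]);

        /* blk_mq_hctx_get_stat() only accumulates, so this sums every hctx */
        queue_for_each_hw_ctx(q, hctx, i)
                blk_mq_hctx_get_stat(hctx, dst);
}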
Signed-off-by: Jens Axboe <axboe@fb.com>
return ret;
}
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+ const char *page, size_t count)
+{
+ blk_mq_hctx_clear_stat(hctx);
+ return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+ return sprintf(page, "%s samples=%lld, mean=%lld, min=%lld, max=%lld\n",
+ pre, (long long) stat->nr_samples,
+ (long long) stat->mean, (long long) stat->min,
+ (long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+ struct blk_rq_stat stat[2];
+ ssize_t ret;
+
+ blk_mq_init_stat(&stat[0]);
+ blk_mq_init_stat(&stat[1]);
+
+ blk_mq_hctx_get_stat(hctx, stat);
+ ret = print_stat(page, &stat[0], "read :");
+ ret += print_stat(page + ret, &stat[1], "write:");
+ return ret;
+}
+
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
 .attr = {.name = "dispatched", .mode = S_IRUGO },
 .show = blk_mq_sysfs_dispatched_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
 .attr = {.name = "io_poll", .mode = S_IRUGO },
 .show = blk_mq_hw_sysfs_poll_show,
};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+ .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+ .show = blk_mq_hw_sysfs_stat_show,
+ .store = blk_mq_hw_sysfs_stat_store,
+};
static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
+ &blk_mq_hw_sysfs_stat.attr,
NULL,
};
}
EXPORT_SYMBOL(blk_mq_can_queue);
+void blk_mq_hctx_clear_stat(struct blk_mq_hw_ctx *hctx)
+{
+ struct blk_mq_ctx *ctx;
+ unsigned int i;
+
+ hctx_for_each_ctx(hctx, ctx, i) {
+ blk_mq_init_stat(&ctx->stat[0]);
+ blk_mq_init_stat(&ctx->stat[1]);
+ }
+}
+
+static void sum_stat(struct blk_rq_stat *dst, struct blk_rq_stat *src)
+{
+ if (!src->nr_samples)
+ return;
+
+ dst->min = min(dst->min, src->min);
+ dst->max = max(dst->max, src->max);
+
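+ /* combine means, weighted by each side's sample count */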
+ if (!dst->nr_samples)
+ dst->mean = src->mean;
+ else {
+ dst->mean = div64_s64((src->mean * src->nr_samples) +
+ (dst->mean * dst->nr_samples),
+ dst->nr_samples + src->nr_samples);
+ }
+ dst->nr_samples += src->nr_samples;
+}
+
+void blk_mq_hctx_get_stat(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+{
+ struct blk_mq_ctx *ctx;
+ unsigned int i;
+
+ hctx_for_each_ctx(hctx, ctx, i) {
+ sum_stat(&dst[0], &ctx->stat[0]);
+ sum_stat(&dst[1], &ctx->stat[1]);
+ }
+}
+
+static void __blk_mq_init_stat(struct blk_rq_stat *stat, s64 time_now)
+{
+ memset(stat, 0, sizeof(*stat));
+ stat->min = -1ULL;
+ stat->time = time_now;
+}
+
+void blk_mq_init_stat(struct blk_rq_stat *stat)
+{
+ __blk_mq_init_stat(stat, ktime_to_ns(ktime_get()));
+}
+
+static void blk_mq_add_stat(struct request *rq)
+{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct blk_rq_stat *stat = &ctx->stat[rq_data_dir(rq)];
+ s64 delta, now, value;
+
+ now = ktime_to_ns(ktime_get());
+ if (now < rq->issue_time)
+ return;
+
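+ /* start a new window if the current one has expired */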
+ if (now - stat->time >= BLK_MQ_STAT_NSEC)
+ __blk_mq_init_stat(stat, now);
+
+ value = now - rq->issue_time;
+ if (value > stat->max)
+ stat->max = value;
+ if (value < stat->min)
+ stat->min = value;
+
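+ /* incremental update of the running mean */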
+ delta = value - stat->mean;
+ if (delta)
+ stat->mean += div64_s64(delta, stat->nr_samples + 1);
+
+ stat->nr_samples++;
+}
+
static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
struct request *rq, unsigned int rw_flags)
{
{
struct request_queue *q = rq->q;
+ blk_mq_add_stat(rq);
+
if (!q->softirq_done_fn)
blk_mq_end_request(rq, rq->errors);
else
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
+ rq->issue_time = ktime_to_ns(ktime_get());
+
blk_add_timer(rq);
/*
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
+ blk_mq_init_stat(&__ctx->stat[0]);
+ blk_mq_init_stat(&__ctx->stat[1]);
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
struct blk_mq_tag_set;
+/*
+ * 0.5s window
+ */
+#define BLK_MQ_STAT_NSEC 500000000ULL
+
+struct blk_rq_stat {
+ s64 mean;
+ u64 min;
+ u64 max;
+ s64 nr_samples;
+ s64 time;
+};
+
struct blk_mq_ctx {
struct {
spinlock_t lock;
struct list_head rq_list;
+ unsigned int rq_list_cnt;
+ unsigned int in_flight;
} ____cacheline_aligned_in_smp;
unsigned int cpu;
/* incremented at completion time */
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
+ struct blk_rq_stat stat[2];
struct request_queue *queue;
struct kobject kobj;
return hctx->nr_ctx && hctx->tags;
}
+
+void blk_mq_hctx_get_stat(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
+void blk_mq_hctx_clear_stat(struct blk_mq_hw_ctx *);
+void blk_mq_init_stat(struct blk_rq_stat *);
+
#endif
struct gendisk *rq_disk;
struct hd_struct *part;
unsigned long start_time;
+ s64 issue_time;
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
unsigned long long start_time_ns;