summaryrefslogtreecommitdiff
path: root/block/blk-sysfs.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2016-04-26 09:14:27 -0600
committerJens Axboe <axboe@fb.com>2016-04-26 09:51:21 -0600
commit6edfbb2f08df9cc3bdb65c2f06fce94a39ab6236 (patch)
tree801df7406e2066b0e64b809d589df920dca7e429 /block/blk-sysfs.c
parent3cd16ff982aafeb88a259d3846c8113feb4bbf28 (diff)
writeback: throttle buffered writebackwb-buf-throttle-v5
Test patch that throttles buffered writeback to make it a lot more smooth, and has way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at the time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration in the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. The patch registers two sysfs entries. The first one, 'wb_window_usec', defines the window of monitoring. The second one, 'wb_lat_usec', sets the latency target for the window. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. Generally, a user would not have to touch these settings. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-sysfs.c')
-rw-r--r--block/blk-sysfs.c119
1 files changed, 119 insertions, 0 deletions
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6e516cc0d3d0..df194bf93598 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -10,6 +10,7 @@
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
#include <linux/blk-cgroup.h>
+#include <linux/wbt.h>
#include "blk.h"
#include "blk-mq.h"
@@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
+static ssize_t queue_var_store64(u64 *var, const char *page)
+{
+ int err;
+ u64 v;
+
+ err = kstrtou64(page, 10, &v);
+ if (err < 0)
+ return err;
+
+ *var = v;
+ return 0;
+}
+
static ssize_t queue_requests_show(struct request_queue *q, char *page)
{
return queue_var_show(q->nr_requests, (page));
@@ -347,6 +361,58 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
return ret;
}
+static ssize_t queue_wb_win_show(struct request_queue *q, char *page)
+{
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->win_nsec, 1000));
+}
+
+static ssize_t queue_wb_win_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ ssize_t ret;
+ u64 val;
+
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ ret = queue_var_store64(&val, page);
+ if (ret < 0)
+ return ret;
+
+ q->rq_wb->win_nsec = val * 1000ULL;
+ wbt_update_limits(q->rq_wb);
+ return count;
+}
+
+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+{
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+}
+
+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ ssize_t ret;
+ u64 val;
+
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ ret = queue_var_store64(&val, page);
+ if (ret < 0)
+ return ret;
+
+ q->rq_wb->min_lat_nsec = val * 1000ULL;
+ wbt_update_limits(q->rq_wb);
+ return count;
+}
+
static ssize_t queue_wc_show(struct request_queue *q, char *page)
{
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -541,6 +607,18 @@ static struct queue_sysfs_entry queue_stats_entry = {
.show = queue_stats_show,
};
+static struct queue_sysfs_entry queue_wb_lat_entry = {
+ .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_wb_lat_show,
+ .store = queue_wb_lat_store,
+};
+
+static struct queue_sysfs_entry queue_wb_win_entry = {
+ .attr = {.name = "wbt_window_usec", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_wb_win_show,
+ .store = queue_wb_win_store,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -568,6 +646,8 @@ static struct attribute *default_attrs[] = {
&queue_poll_entry.attr,
&queue_wc_entry.attr,
&queue_stats_entry.attr,
+ &queue_wb_lat_entry.attr,
+ &queue_wb_win_entry.attr,
NULL,
};
@@ -682,6 +762,43 @@ struct kobj_type blk_queue_ktype = {
.release = blk_release_queue,
};
+static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat)
+{
+ blk_queue_stat_get(data, stat);
+}
+
+static void blk_wb_stat_clear(void *data)
+{
+ blk_stat_clear(data);
+}
+
+static struct wb_stat_ops wb_stat_ops = {
+ .get = blk_wb_stat_get,
+ .clear = blk_wb_stat_clear,
+};
+
+static void blk_wb_init(struct request_queue *q)
+{
+ struct rq_wb *rwb;
+
+ rwb = wbt_init(&q->backing_dev_info, &wb_stat_ops, q);
+
+ /*
+ * If this fails, we don't get throttling
+ */
+ if (IS_ERR(rwb))
+ return;
+
+ if (blk_queue_nonrot(q))
+ rwb->min_lat_nsec = 2000000ULL;
+ else
+ rwb->min_lat_nsec = 75000000ULL;
+
+ wbt_set_queue_depth(rwb, blk_queue_depth(q));
+ wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+ q->rq_wb = rwb;
+}
+
int blk_register_queue(struct gendisk *disk)
{
int ret;
@@ -721,6 +838,8 @@ int blk_register_queue(struct gendisk *disk)
if (q->mq_ops)
blk_mq_register_disk(disk);
+ blk_wb_init(q);
+
if (!q->request_fn)
return 0;