-rw-r--r--  block/blk-wbt.c  62
-rw-r--r--  block/blk-wbt.h  20
2 files changed, 48 insertions, 34 deletions
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 2512442873d4..990aa0700b31 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -15,7 +15,7 @@
* positive scaling steps where we shrink the monitoring window, a negative
* scaling step retains the default step==0 window size.
*
- * Copyright (C) 2016 Jens Axboe
+ * Copyright (C) 2016-2017 Jens Axboe
*
*/
#include <linux/kernel.h>
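The header comment summarizes the scaling model: positive scale steps shrink the latency monitoring window, while step <= 0 keeps the default window size. A standalone sketch of one plausible shrink rule; the 100 ms default and the halve-per-step rule are illustrative assumptions, not taken from this file:

	#include <stdio.h>

	#define WIN_NSEC 100000000ULL	/* assumed 100 ms default window */

	/* Shrink the window as scale_step grows; step <= 0 keeps the default */
	static unsigned long long cur_win_nsec(int scale_step)
	{
		if (scale_step <= 0)
			return WIN_NSEC;
		return WIN_NSEC >> scale_step;
	}

	int main(void)
	{
		for (int step = -1; step <= 3; step++)
			printf("step %2d -> window %llu ns\n",
			       step, cur_win_nsec(step));
		return 0;
	}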
@@ -101,9 +101,20 @@ static bool wb_recent_wait(struct rq_wb *rwb)
return time_before(jiffies, wb->dirty_sleep + HZ);
}
-static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+static bool rw_is_odirect_write(unsigned int rw)
{
- return &rwb->rq_wait[is_kswapd];
+ return (rw & REQ_OP_FLAGS_ODIRECT) == REQ_OP_FLAGS_ODIRECT;
+}
+
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd,
+ bool is_odirect)
+{
+ if (is_kswapd)
+ return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+ else if (is_odirect)
+ return &rwb->rq_wait[WBT_RWQ_ODIRECT];
+
+ return &rwb->rq_wait[WBT_RWQ_BUFFERED];
}
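Two details are worth noting in the new helpers: rw_is_odirect_write() compares against the full multi-bit REQ_OP_FLAGS_ODIRECT mask rather than testing a single bit, so it matches only when every bit is present, and get_rq_wait() gives kswapd priority over O_DIRECT when both could apply. A standalone illustration of the mask test, with invented flag values:

	#include <stdbool.h>
	#include <stdio.h>

	/* Invented stand-ins for the real request flags */
	#define F_WRITE		0x01
	#define F_SYNC		0x02
	#define F_ODIRECT_MASK	(F_WRITE | F_SYNC)  /* plays REQ_OP_FLAGS_ODIRECT */

	static bool is_odirect_write(unsigned int rw)
	{
		/* True only if every bit of the mask is set, not just some */
		return (rw & F_ODIRECT_MASK) == F_ODIRECT_MASK;
	}

	int main(void)
	{
		printf("write only: %d\n", is_odirect_write(F_WRITE));		/* 0 */
		printf("write+sync: %d\n", is_odirect_write(F_WRITE | F_SYNC));	/* 1 */
		return 0;
	}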
static void rwb_wake_all(struct rq_wb *rwb)
@@ -126,7 +137,7 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
if (!(wb_acct & WBT_TRACKED))
return;
- rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+ rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD, wb_acct & WBT_ODIRECT);
inflight = atomic_dec_return(&rqw->inflight);
/*
@@ -140,7 +151,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
/*
* If the device does write back caching, drop further down
- * before we wake people up.
+ * before we wake people up. The exception is if we recently
+ * waited in balance_dirty_pages() - if that's the case, use
+ * the normal wake limit to boost throughput a bit.
*/
if (rwb->wc && !wb_recent_wait(rwb))
limit = 0;
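The elided tail of __wbt_done() uses this limit to decide whether a completion should wake throttled writers. A standalone model of the decision the comment describes; the wb_normal depth and the exact drain condition are assumptions:

	#include <stdbool.h>
	#include <stdio.h>

	/* Modeled state: names mirror the patch, values are invented */
	struct rqwb_model {
		bool wc;		/* device does write-back caching */
		bool recent_wait;	/* recently slept in balance_dirty_pages() */
		int wb_normal;		/* normal wake depth */
	};

	/* Should this completion wake waiters? */
	static bool should_wake(const struct rqwb_model *m, int inflight)
	{
		int limit = m->wb_normal;

		/* Write-back cache: drain fully before waking, unless we
		 * recently waited for dirty pages */
		if (m->wc && !m->recent_wait)
			limit = 0;

		return !inflight || inflight < limit;
	}

	int main(void)
	{
		struct rqwb_model m = {
			.wc = true, .recent_wait = false, .wb_normal = 8,
		};

		printf("%d\n", should_wake(&m, 4));	/* 0: still draining */
		m.recent_wait = true;
		printf("%d\n", should_wake(&m, 4));	/* 1: normal limit applies */
		return 0;
	}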
@@ -502,9 +515,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
unsigned int limit;
/*
- * At this point we know it's a buffered write. If this is
- * kswapd trying to free memory, or REQ_SYNC is set, set, then
- * it's WB_SYNC_ALL writeback, and we'll use the max limit for
+ * At this point we know it's a write that we should throttle.
+ * If this is kswapd trying to free memory, or REQ_SYNC is set,
+ * then it's WB_SYNC_ALL writeback, and we'll use the max limit for
* that. If the write is marked as a background write, then use
* the idle limit, or go to normal if we haven't had competing
* IO for a bit.
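The comment spells out a three-way choice. A standalone model of that decision tree; the depth constants and the helper's shape are placeholders, not the patch's code:

	#include <stdbool.h>
	#include <stdio.h>

	#define F_SYNC		0x1	/* stands in for REQ_SYNC */
	#define F_BACKGROUND	0x2	/* stands in for REQ_BACKGROUND */

	enum { WB_IDLE = 1, WB_NORMAL = 8, WB_MAX = 16 };  /* invented depths */

	static unsigned int pick_limit(unsigned long rw, bool is_kswapd,
				       bool competing_io)
	{
		if (is_kswapd || (rw & F_SYNC))
			return WB_MAX;		/* WB_SYNC_ALL writeback */
		if ((rw & F_BACKGROUND) && competing_io)
			return WB_IDLE;		/* background write, competing IO */
		return WB_NORMAL;		/* no recent competition */
	}

	int main(void)
	{
		printf("%u\n", pick_limit(F_SYNC, false, false));	/* 16 */
		printf("%u\n", pick_limit(F_BACKGROUND, false, true));	/* 1 */
		printf("%u\n", pick_limit(0, false, false));		/* 8 */
		return 0;
	}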
@@ -555,9 +568,10 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
__releases(lock)
__acquires(lock)
{
- struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
+ struct rq_wait *rqw;
DEFINE_WAIT(wait);
+ rqw = get_rq_wait(rwb, current_is_kswapd(), rw_is_odirect_write(rw));
if (may_queue(rwb, rqw, &wait, rw))
return;
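The loop elided between these two hunks is the standard exclusive-waiter pattern around may_queue(). A kernel-style sketch of that idiom (the generic shape, not necessarily the patch's exact body):

	for (;;) {
		prepare_to_wait_exclusive(&rqw->wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		if (may_queue(rwb, rqw, &wait, rw))
			break;

		if (lock)
			spin_unlock_irq(lock);	/* never sleep holding the lock */

		io_schedule();

		if (lock)
			spin_lock_irq(lock);
	}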
@@ -579,23 +593,12 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
finish_wait(&rqw->wait, &wait);
}
-static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
+static inline bool wbt_should_throttle(struct bio *bio)
{
- const int op = bio_op(bio);
-
/*
- * If not a WRITE, do nothing
+	 * Throttle all writes; we'll bucketize them appropriately later
*/
- if (op != REQ_OP_WRITE)
- return false;
-
- /*
- * Don't throttle WRITE_ODIRECT
- */
- if ((bio->bi_opf & REQ_OP_FLAGS_ODIRECT) == REQ_OP_FLAGS_ODIRECT)
- return false;
-
- return true;
+ return bio_op(bio) == REQ_OP_WRITE;
}
/*
@@ -606,18 +609,15 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
*/
enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
{
- unsigned int ret = 0;
+ enum wbt_flags ret = 0;
if (!rwb_enabled(rwb))
return 0;
- if (bio_op(bio) == REQ_OP_READ)
- ret = WBT_READ;
-
- if (!wbt_should_throttle(rwb, bio)) {
- if (ret & WBT_READ)
+ if (!wbt_should_throttle(bio)) {
+ if (bio_op(bio) == REQ_OP_READ)
wb_timestamp(rwb, &rwb->last_issue);
- return ret;
+ return 0;
}
__wbt_wait(rwb, bio->bi_opf, lock);
@@ -627,6 +627,8 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
if (current_is_kswapd())
ret |= WBT_KSWAPD;
+ else if (rw_is_odirect_write(bio->bi_opf))
+ ret |= WBT_ODIRECT;
return ret | WBT_TRACKED;
}
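The returned flags are designed to round-trip: the submission path stores them with the request, and completion hands them back so __wbt_done() resolves the same bucket that was charged. A schematic caller; the wb_acct field is hypothetical:

	/* Submission: throttle if needed and remember the classification */
	enum wbt_flags acct = wbt_wait(rwb, bio, lock);
	rq->wb_acct = acct;		/* hypothetical per-request stash */

	/* ... the request is issued and completes ... */

	/* Completion: the same flags select the same rq_wait bucket */
	__wbt_done(rwb, rq->wb_acct);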
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index e7c086374e20..a6470175ac9e 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -9,16 +9,28 @@
#include "blk-stat.h"
+/*
+ * The first bit is tracked or not, the next two bits are a value shifted up
+ */
enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
- WBT_READ = 2, /* read */
- WBT_KSWAPD = 4, /* write, from kswapd */
+ WBT_KSWAPD = 2,
+ WBT_ODIRECT = 4,
WBT_NR_BITS = 3, /* number of bits */
};
+/*
+ * We have three buckets for accounting and waiting - one for buffered
+ * IO, one for kswapd, and one for O_DIRECT writes. This helps us ensure
+ * fairness between different types of writes, without starving one of
+ * them unnecessarily.
+ */
enum {
- WBT_NUM_RWQ = 2,
+ WBT_RWQ_BUFFERED = 0,
+ WBT_RWQ_KSWAPD,
+ WBT_RWQ_ODIRECT,
+ WBT_NUM_RWQ,
};
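Taken together, a tracked write carries at most one of WBT_KSWAPD or WBT_ODIRECT, and each combination maps to exactly one bucket, mirroring get_rq_wait() in the .c file. A standalone demonstration of the mapping:

	#include <stdio.h>

	enum wbt_flags {
		WBT_TRACKED	= 1,
		WBT_KSWAPD	= 2,
		WBT_ODIRECT	= 4,
	};

	enum {
		WBT_RWQ_BUFFERED = 0,
		WBT_RWQ_KSWAPD,
		WBT_RWQ_ODIRECT,
		WBT_NUM_RWQ,
	};

	static int flags_to_bucket(unsigned int flags)
	{
		if (flags & WBT_KSWAPD)
			return WBT_RWQ_KSWAPD;
		if (flags & WBT_ODIRECT)
			return WBT_RWQ_ODIRECT;
		return WBT_RWQ_BUFFERED;
	}

	int main(void)
	{
		printf("%d\n", flags_to_bucket(WBT_TRACKED));			/* 0 */
		printf("%d\n", flags_to_bucket(WBT_TRACKED | WBT_KSWAPD));	/* 1 */
		printf("%d\n", flags_to_bucket(WBT_TRACKED | WBT_ODIRECT));	/* 2 */
		return 0;
	}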
/*
@@ -52,7 +64,7 @@ static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
static inline bool wbt_is_read(struct blk_issue_stat *stat)
{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
+ return (stat->stat >> BLK_STAT_RES_SHIFT) == 0;
}
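The new wbt_is_read() works because reads are never assigned wbt flags anymore: wbt_wait() returns 0 for them, so the bits stored above BLK_STAT_RES_SHIFT stay zero. For reference, a sketch of the packing this relies on; blk-wbt.h of this era carried a tracking helper along these lines, though the exact form here is an assumption:

	static inline void wbt_track(struct blk_issue_stat *stat,
				     enum wbt_flags wb_acct)
	{
		/* Flag bits live above the shift; zero there means a read */
		stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
	}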
struct rq_wait {