zram: writeback throttle
author Minchan Kim <minchan@kernel.org>
Fri, 28 Dec 2018 08:36:54 +0000 (00:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Dec 2018 20:11:49 +0000 (12:11 -0800)
If there is a lot of write IO to a flash device, the storage could
suffer from a wearout problem. To overcome this, the admin needs
to design a write limitation that guarantees flash health
for the entire product life.

This patch creates a new knob "writeback_limit" for zram.

writeback_limit's default value is 0, which means writeback is not
limited at all. If the admin wants to measure the writeback count over
a certain period, it can be read from the 3rd column of
/sys/block/zram0/bd_stat.

If the admin wants to limit writeback to 400MB per day, it can be done
as shown below (the limit is expressed in 4KB units).

MB_SHIFT=20
SHIFT_4K=12
echo $((400<<MB_SHIFT>>SHIFT_4K)) > \
/sys/block/zram0/writeback_limit

If the admin wants to allow further writeback again, it can be done as below:

echo 0 > /sys/block/zram0/writeback_limit

If the admin wants to see the remaining writeback budget:

cat /sys/block/zram0/writeback_limit

The writeback_limit count is reset whenever zram is reset (e.g., system
reboot, echo 1 > /sys/block/zramX/reset), so it is the user's job to keep
track of how much writeback happened before the reset in order to allocate
the extra writeback budget in the next setting.

[minchan@kernel.org: v4]
Link: http://lkml.kernel.org/r/20181203024045.153534-8-minchan@kernel.org
Link: http://lkml.kernel.org/r/20181127055429.251614-8-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Joey Pabalinas <joeypabalinas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/ABI/testing/sysfs-block-zram
Documentation/blockdev/zram.txt
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h

index 65fc33b2f53b2bcef7b529679f27a2345c995b3f..9d2339a485c8ab17b2dd13979e139bb1309db8eb 100644 (file)
@@ -121,3 +121,12 @@ Description:
                The bd_stat file is read-only and represents backing device's
                statistics (bd_count, bd_reads, bd_writes) in a format
                similar to block layer statistics file format.
+
+What:          /sys/block/zram<id>/writeback_limit
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The writeback_limit file is read-write and specifies the maximum
+               amount of writeback ZRAM can do. The limit could be changed
+               in run time and "0" means disable the limit.
+               No limit is the initial state.
index 906df97527a7ffc102a911f3a7a828373e7f9f36..436c5e98e1b6038511ac5bc1ac9ffd83e418d8ff 100644 (file)
@@ -164,6 +164,8 @@ reset             WO    trigger device reset
 mem_used_max      WO    reset the `mem_used_max' counter (see later)
 mem_limit         WO    specifies the maximum amount of memory ZRAM can use
                         to store the compressed data
+writeback_limit   RW    specifies the maximum amount of write IO zram can
+                       write out to backing device as 4KB unit
 max_comp_streams  RW    the number of possible concurrent compress operations
 comp_algorithm    RW    show and change the compression algorithm
 compact           WO    trigger memory compaction
@@ -275,6 +277,35 @@ Admin can request writeback of those idle pages at right timing via
 
 With the command, zram writeback idle pages from memory to the storage.
 
+If there are lots of write IO with flash device, potentially, it has
+flash wearout problem so that admin needs to design write limitation
+to guarantee storage health for entire product life.
+To overcome the concern, zram supports "writeback_limit".
+The "writeback_limit"'s default value is 0 so that it doesn't limit
+any writeback. If admin want to measure writeback count in a certain
+period, he could know it via /sys/block/zram0/bd_stat's 3rd column.
+
+If admin want to limit writeback as per-day 400M, he could do it
+like below.
+
+    MB_SHIFT=20
+    SHIFT_4K=12
+    echo $((400<<MB_SHIFT>>SHIFT_4K)) > \
+           /sys/block/zram0/writeback_limit
+
+If admin want to allow further write again, he could do it like below
+
+    echo 0 > /sys/block/zram0/writeback_limit
+
+If admin want to see remaining writeback budget since he set,
+
+    cat /sys/block/zram0/writeback_limit
+
+The writeback_limit count is reset whenever zram is reset (e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset), so it is the user's
+job to keep track of how much writeback happened before the reset in
+order to allocate the extra writeback budget in the next setting.
+
 = memory tracking
 
 With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
index f1832fa3ba41b4cc184f38199f3c9defb4df0fa7..33c5cc879f246e09b413de0d53687cf9f433a87f 100644 (file)
@@ -330,6 +330,39 @@ next:
 }
 
 #ifdef CONFIG_ZRAM_WRITEBACK
+static ssize_t writeback_limit_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct zram *zram = dev_to_zram(dev);
+       u64 val;
+       ssize_t ret = -EINVAL;
+
+       if (kstrtoull(buf, 10, &val))
+               return ret;
+
+       down_read(&zram->init_lock);
+       atomic64_set(&zram->stats.bd_wb_limit, val);
+       if (val == 0)
+               zram->stop_writeback = false;
+       up_read(&zram->init_lock);
+       ret = len;
+
+       return ret;
+}
+
+static ssize_t writeback_limit_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u64 val;
+       struct zram *zram = dev_to_zram(dev);
+
+       down_read(&zram->init_lock);
+       val = atomic64_read(&zram->stats.bd_wb_limit);
+       up_read(&zram->init_lock);
+
+       return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+
 static void reset_bdev(struct zram *zram)
 {
        struct block_device *bdev;
@@ -612,6 +645,11 @@ static ssize_t writeback_store(struct device *dev,
                bvec.bv_len = PAGE_SIZE;
                bvec.bv_offset = 0;
 
+               if (zram->stop_writeback) {
+                       ret = -EIO;
+                       break;
+               }
+
                if (!blk_idx) {
                        blk_idx = alloc_block_bdev(zram);
                        if (!blk_idx) {
@@ -694,6 +732,11 @@ static ssize_t writeback_store(struct device *dev,
                zram_set_element(zram, index, blk_idx);
                blk_idx = 0;
                atomic64_inc(&zram->stats.pages_stored);
+               if (atomic64_add_unless(&zram->stats.bd_wb_limit,
+                                       -1 << (PAGE_SHIFT - 12), 0)) {
+                       if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
+                               zram->stop_writeback = true;
+               }
 next:
                zram_slot_unlock(zram, index);
        }
@@ -1018,6 +1061,7 @@ static ssize_t mm_stat_show(struct device *dev,
 }
 
 #ifdef CONFIG_ZRAM_WRITEBACK
+#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
 static ssize_t bd_stat_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -1027,9 +1071,9 @@ static ssize_t bd_stat_show(struct device *dev,
        down_read(&zram->init_lock);
        ret = scnprintf(buf, PAGE_SIZE,
                "%8llu %8llu %8llu\n",
-               (u64)atomic64_read(&zram->stats.bd_count) * (PAGE_SHIFT - 12),
-               (u64)atomic64_read(&zram->stats.bd_reads) * (PAGE_SHIFT - 12),
-               (u64)atomic64_read(&zram->stats.bd_writes) * (PAGE_SHIFT - 12));
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
        up_read(&zram->init_lock);
 
        return ret;
@@ -1767,6 +1811,7 @@ static DEVICE_ATTR_RW(comp_algorithm);
 #ifdef CONFIG_ZRAM_WRITEBACK
 static DEVICE_ATTR_RW(backing_dev);
 static DEVICE_ATTR_WO(writeback);
+static DEVICE_ATTR_RW(writeback_limit);
 #endif
 
 static struct attribute *zram_disk_attrs[] = {
@@ -1782,6 +1827,7 @@ static struct attribute *zram_disk_attrs[] = {
 #ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_backing_dev.attr,
        &dev_attr_writeback.attr,
+       &dev_attr_writeback_limit.attr,
 #endif
        &dev_attr_io_stat.attr,
        &dev_attr_mm_stat.attr,
index bc477803530d5a0656b644738a64cd14b64f21de..4bd3afd15e833e2e8a75292e9f8ffcc34e162b63 100644 (file)
@@ -86,6 +86,7 @@ struct zram_stats {
        atomic64_t bd_count;            /* no. of pages in backing device */
        atomic64_t bd_reads;            /* no. of reads from backing device */
        atomic64_t bd_writes;           /* no. of writes from backing device */
+       atomic64_t bd_wb_limit;         /* writeback limit of backing device */
 #endif
 };
 
@@ -113,6 +114,7 @@ struct zram {
         */
        bool claim; /* Protected by bdev->bd_mutex */
        struct file *backing_dev;
+       bool stop_writeback;
 #ifdef CONFIG_ZRAM_WRITEBACK
        struct block_device *bdev;
        unsigned int old_block_size;