mm/page_io.c: annotate refault stalls from swap_readpage
authorMinchan Kim <minchan@google.com>
Sun, 1 Dec 2019 01:58:29 +0000 (17:58 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 1 Dec 2019 20:59:11 +0000 (12:59 -0800)
If a block device supports rw_page operation, it doesn't submit bios so
the annotation in submit_bio() for refault stall doesn't work.  It
happens with zram in android, especially swap read path which could
consume CPU cycle for decompress.  It is also a problem for zswap which
uses frontswap.

Annotate swap_readpage() to account the synchronous IO overhead to
prevent underreport memory pressure.

[akpm@linux-foundation.org: add comment, per Johannes]
Link: http://lkml.kernel.org/r/20191010152134.38545-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/page_io.c

index 60a66a58b9bf9ab0922b545b15da739eb4fd5dd0..3a198deb8bb1a52772e224ea044607b8fde7f0e9 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/writeback.h>
 #include <linux/frontswap.h>
 #include <linux/blkdev.h>
+#include <linux/psi.h>
 #include <linux/uio.h>
 #include <linux/sched/task.h>
 #include <asm/pgtable.h>
@@ -354,10 +355,19 @@ int swap_readpage(struct page *page, bool synchronous)
        struct swap_info_struct *sis = page_swap_info(page);
        blk_qc_t qc;
        struct gendisk *disk;
+       unsigned long pflags;
 
        VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageUptodate(page), page);
+
+       /*
+        * Count submission time as memory stall. When the device is congested,
+        * or the submitting cgroup IO-throttled, submission can be a
+        * significant part of overall IO time.
+        */
+       psi_memstall_enter(&pflags);
+
        if (frontswap_load(page) == 0) {
                SetPageUptodate(page);
                unlock_page(page);
@@ -371,7 +381,7 @@ int swap_readpage(struct page *page, bool synchronous)
                ret = mapping->a_ops->readpage(swap_file, page);
                if (!ret)
                        count_vm_event(PSWPIN);
-               return ret;
+               goto out;
        }
 
        ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
@@ -382,7 +392,7 @@ int swap_readpage(struct page *page, bool synchronous)
                }
 
                count_vm_event(PSWPIN);
-               return 0;
+               goto out;
        }
 
        ret = 0;
@@ -418,6 +428,7 @@ int swap_readpage(struct page *page, bool synchronous)
        bio_put(bio);
 
 out:
+       psi_memstall_leave(&pflags);
        return ret;
 }