mm,thp: avoid writes to file with THP in pagecache
author: Song Liu <songliubraving@fb.com>
Mon, 23 Sep 2019 22:38:03 +0000 (15:38 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 24 Sep 2019 22:54:11 +0000 (15:54 -0700)
With the previous patch, an application can put part of its text section
in THPs via madvise().  These THPs are protected from writes while the
application is still running (ETXTBSY).  However, after the application
exits, the file becomes available for writes.

This patch avoids writes to file THPs by dropping the page cache for the
file when the file is opened for write.  A new counter, nr_thps, is added
to struct address_space.  In do_dentry_open(), if the file is opened for
write and nr_thps is non-zero, we drop the page cache for the whole file.

Link: http://lkml.kernel.org/r/20190801184244.3169074-8-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Reported-by: kbuild test robot <lkp@intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/inode.c
fs/open.c
include/linux/fs.h
mm/filemap.c
mm/khugepaged.c

index 64bf28cf05cde859ab0b72abd4a30cbdc7e3912b..fef457a42882ba64092ae01577d8a3746f03f6e8 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        mapping->flags = 0;
        mapping->wb_err = 0;
        atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_set(&mapping->nr_thps, 0);
+#endif
        mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
        mapping->private_data = NULL;
        mapping->writeback_index = 0;
index a59abe3c669ae13ac7f3fa68bc0fb5f54a781ee4..c60cd22cc052a41d2e0b2b1f954051268ae10bf8 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
                if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
                        return -EINVAL;
        }
+
+       /*
+        * XXX: Huge page cache doesn't support writing yet. Drop all page
+        * cache for this file before processing writes.
+        */
+       if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+               truncate_pagecache(inode, 0);
+
        return 0;
 
 cleanup_all:
index 866268c2c6e3a0e127cf87b1276a4ebadf52c37f..b0c6b0d34d0213569c88c9bd83280c46ecaf2be2 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
        struct xarray           i_pages;
        gfp_t                   gfp_mask;
        atomic_t                i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       /* number of thp, only for non-shmem files */
+       atomic_t                nr_thps;
+#endif
        struct rb_root_cached   i_mmap;
        struct rw_semaphore     i_mmap_rwsem;
        unsigned long           nrpages;
@@ -2798,6 +2803,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
        return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       return atomic_read(&mapping->nr_thps);  /* THPs counted in @mapping */
+#else  /* !CONFIG_READ_ONLY_THP_FOR_FS: nr_thps is not compiled in */
+       return 0;
+#endif /* CONFIG_READ_ONLY_THP_FOR_FS */
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_inc(&mapping->nr_thps);  /* account one more THP in @mapping */
+#else  /* !CONFIG_READ_ONLY_THP_FOR_FS: there is no counter to update */
+       WARN_ON_ONCE(1);        /* presumably unreachable in this config */
+#endif /* CONFIG_READ_ONLY_THP_FOR_FS */
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+       atomic_dec(&mapping->nr_thps);  /* one THP fewer in @mapping */
+#else  /* !CONFIG_READ_ONLY_THP_FOR_FS: there is no counter to update */
+       WARN_ON_ONCE(1);        /* presumably unreachable in this config */
+#endif /* CONFIG_READ_ONLY_THP_FOR_FS */
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
                           int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
index 91fe3a08ca4a31cca4ad1bc92733287f87b44437..1146fcfa321511b61e5dd258c9825b6bf759426e 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -205,6 +205,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
                        __dec_node_page_state(page, NR_SHMEM_THPS);
        } else if (PageTransHuge(page)) {
                __dec_node_page_state(page, NR_FILE_THPS);
+               filemap_nr_thps_dec(mapping);
        }
 
        /*
index 8607c77431b3e03e57cde1e6ae19d8adddc78e06..e89430ec5267f941416b1c3818ff10c57624e612 100644 (file)
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1514,8 +1514,10 @@ out_unlock:
 
        if (is_shmem)
                __inc_node_page_state(new_page, NR_SHMEM_THPS);
-       else
+       else {
                __inc_node_page_state(new_page, NR_FILE_THPS);
+               filemap_nr_thps_inc(mapping);
+       }
 
        if (nr_none) {
                struct zone *zone = page_zone(new_page);