Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / mm / shmem.c
index b76521ed372dfdfc03117b2773b39f104b9901a6..e40a08c5c6d78ac629b52e4956b4cb5c13948b6c 100644 (file)
@@ -76,7 +76,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
-#include <linux/userfaultfd_k.h>
 #include <linux/rmap.h>
 #include <linux/uuid.h>
 
@@ -116,10 +115,12 @@ struct shmem_options {
        bool full_inums;
        int huge;
        int seen;
+       bool noswap;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
+#define SHMEM_SEEN_NOSWAP 16
 };
 
 #ifdef CONFIG_TMPFS
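The new "noswap" flag travels in struct shmem_options next to a SHMEM_SEEN_NOSWAP bit, so later code (notably shmem_reconfigure() further down) can tell "noswap was never mentioned" apart from "noswap was explicitly requested". A minimal stand-alone C sketch of that value-plus-seen-bit pattern, with hypothetical names rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_SEEN_NOSWAP 16		/* mirrors SHMEM_SEEN_NOSWAP */

struct example_options {
	bool noswap;	/* the value that was asked for */
	int seen;	/* which options were mentioned at all */
};

static void example_parse_noswap(struct example_options *opts)
{
	opts->noswap = true;
	opts->seen |= EXAMPLE_SEEN_NOSWAP;
}

int main(void)
{
	struct example_options opts = { 0 };

	example_parse_noswap(&opts);
	printf("noswap mentioned: %d, value: %d\n",
	       !!(opts.seen & EXAMPLE_SEEN_NOSWAP), opts.noswap);
	return 0;
}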
@@ -603,7 +604,7 @@ next:
 
                index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
                folio = filemap_get_folio(inode->i_mapping, index);
-               if (!folio)
+               if (IS_ERR(folio))
                        goto drop;
 
                /* No huge page at the end of the file: nothing to split */
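This hunk (and the matching one in shmem_get_link() further down) adapts to filemap_get_folio() now returning an ERR_PTR on a cache miss instead of NULL, so callers test with IS_ERR() rather than a NULL check. A stand-alone illustration of that ERR_PTR convention, using simplified re-implementations of the helpers that live in <linux/err.h> (the demo_ names are mine):

#include <errno.h>
#include <stdio.h>

#define DEMO_MAX_ERRNO 4095

static inline void *demo_err_ptr(long error)
{
	return (void *)error;
}

static inline long demo_ptr_err(const void *ptr)
{
	return (long)ptr;
}

static inline int demo_is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-DEMO_MAX_ERRNO;
}

/* Stand-in for a cache lookup: encode "not found" in the pointer itself. */
static void *demo_lookup(int hit)
{
	static int value = 42;

	return hit ? (void *)&value : demo_err_ptr(-ENOENT);
}

int main(void)
{
	void *p = demo_lookup(0);

	if (demo_is_err(p))
		printf("miss, error %ld\n", demo_ptr_err(p));
	else
		printf("hit: %d\n", *(int *)p);
	return 0;
}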
@@ -883,14 +884,21 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
 
        /*
         * At first avoid shmem_get_folio(,,,SGP_READ): that fails
-        * beyond i_size, and reports fallocated pages as holes.
+        * beyond i_size, and reports fallocated folios as holes.
         */
-       folio = __filemap_get_folio(inode->i_mapping, index,
-                                       FGP_ENTRY | FGP_LOCK, 0);
-       if (!xa_is_value(folio))
+       folio = filemap_get_entry(inode->i_mapping, index);
+       if (!folio)
                return folio;
+       if (!xa_is_value(folio)) {
+               folio_lock(folio);
+               if (folio->mapping == inode->i_mapping)
+                       return folio;
+               /* The folio has been swapped out */
+               folio_unlock(folio);
+               folio_put(folio);
+       }
        /*
-        * But read a page back from swap if any of it is within i_size
+        * But read a folio back from swap if any of it is within i_size
         * (although in some cases this is just a waste of time).
         */
        folio = NULL;
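shmem_get_partial_folio() previously got a locked entry from __filemap_get_folio(FGP_ENTRY | FGP_LOCK); filemap_get_entry() returns the entry unlocked (holding only a reference when it is a real folio), so the function now takes the folio lock itself and revalidates folio->mapping before trusting the result. A hedged kernel-context sketch of that lock-and-revalidate step as a helper (the helper name is mine, not an existing API; <linux/pagemap.h> assumed):

/*
 * Lock a folio obtained from an unlocked lookup and check that it is
 * still attached to the expected mapping.  Returns the locked folio, or
 * NULL after dropping lock and reference if it was truncated or swapped
 * out in the meantime.
 */
static struct folio *demo_lock_if_still_attached(struct folio *folio,
						 struct address_space *mapping)
{
	folio_lock(folio);
	if (folio->mapping == mapping)
		return folio;		/* keep the lock and the reference */
	folio_unlock(folio);
	folio_put(folio);
	return NULL;
}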
@@ -1331,12 +1339,29 @@ int shmem_unuse(unsigned int type)
 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 {
        struct folio *folio = page_folio(page);
-       struct shmem_inode_info *info;
-       struct address_space *mapping;
-       struct inode *inode;
+       struct address_space *mapping = folio->mapping;
+       struct inode *inode = mapping->host;
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        swp_entry_t swap;
        pgoff_t index;
 
+       /*
+        * Our capabilities prevent regular writeback or sync from ever calling
+        * shmem_writepage; but a stacking filesystem might use ->writepage of
+        * its underlying filesystem, in which case tmpfs should write out to
+        * swap only in response to memory pressure, and not for the writeback
+        * threads or sync.
+        */
+       if (WARN_ON_ONCE(!wbc->for_reclaim))
+               goto redirty;
+
+       if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap))
+               goto redirty;
+
+       if (!total_swap_pages)
+               goto redirty;
+
        /*
         * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
         * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
@@ -1351,27 +1376,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                folio_clear_dirty(folio);
        }
 
-       BUG_ON(!folio_test_locked(folio));
-       mapping = folio->mapping;
        index = folio->index;
-       inode = mapping->host;
-       info = SHMEM_I(inode);
-       if (info->flags & VM_LOCKED)
-               goto redirty;
-       if (!total_swap_pages)
-               goto redirty;
-
-       /*
-        * Our capabilities prevent regular writeback or sync from ever calling
-        * shmem_writepage; but a stacking filesystem might use ->writepage of
-        * its underlying filesystem, in which case tmpfs should write out to
-        * swap only in response to memory pressure, and not for the writeback
-        * threads or sync.
-        */
-       if (!wbc->for_reclaim) {
-               WARN_ON_ONCE(1);        /* Still happens? Tell us about it! */
-               goto redirty;
-       }
 
        /*
         * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
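The checks that used to sit in the middle of shmem_writepage() are gathered at the top, converted to WARN_ON_ONCE() so unexpected callers get reported once, and extended with the new sbinfo->noswap gate. A condensed restatement of those gates as one helper (the helper name is hypothetical; the individual tests are exactly the ones added above, and shmem_sb_info gains its noswap field elsewhere in this series):

static bool demo_shmem_may_write_to_swap(struct writeback_control *wbc,
					 struct shmem_inode_info *info,
					 struct shmem_sb_info *sbinfo)
{
	/* Only memory reclaim should reach shmem_writepage(). */
	if (WARN_ON_ONCE(!wbc->for_reclaim))
		return false;
	/* Locked-in-memory inodes and noswap mounts never go to swap. */
	if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap))
		return false;
	/* No swap configured at all: nothing to write to. */
	if (!total_swap_pages)
		return false;
	return true;
}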
@@ -1874,12 +1879,10 @@ repeat:
        sbinfo = SHMEM_SB(inode->i_sb);
        charge_mm = vma ? vma->vm_mm : NULL;
 
-       folio = __filemap_get_folio(mapping, index, FGP_ENTRY | FGP_LOCK, 0);
+       folio = filemap_get_entry(mapping, index);
        if (folio && vma && userfaultfd_minor(vma)) {
-               if (!xa_is_value(folio)) {
-                       folio_unlock(folio);
+               if (!xa_is_value(folio))
                        folio_put(folio);
-               }
                *fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
                return 0;
        }
@@ -1895,6 +1898,14 @@ repeat:
        }
 
        if (folio) {
+               folio_lock(folio);
+
+               /* Has the folio been truncated or swapped out? */
+               if (unlikely(folio->mapping != mapping)) {
+                       folio_unlock(folio);
+                       folio_put(folio);
+                       goto repeat;
+               }
                if (sgp == SGP_WRITE)
                        folio_mark_accessed(folio);
                if (folio_test_uptodate(folio))
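Together with the previous hunk, the lookup in shmem_get_folio_gfp() moves from a locked FGP_ENTRY lookup to filemap_get_entry(): the userfaultfd-minor path now only has a reference to drop, and a folio found in the cache is locked afterwards and rechecked against the mapping, restarting the lookup if a truncation or swap-out raced with the unlocked lookup. A hedged sketch of that optimistic-lookup pattern, kernel context assumed and simplified from the real flow above:

repeat:
	folio = filemap_get_entry(mapping, index);
	if (folio && !xa_is_value(folio)) {
		folio_lock(folio);
		if (unlikely(folio->mapping != mapping)) {
			/* Changed identity while unlocked: try again. */
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
	}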
@@ -2376,6 +2387,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
                        shmem_set_inode_flags(inode, info->fsflags);
                INIT_LIST_HEAD(&info->shrinklist);
                INIT_LIST_HEAD(&info->swaplist);
+               if (sbinfo->noswap)
+                       mapping_set_unevictable(inode->i_mapping);
                simple_xattrs_init(&info->xattrs);
                cache_no_acl(inode);
                mapping_set_large_folios(inode->i_mapping);
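On a noswap superblock every new inode's mapping is flagged unevictable, so its folios are kept on the unevictable LRU and reclaim does not even try to push them to swap; this is the proactive counterpart of the sbinfo->noswap bail-out added to shmem_writepage() above. A short kernel-context sketch of how such a mapping can be recognized (the helper name is mine; mapping_unevictable() and folio_mapping() are existing pagemap helpers):

/*
 * True if the folio belongs to a mapping that was marked unevictable,
 * e.g. a tmpfs inode created on a noswap mount.
 */
static bool demo_folio_on_noswap_mapping(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	return mapping && mapping_unevictable(mapping);
}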
@@ -2415,13 +2428,12 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 }
 
 #ifdef CONFIG_USERFAULTFD
-int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
-                          pmd_t *dst_pmd,
+int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
                           struct vm_area_struct *dst_vma,
                           unsigned long dst_addr,
                           unsigned long src_addr,
-                          bool zeropage, bool wp_copy,
-                          struct page **pagep)
+                          uffd_flags_t flags,
+                          struct folio **foliop)
 {
        struct inode *inode = file_inode(dst_vma->vm_file);
        struct shmem_inode_info *info = SHMEM_I(inode);
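The shmem_mfill_atomic_pte() signature follows the wider userfaultfd cleanup in this series: dst_mm is dropped because it is always dst_vma->vm_mm, the separate zeropage/wp_copy booleans are folded into one uffd_flags_t, and the page handed back for the copy retry becomes a folio. A stand-alone sketch of the "operation mode plus modifier bits in one flags word" idea; the names and bit layout below are made up for illustration and differ from the kernel's real uffd_flags_t encoding:

#include <stdbool.h>
#include <stdio.h>

enum demo_mode { DEMO_COPY, DEMO_ZEROPAGE, DEMO_CONTINUE };

#define DEMO_MODE_MASK	0x3u
#define DEMO_FLAG_WP	0x4u	/* replaces a separate wp_copy bool */

typedef unsigned int demo_flags_t;

static bool demo_mode_is(demo_flags_t flags, enum demo_mode mode)
{
	return (flags & DEMO_MODE_MASK) == (demo_flags_t)mode;
}

int main(void)
{
	demo_flags_t flags = DEMO_COPY | DEMO_FLAG_WP;

	printf("copy: %d, zeropage: %d, write-protect: %d\n",
	       demo_mode_is(flags, DEMO_COPY),
	       demo_mode_is(flags, DEMO_ZEROPAGE),
	       !!(flags & DEMO_FLAG_WP));
	return 0;
}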
@@ -2439,20 +2451,20 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
                 * and now we find ourselves with -ENOMEM. Release the page, to
                 * avoid a BUG_ON in our caller.
                 */
-               if (unlikely(*pagep)) {
-                       put_page(*pagep);
-                       *pagep = NULL;
+               if (unlikely(*foliop)) {
+                       folio_put(*foliop);
+                       *foliop = NULL;
                }
                return -ENOMEM;
        }
 
-       if (!*pagep) {
+       if (!*foliop) {
                ret = -ENOMEM;
                folio = shmem_alloc_folio(gfp, info, pgoff);
                if (!folio)
                        goto out_unacct_blocks;
 
-               if (!zeropage) {        /* COPY */
+               if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) {
                        page_kaddr = kmap_local_folio(folio, 0);
                        /*
                         * The read mmap_lock is held here.  Despite the
@@ -2478,7 +2490,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 
                        /* fallback to copy_from_user outside mmap_lock */
                        if (unlikely(ret)) {
-                               *pagep = &folio->page;
+                               *foliop = folio;
                                ret = -ENOENT;
                                /* don't free the page */
                                goto out_unacct_blocks;
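With the switch to struct folio **foliop, the -ENOENT fallback hands the whole folio back to the caller instead of &folio->page; the caller is expected to copy the user data into it without mmap_lock held and call back in with *foliop still set, which is what the else branch below then consumes. A heavily simplified sketch of that caller-side protocol (modelled on the retry loop in mm/userfaultfd.c; error handling and the VMA revalidation after re-taking mmap_lock are omitted):

if (err == -ENOENT) {
	void *kaddr;

	mmap_read_unlock(dst_mm);
	kaddr = kmap_local_folio(folio, 0);
	if (copy_from_user(kaddr, (const void __user *)src_addr, PAGE_SIZE))
		err = -EFAULT;		/* caller then drops the folio */
	kunmap_local(kaddr);
	flush_dcache_folio(folio);
	/* re-take mmap_lock, revalidate the VMA, then retry the call */
}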
@@ -2489,9 +2501,9 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
                        clear_user_highpage(&folio->page, dst_addr);
                }
        } else {
-               folio = page_folio(*pagep);
+               folio = *foliop;
                VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
-               *pagep = NULL;
+               *foliop = NULL;
        }
 
        VM_BUG_ON(folio_test_locked(folio));
@@ -2506,12 +2518,12 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
                goto out_release;
 
        ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL,
-                                     gfp & GFP_RECLAIM_MASK, dst_mm);
+                                     gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm);
        if (ret)
                goto out_release;
 
-       ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-                                      &folio->page, true, wp_copy);
+       ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+                                      &folio->page, true, flags);
        if (ret)
                goto out_delete_from_cache;
 
@@ -3200,7 +3212,7 @@ static const char *shmem_get_link(struct dentry *dentry,
 
        if (!dentry) {
                folio = filemap_get_folio(inode->i_mapping, 0);
-               if (!folio)
+               if (IS_ERR(folio))
                        return ERR_PTR(-ECHILD);
                if (PageHWPoison(folio_page(folio, 0)) ||
                    !folio_test_uptodate(folio)) {
@@ -3459,6 +3471,7 @@ enum shmem_param {
        Opt_uid,
        Opt_inode32,
        Opt_inode64,
+       Opt_noswap,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3480,6 +3493,7 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
        fsparam_u32   ("uid",           Opt_uid),
        fsparam_flag  ("inode32",       Opt_inode32),
        fsparam_flag  ("inode64",       Opt_inode64),
+       fsparam_flag  ("noswap",        Opt_noswap),
        {}
 };
 
@@ -3563,6 +3577,14 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                ctx->full_inums = true;
                ctx->seen |= SHMEM_SEEN_INUMS;
                break;
+       case Opt_noswap:
+               if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) {
+                       return invalfc(fc,
+                                      "Turning off swap in unprivileged tmpfs mounts unsupported");
+               }
+               ctx->noswap = true;
+               ctx->seen |= SHMEM_SEEN_NOSWAP;
+               break;
        }
        return 0;
 
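Because swapping is a system-wide resource, the option is restricted to CAP_SYS_ADMIN in the initial user namespace, as checked just above. From user space the option is requested like any other tmpfs mount option; a stand-alone example against a kernel that carries this series (mount point and size are arbitrary, and the program must run with the privileges noted above):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("tmpfs", "/mnt/noswap-tmp", "tmpfs", 0,
		  "size=64m,noswap") != 0) {
		perror("mount");
		return 1;
	}
	puts("tmpfs mounted with swap disabled");
	return 0;
}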
@@ -3661,6 +3683,14 @@ static int shmem_reconfigure(struct fs_context *fc)
                err = "Current inum too high to switch to 32-bit inums";
                goto out;
        }
+       if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) {
+               err = "Cannot disable swap on remount";
+               goto out;
+       }
+       if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) {
+               err = "Cannot enable swap on remount if it was disabled on first mount";
+               goto out;
+       }
 
        if (ctx->seen & SHMEM_SEEN_HUGE)
                sbinfo->huge = ctx->huge;
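The two checks above make noswap a mount-time-only decision: swap cannot be disabled on a remount of a mount that allowed it, nor re-enabled on a mount created with noswap. A stand-alone user-space illustration of the first rule (paths and sizes are arbitrary; on a kernel with this series the remount is expected to fail with EINVAL):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("tmpfs", "/mnt/t", "tmpfs", 0, "size=16m") != 0) {
		perror("initial mount");
		return 1;
	}
	if (mount(NULL, "/mnt/t", NULL, MS_REMOUNT, "size=16m,noswap") != 0)
		printf("remount with noswap rejected: %s\n", strerror(errno));
	return 0;
}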
@@ -3681,6 +3711,10 @@ static int shmem_reconfigure(struct fs_context *fc)
                sbinfo->mpol = ctx->mpol;       /* transfers initial ref */
                ctx->mpol = NULL;
        }
+
+       if (ctx->noswap)
+               sbinfo->noswap = true;
+
        raw_spin_unlock(&sbinfo->stat_lock);
        mpol_put(mpol);
        return 0;
@@ -3735,6 +3769,8 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
                seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
 #endif
        shmem_show_mpol(seq, sbinfo->mpol);
+       if (sbinfo->noswap)
+               seq_printf(seq, ",noswap");
        return 0;
 }
 
@@ -3778,6 +3814,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
                        ctx->inodes = shmem_default_max_inodes();
                if (!(ctx->seen & SHMEM_SEEN_INUMS))
                        ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
+               sbinfo->noswap = ctx->noswap;
        } else {
                sb->s_flags |= SB_NOUSER;
        }