mm/shmem, swap: fix softlockup with mTHP swapin

author Kairui Song <kasong@tencent.com>

Mon, 9 Jun 2025 17:17:51 +0000 (01:17 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Fri, 20 Jun 2025 03:48:01 +0000 (20:48 -0700)
author Kairui Song <kasong@tencent.com>
Mon, 9 Jun 2025 17:17:51 +0000 (01:17 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 20 Jun 2025 03:48:01 +0000 (20:48 -0700)
diff --git a/mm/memory.c b/mm/memory.c

index 8eba595056fe3f131fbc1f1f06698fda9395a99f..b0cda5aab3985619d70930c0db3cc941df6aa885 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4315,26 +4315,6 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf)
  }
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
-{
-       struct swap_info_struct *si = swp_swap_info(entry);
-       pgoff_t offset = swp_offset(entry);
-       int i;
-
-       /*
-        * While allocating a large folio and doing swap_read_folio, which is
-        * the case the being faulted pte doesn't have swapcache. We need to
-        * ensure all PTEs have no cache as well, otherwise, we might go to
-        * swap devices while the content is in swapcache.
-        */
-       for (i = 0; i < max_nr; i++) {
-               if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
-                       return i;
-       }
-
-       return i;
-}
-
  /*
   * Check if the PTEs within a range are contiguous swap entries
   * and have consistent swapcache, zeromap.
diff --git a/mm/shmem.c b/mm/shmem.c

index 0c5fb4ffa03aadf8397002f0a55f2e96b8eec5c7..3a5a65b1f41a3c41810668f134ba90686e80825b 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
         folio = swap_cache_get_folio(swap, NULL, 0);
         order = xa_get_order(&mapping->i_pages, index);
         if (!folio) {
+               int nr_pages = 1 << order;
                 bool fallback_order0 = false;
  
                 /* Or update major stats only when swapin succeeds?? */
@@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
                  * If uffd is active for the vma, we need per-page fault
                  * fidelity to maintain the uffd semantics, then fallback
                  * to swapin order-0 folio, as well as for zswap case.
+                * Any existing sub folio in the swap cache also blocks
+                * mTHP swapin.
                  */
                 if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
-                                 !zswap_never_enabled()))
+                                 !zswap_never_enabled() ||
+                                 non_swapcache_batch(swap, nr_pages) != nr_pages))
                         fallback_order0 = true;
  
                 /* Skip swapcache for synchronous device. */
diff --git a/mm/swap.h b/mm/swap.h

index 2269eb9df0af79564d5b55e646cfb82945a607b6..9096082a915ea273b27cc2a61d03d87869234f1e 100644 (file)
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
                 return find_next_bit(sis->zeromap, end, start) - start;
  }
  
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+       struct swap_info_struct *si = swp_swap_info(entry);
+       pgoff_t offset = swp_offset(entry);
+       int i;
+
+       /*
+        * While doing mTHP swapin, no entry may already be cached, or the
+        * new large folio would conflict with the folio in swap cache.
+        * Returns the count of leading entries without SWAP_HAS_CACHE.
+        */
+       for (i = 0; i < max_nr; i++) {
+               if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
+                       return i;
+       }
+
+       return i;
+}
+
  #else /* CONFIG_SWAP */
  struct swap_iocb;
  static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
@@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
         return 0;
  }
  
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+       return 0; /* !CONFIG_SWAP stub: always reports an empty batch */
+}
  #endif /* CONFIG_SWAP */
  
  /**
author	Kairui Song <kasong@tencent.com>
	Mon, 9 Jun 2025 17:17:51 +0000 (01:17 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 20 Jun 2025 03:48:01 +0000 (20:48 -0700)
mm/memory.c		patch \| blob \| blame \| history
mm/shmem.c		patch \| blob \| blame \| history
mm/swap.h		patch \| blob \| blame \| history