mm: memcontrol: skip moving non-present pages that are mapped elsewhere

author Johannes Weiner <hannes@cmpxchg.org>
Tue, 6 Dec 2022 17:13:39 +0000 (18:13 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 19 Jan 2023 01:12:42 +0000 (17:12 -0800)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab457f0394ab6eeed8bf33c84ee9943586844178..a698a2b6523b58be398e9c350e5bfb95d91e2e1d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5692,7 +5692,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
   * @from: mem_cgroup which the page is moved from.
   * @to:        mem_cgroup which the page is moved to. @from != @to.
   *
- * The caller must make sure the page is not on LRU (isolate_page() is useful.)
+ * The page must be locked and not on the LRU.
   *
   * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
   * from old cgroup.
@@ -5709,20 +5709,13 @@ static int mem_cgroup_move_account(struct page *page,
         int nid, ret;
  
         VM_BUG_ON(from == to);
+       VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
         VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
         VM_BUG_ON(compound && !folio_test_large(folio));
  
-       /*
-        * Prevent mem_cgroup_migrate() from looking at
-        * page's memory cgroup of its source page while we change it.
-        */
-       ret = -EBUSY;
-       if (!folio_trylock(folio))
-               goto out;
-
         ret = -EINVAL;
         if (folio_memcg(folio) != from)
-               goto out_unlock;
+               goto out;
  
         pgdat = folio_pgdat(folio);
         from_vec = mem_cgroup_lruvec(from, pgdat);
@@ -5809,8 +5802,6 @@ static int mem_cgroup_move_account(struct page *page,
         mem_cgroup_charge_statistics(from, -nr_pages);
         memcg_check_events(from, nid);
         local_irq_enable();
-out_unlock:
-       folio_unlock(folio);
  out:
         return ret;
  }
@@ -5859,6 +5850,29 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
         else if (is_swap_pte(ptent))
                 page = mc_handle_swap_pte(vma, ptent, &ent);
  
+       if (target && page) {
+               if (!trylock_page(page)) {
+                       put_page(page);
+                       return ret;
+               }
+               /*
+                * page_mapped() must be stable during the move. This
+                * pte is locked, so if it's present, the page cannot
+                * become unmapped. If it isn't, we have only partial
+                * control over the mapped state: the page lock will
+                * prevent new faults against pagecache and swapcache,
+                * so an unmapped page cannot become mapped. However,
+                * if the page is already mapped elsewhere, it can
+                * unmap, and there is nothing we can do about it.
+                * Alas, skip moving the page in this case.
+                */
+               if (!pte_present(ptent) && page_mapped(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       return ret;
+               }
+       }
+
         if (!page && !ent.val)
                 return ret;
         if (page) {
@@ -5875,8 +5889,11 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                         if (target)
                                 target->page = page;
                 }
-               if (!ret || !target)
+               if (!ret || !target) {
+                       if (target)
+                               unlock_page(page);
                         put_page(page);
+               }
         }
         /*
          * There is a swap entry and a page doesn't exist or isn't charged.
@@ -5916,6 +5933,10 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
                 ret = MC_TARGET_PAGE;
                 if (target) {
                         get_page(page);
+                       if (!trylock_page(page)) {
+                               put_page(page);
+                               return MC_TARGET_NONE;
+                       }
                         target->page = page;
                 }
         }
@@ -6154,6 +6175,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                                 }
                                 putback_lru_page(page);
                         }
+                       unlock_page(page);
                         put_page(page);
                 } else if (target_type == MC_TARGET_DEVICE) {
                         page = target.page;
@@ -6162,6 +6184,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                                 mc.precharge -= HPAGE_PMD_NR;
                                 mc.moved_charge += HPAGE_PMD_NR;
                         }
+                       unlock_page(page);
                         put_page(page);
                 }
                 spin_unlock(ptl);
@@ -6204,7 +6227,8 @@ retry:
                         }
                         if (!device)
                                 putback_lru_page(page);
-put:                   /* get_mctgt_type() gets the page */
+put:                   /* get_mctgt_type() gets & locks the page */
+                       unlock_page(page);
                         put_page(page);
                         break;
                 case MC_TARGET_SWAP:
author	Johannes Weiner <hannes@cmpxchg.org>
	Tue, 6 Dec 2022 17:13:39 +0000 (18:13 +0100)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 19 Jan 2023 01:12:42 +0000 (17:12 -0800)