mm/numa: no task_numa_fault() call if PMD is changed
author Zi Yan <ziy@nvidia.com>
Fri, 9 Aug 2024 14:59:05 +0000 (10:59 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 16 Aug 2024 05:16:15 +0000 (22:16 -0700)
When handling a NUMA page fault, task_numa_fault() should be called by the
process that restores the page table of the faulted folio, to avoid
duplicated stats counting.  Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA
fault handling") restructured do_huge_pmd_numa_page() and did not skip the
task_numa_fault() call when the second page table check, performed after a
NUMA migration failure, finds that the PMD has changed.  Fix it by making
every !pmd_same() path return immediately.

This issue can cause task_numa_fault() to be called more often than
necessary and lead to unexpected NUMA balancing results (it is hard to tell
whether the duplicated NUMA fault counting will have a positive or negative
performance impact).

Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com
Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling")
Reported-by: "Huang, Ying" <ying.huang@intel.com>
Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
Signed-off-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Yang Shi <shy828301@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/huge_memory.c

index f4be468e06a49a7ad25e5ea3af0e74fcf1c2a678..67c86a5d64a6a9825092c54ba801c91fa30121db 100644 (file)
@@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
        if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
                spin_unlock(vmf->ptl);
-               goto out;
+               return 0;
        }
 
        pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
        if (!migrate_misplaced_folio(folio, vma, target_nid)) {
                flags |= TNF_MIGRATED;
                nid = target_nid;
-       } else {
-               flags |= TNF_MIGRATE_FAIL;
-               vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-               if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
-                       spin_unlock(vmf->ptl);
-                       goto out;
-               }
-               goto out_map;
-       }
-
-out:
-       if (nid != NUMA_NO_NODE)
                task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+               return 0;
+       }
 
-       return 0;
-
+       flags |= TNF_MIGRATE_FAIL;
+       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+               spin_unlock(vmf->ptl);
+               return 0;
+       }
 out_map:
        /* Restore the PMD */
        pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@@ -1753,7 +1747,10 @@ out_map:
        set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
        spin_unlock(vmf->ptl);
-       goto out;
+
+       if (nid != NUMA_NO_NODE)
+               task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+       return 0;
 }
 
 /*