Merge branch 'mm-hotfixes-stable' into mm-stable
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f42bb51e023a034ea774ae84c59810c95ca348db..7bf2299cb24bd37d17d1bb5d4f0d5cabd0f19c44 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -70,9 +70,8 @@ static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
-bool hugepage_vma_check(struct vm_area_struct *vma,
-                       unsigned long vm_flags,
-                       bool smaps, bool in_pf)
+bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
+                       bool smaps, bool in_pf, bool enforce_sysfs)
 {
        if (!vma->vm_mm)                /* vdso */
                return false;
@@ -121,11 +120,10 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
        if (!in_pf && shmem_file(vma->vm_file))
                return shmem_huge_enabled(vma);
 
-       if (!hugepage_flags_enabled())
-               return false;
-
-       /* THP settings require madvise. */
-       if (!(vm_flags & VM_HUGEPAGE) && !hugepage_flags_always())
+       /* Enforce sysfs THP requirements as necessary */
+       if (enforce_sysfs &&
+           (!hugepage_flags_enabled() || (!(vm_flags & VM_HUGEPAGE) &&
+                                          !hugepage_flags_always())))
                return false;
 
        /* Only regular file is valid */
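
The new enforce_sysfs parameter lets callers decide whether the global sysfs THP policy should be applied at all; when it is enforced, the VMA is rejected if THP is disabled globally, or if the mode is madvise-only and the VMA lacks VM_HUGEPAGE. A minimal userspace sketch of that combined predicate, with hypothetical booleans standing in for hugepage_flags_enabled(), hugepage_flags_always() and the VM_HUGEPAGE test, is:

/*
 * Userspace model of the sysfs-enforcement branch added to
 * hugepage_vma_check().  The three policy inputs are stand-ins for
 * hugepage_flags_enabled(), hugepage_flags_always() and the per-VMA
 * VM_HUGEPAGE bit; they are assumptions for illustration, not the
 * kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

static bool sysfs_blocks_thp(bool enforce_sysfs, bool enabled,
			     bool always, bool vma_madvised)
{
	/* Mirrors: enforce_sysfs && (!enabled || (!madvised && !always)) */
	return enforce_sysfs && (!enabled || (!vma_madvised && !always));
}

int main(void)
{
	/* madvise-only mode, VMA not madvised: blocked when enforcing ... */
	printf("%d\n", sysfs_blocks_thp(true, true, false, false));  /* 1 */
	/* ... but allowed when the caller skips sysfs enforcement. */
	printf("%d\n", sysfs_blocks_thp(false, true, false, false)); /* 0 */
	return 0;
}
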
@@ -772,8 +770,7 @@ static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                return;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
-       if (pgtable)
-               pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        mm_inc_nr_ptes(mm);
 }
@@ -1479,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
        struct page *page;
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        int page_nid = NUMA_NO_NODE;
-       int target_nid, last_cpupid = -1;
+       int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
        bool migrated = false;
        bool was_writable = pmd_savedwrite(oldpmd);
        int flags = 0;
@@ -1500,7 +1497,12 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
                flags |= TNF_NO_GROUP;
 
        page_nid = page_to_nid(page);
-       last_cpupid = page_cpupid_last(page);
+       /*
+        * For memory tiering mode, cpupid of slow memory page is used
+        * to record page access time.  So use default value.
+        */
+       if (node_is_toptier(page_nid))
+               last_cpupid = page_cpupid_last(page);
        target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
                                       &flags);
 
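The default last_cpupid changes from -1 to (-1 & LAST_CPUPID_MASK) because cpupid values read back out of the page flags are truncated to the field width, so a plain -1 sentinel could never compare equal to one of them. A small sketch of the masking arithmetic, assuming a 12-bit field (the real width depends on the kernel configuration), shows the difference:

/*
 * Why (-1 & LAST_CPUPID_MASK) instead of -1: values stored in the page
 * flags are truncated to the field width, so a plain -1 never matches a
 * value read back from that field.  The 12-bit width below is an
 * assumption for illustration; in the kernel it follows LAST_CPUPID_SHIFT.
 */
#include <stdio.h>

#define CPUPID_BITS 12
#define CPUPID_MASK ((1 << CPUPID_BITS) - 1)

int main(void)
{
	int stored = -1 & CPUPID_MASK;	/* what the truncated field can hold */
	int plain = -1;			/* old sentinel */
	int masked = -1 & CPUPID_MASK;	/* new sentinel */

	printf("stored=%#x plain==stored:%d masked==stored:%d\n",
	       stored, plain == stored, masked == stored);
	return 0;
}
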
@@ -1824,6 +1826,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
        if (prot_numa) {
                struct page *page;
+               bool toptier;
                /*
                 * Avoid trapping faults against the zero page. The read-only
                 * data is likely to be read-cached on the local CPU and
@@ -1836,13 +1839,18 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        goto unlock;
 
                page = pmd_page(*pmd);
+               toptier = node_is_toptier(page_to_nid(page));
                /*
                 * Skip scanning top tier node if normal numa
                 * balancing is disabled
                 */
                if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-                   node_is_toptier(page_to_nid(page)))
+                   toptier)
                        goto unlock;
+
+               if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+                   !toptier)
+                       xchg_page_access_time(page, jiffies_to_msecs(jiffies));
        }
        /*
         * In case prot_numa, we are under mmap_read_lock(mm). It's critical
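
With NUMA_BALANCING_MEMORY_TIERING enabled, pages on slow (non-toptier) nodes get the current time in milliseconds stored in their cpupid bits via xchg_page_access_time(), so a later hint fault can estimate how long ago the protection scan passed by. Because that field is far narrower than a millisecond counter, the value effectively lives modulo the field size; the sketch below, with an assumed 16-bit width and stand-in helpers, shows why modular subtraction still recovers the elapsed time as long as it stays under one wrap:

/*
 * Sketch of recording a truncated millisecond timestamp in a narrow
 * field and recovering the elapsed time later.  The 16-bit width and
 * the helper names are assumptions for illustration, not the kernel's
 * xchg_page_access_time()/cpupid layout.
 */
#include <stdio.h>

#define TIME_BITS 16
#define TIME_MASK ((1u << TIME_BITS) - 1)

static unsigned int stored;	/* stands in for the page's cpupid bits */

static void record_access_time(unsigned int now_ms)
{
	stored = now_ms & TIME_MASK;
}

static unsigned int fault_latency(unsigned int now_ms)
{
	/*
	 * Modular subtraction is correct as long as the interval is
	 * shorter than one wrap of the field (65536 ms here).
	 */
	return (now_ms - stored) & TIME_MASK;
}

int main(void)
{
	record_access_time(70000);		     /* scan time */
	printf("%u\n", fault_latency(70000 + 1234)); /* 1234, despite truncation */
	return 0;
}
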
@@ -2140,6 +2148,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 *
                 * In case we cannot clear PageAnonExclusive(), split the PMD
                 * only and let try_to_migrate_one() fail later.
+                *
+                * See page_try_share_anon_rmap(): invalidate PMD first.
                 */
                anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
                if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
@@ -2288,25 +2298,11 @@ out:
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct folio *folio)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       pgd = pgd_offset(vma->vm_mm, address);
-       if (!pgd_present(*pgd))
-               return;
+       pmd_t *pmd = mm_find_pmd(vma->vm_mm, address);
 
-       p4d = p4d_offset(pgd, address);
-       if (!p4d_present(*p4d))
+       if (!pmd)
                return;
 
-       pud = pud_offset(p4d, address);
-       if (!pud_present(*pud))
-               return;
-
-       pmd = pmd_offset(pud, address);
-
        __split_huge_pmd(vma, pmd, address, freeze, folio);
 }
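
The hand-rolled pgd/p4d/pud descent is replaced by a single mm_find_pmd() call, which performs the same walk and returns NULL when an upper level is not present. A userspace model of that kind of consolidation (illustrative types and names, not the real page-table API) is:

/*
 * Userspace model of collapsing a multi-level walk into one helper that
 * returns NULL when any intermediate level is missing, instead of
 * repeating the "if (!present) return" pattern at the call site.
 */
#include <stdio.h>
#include <stddef.h>

struct pmd { int dummy; };
struct pud { struct pmd *pmd; };
struct p4d { struct pud *pud; };
struct pgd { struct p4d *p4d; };

static struct pmd *find_pmd(struct pgd *pgd)
{
	if (!pgd || !pgd->p4d || !pgd->p4d->pud)
		return NULL;
	return pgd->p4d->pud->pmd;
}

int main(void)
{
	struct pmd pmd = { 0 };
	struct pud pud = { &pmd };
	struct p4d p4d = { &pud };
	struct pgd pgd = { &p4d };
	struct pgd empty = { NULL };

	printf("%p %p\n", (void *)find_pmd(&pgd), (void *)find_pmd(&empty));
	return 0;
}
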
 
@@ -2649,6 +2645,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                mapping = NULL;
                anon_vma_lock_write(anon_vma);
        } else {
+               gfp_t gfp;
+
                mapping = head->mapping;
 
                /* Truncated ? */
@@ -2657,8 +2655,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        goto out;
                }
 
-               xas_split_alloc(&xas, head, compound_order(head),
-                               mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+               gfp = current_gfp_context(mapping_gfp_mask(mapping) &
+                                                       GFP_RECLAIM_MASK);
+
+               if (folio_test_private(folio) &&
+                               !filemap_release_folio(folio, gfp)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+
+               xas_split_alloc(&xas, head, compound_order(head), gfp);
                if (xas_error(&xas)) {
                        ret = xas_error(&xas);
                        goto out;
@@ -3175,6 +3181,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
        flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
        pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
 
+       /* See page_try_share_anon_rmap(): invalidate PMD first. */
        anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
        if (anon_exclusive && page_try_share_anon_rmap(page)) {
                set_pmd_at(mm, address, pvmw->pmd, pmdval);