Merge branch 'mm-hotfixes-stable' into mm-stable
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f42bb51e023a034ea774ae84c59810c95ca348db..7bf2299cb24bd37d17d1bb5d4f0d5cabd0f19c44 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -70,9 +70,8 @@ static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
-bool hugepage_vma_check(struct vm_area_struct *vma,
-                       unsigned long vm_flags,
-                       bool smaps, bool in_pf)
+bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
+                       bool smaps, bool in_pf, bool enforce_sysfs)
 {
        if (!vma->vm_mm)                /* vdso */
                return false;
@@ -121,11 +120,10 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
        if (!in_pf && shmem_file(vma->vm_file))
                return shmem_huge_enabled(vma);
 
-       if (!hugepage_flags_enabled())
-               return false;
-
-       /* THP settings require madvise. */
-       if (!(vm_flags & VM_HUGEPAGE) && !hugepage_flags_always())
+       /* Enforce sysfs THP requirements as necessary */
+       if (enforce_sysfs &&
+           (!hugepage_flags_enabled() || (!(vm_flags & VM_HUGEPAGE) &&
+                                          !hugepage_flags_always())))
                return false;
 
        /* Only regular file is valid */
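
The new enforce_sysfs parameter lets callers decide whether the global sysfs THP policy should be applied at all; when it is enforced, the VMA is rejected if THP is disabled globally, or if the mode is madvise-only and the VMA lacks VM_HUGEPAGE. A minimal userspace sketch of that combined predicate, with hypothetical booleans standing in for hugepage_flags_enabled(), hugepage_flags_always() and the VM_HUGEPAGE test, is:

/*
 * Userspace model of the sysfs-enforcement branch added to
 * hugepage_vma_check().  The three policy inputs are stand-ins for
 * hugepage_flags_enabled(), hugepage_flags_always() and the per-VMA
 * VM_HUGEPAGE bit; they are assumptions for illustration, not the
 * kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

static bool sysfs_blocks_thp(bool enforce_sysfs, bool enabled,
			     bool always, bool vma_madvised)
{
	/* Mirrors: enforce_sysfs && (!enabled || (!madvised && !always)) */
	return enforce_sysfs && (!enabled || (!vma_madvised && !always));
}

int main(void)
{
	/* madvise-only mode, VMA not madvised: blocked when enforcing ... */
	printf("%d\n", sysfs_blocks_thp(true, true, false, false));  /* 1 */
	/* ... but allowed when the caller skips sysfs enforcement. */
	printf("%d\n", sysfs_blocks_thp(false, true, false, false)); /* 0 */
	return 0;
}
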
@@ -772,8 +770,7 @@ static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                return;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
-       if (pgtable)
-               pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        mm_inc_nr_ptes(mm);
 }
@@ -1479,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
        struct page *page;
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        int page_nid = NUMA_NO_NODE;
-       int target_nid, last_cpupid = -1;
+       int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
        bool migrated = false;
        bool was_writable = pmd_savedwrite(oldpmd);
        int flags = 0;
@@ -1500,7 +1497,12 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
                flags |= TNF_NO_GROUP;
 
        page_nid = page_to_nid(page);
-       last_cpupid = page_cpupid_last(page);
+       /*
+        * For memory tiering mode, cpupid of slow memory page is used
+        * to record page access time.  So use default value.
+        */
+       if (node_is_toptier(page_nid))
+               last_cpupid = page_cpupid_last(page);
        target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
                                       &flags);
 
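The default last_cpupid changes from -1 to (-1 & LAST_CPUPID_MASK) because cpupid values read back out of the page flags are truncated to the field width, so a plain -1 sentinel could never compare equal to one of them. A small sketch of the masking arithmetic, assuming a 12-bit field (the real width depends on the kernel configuration), shows the difference:

/*
 * Why (-1 & LAST_CPUPID_MASK) instead of -1: values stored in the page
 * flags are truncated to the field width, so a plain -1 never matches a
 * value read back from that field.  The 12-bit width below is an
 * assumption for illustration; in the kernel it follows LAST_CPUPID_SHIFT.
 */
#include <stdio.h>

#define CPUPID_BITS 12
#define CPUPID_MASK ((1 << CPUPID_BITS) - 1)

int main(void)
{
	int stored = -1 & CPUPID_MASK;	/* what the truncated field can hold */
	int plain = -1;			/* old sentinel */
	int masked = -1 & CPUPID_MASK;	/* new sentinel */

	printf("stored=%#x plain==stored:%d masked==stored:%d\n",
	       stored, plain == stored, masked == stored);
	return 0;
}
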
@@ -1824,6 +1826,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
        if (prot_numa) {
                struct page *page;
+               bool toptier;
                /*
                 * Avoid trapping faults against the zero page. The read-only
                 * data is likely to be read-cached on the local CPU and
@@ -1836,13 +1839,18 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        goto unlock;
 
                page = pmd_page(*pmd);
+               toptier = node_is_toptier(page_to_nid(page));
                /*
                 * Skip scanning top tier node if normal numa
                 * balancing is disabled
                 */
                if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-                   node_is_toptier(page_to_nid(page)))
+                   toptier)
                        goto unlock;
+
+               if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+                   !toptier)
+                       xchg_page_access_time(page, jiffies_to_msecs(jiffies));
        }
        /*
         * In case prot_numa, we are under mmap_read_lock(mm). It's critical
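
With NUMA_BALANCING_MEMORY_TIERING enabled, pages on slow (non-toptier) nodes get the current time in milliseconds stored in their cpupid bits via xchg_page_access_time(), so a later hint fault can estimate how long ago the protection scan passed by. Because that field is far narrower than a millisecond counter, the value effectively lives modulo the field size; the sketch below, with an assumed 16-bit width and stand-in helpers, shows why modular subtraction still recovers the elapsed time as long as it stays under one wrap:

/*
 * Sketch of recording a truncated millisecond timestamp in a narrow
 * field and recovering the elapsed time later.  The 16-bit width and
 * the helper names are assumptions for illustration, not the kernel's
 * xchg_page_access_time()/cpupid layout.
 */
#include <stdio.h>

#define TIME_BITS 16
#define TIME_MASK ((1u << TIME_BITS) - 1)

static unsigned int stored;	/* stands in for the page's cpupid bits */

static void record_access_time(unsigned int now_ms)
{
	stored = now_ms & TIME_MASK;
}

static unsigned int fault_latency(unsigned int now_ms)
{
	/*
	 * Modular subtraction is correct as long as the interval is
	 * shorter than one wrap of the field (65536 ms here).
	 */
	return (now_ms - stored) & TIME_MASK;
}

int main(void)
{
	record_access_time(70000);		     /* scan time */
	printf("%u\n", fault_latency(70000 + 1234)); /* 1234, despite truncation */
	return 0;
}
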
@@ -2140,6 +2148,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 *
                 * In case we cannot clear PageAnonExclusive(), split the PMD
                 * only and let try_to_migrate_one() fail later.
+                *
+                * See page_try_share_anon_rmap(): invalidate PMD first.
                 */
                anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
                if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
@@ -2288,25 +2298,11 @@ out:
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct folio *folio)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       pgd = pgd_offset(vma->vm_mm, address);
-       if (!pgd_present(*pgd))
-               return;
+       pmd_t *pmd = mm_find_pmd(vma->vm_mm, address);
 
-       p4d = p4d_offset(pgd, address);
-       if (!p4d_present(*p4d))
+       if (!pmd)
                return;
 
-       pud = pud_offset(p4d, address);
-       if (!pud_present(*pud))
-               return;
-
-       pmd = pmd_offset(pud, address);
-
        __split_huge_pmd(vma, pmd, address, freeze, folio);
 }
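
The hand-rolled pgd/p4d/pud descent is replaced by a single mm_find_pmd() call, which performs the same walk and returns NULL when an upper level is not present. A userspace model of that kind of consolidation (illustrative types and names, not the real page-table API) is:

/*
 * Userspace model of collapsing a multi-level walk into one helper that
 * returns NULL when any intermediate level is missing, instead of
 * repeating the "if (!present) return" pattern at the call site.
 */
#include <stdio.h>
#include <stddef.h>

struct pmd { int dummy; };
struct pud { struct pmd *pmd; };
struct p4d { struct pud *pud; };
struct pgd { struct p4d *p4d; };

static struct pmd *find_pmd(struct pgd *pgd)
{
	if (!pgd || !pgd->p4d || !pgd->p4d->pud)
		return NULL;
	return pgd->p4d->pud->pmd;
}

int main(void)
{
	struct pmd pmd = { 0 };
	struct pud pud = { &pmd };
	struct p4d p4d = { &pud };
	struct pgd pgd = { &p4d };
	struct pgd empty = { NULL };

	printf("%p %p\n", (void *)find_pmd(&pgd), (void *)find_pmd(&empty));
	return 0;
}
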
 
@@ -2649,6 +2645,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                mapping = NULL;
                anon_vma_lock_write(anon_vma);
        } else {
+               gfp_t gfp;
+
                mapping = head->mapping;
 
                /* Truncated ? */
@@ -2657,8 +2655,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        goto out;
                }
 
-               xas_split_alloc(&xas, head, compound_order(head),
-                               mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+               gfp = current_gfp_context(mapping_gfp_mask(mapping) &
+                                                       GFP_RECLAIM_MASK);
+
+               if (folio_test_private(folio) &&
+                               !filemap_release_folio(folio, gfp)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+
+               xas_split_alloc(&xas, head, compound_order(head), gfp);
                if (xas_error(&xas)) {
                        ret = xas_error(&xas);
                        goto out;
@@ -3175,6 +3181,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
        flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
        pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
 
+       /* See page_try_share_anon_rmap(): invalidate PMD first. */
        anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
        if (anon_exclusive && page_try_share_anon_rmap(page)) {
                set_pmd_at(mm, address, pvmw->pmd, pmdval);