struct page *huge_zero_page __read_mostly;
unsigned long huge_zero_pfn __read_mostly = ~0UL;
-bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags,
- bool smaps, bool in_pf)
+bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
+ bool smaps, bool in_pf, bool enforce_sysfs)
{
if (!vma->vm_mm) /* vdso */
return false;
if (!in_pf && shmem_file(vma->vm_file))
return shmem_huge_enabled(vma);
- if (!hugepage_flags_enabled())
- return false;
-
- /* THP settings require madvise. */
- if (!(vm_flags & VM_HUGEPAGE) && !hugepage_flags_always())
+ /* Enforce sysfs THP requirements as necessary */
+ if (enforce_sysfs &&
+ (!hugepage_flags_enabled() || (!(vm_flags & VM_HUGEPAGE) &&
+ !hugepage_flags_always())))
return false;
/* Only regular file is valid */
return;
entry = mk_pmd(zero_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
- if (pgtable)
- pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
mm_inc_nr_ptes(mm);
}
struct page *page;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = NUMA_NO_NODE;
- int target_nid, last_cpupid = -1;
+ int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
bool migrated = false;
bool was_writable = pmd_savedwrite(oldpmd);
int flags = 0;
flags |= TNF_NO_GROUP;
page_nid = page_to_nid(page);
- last_cpupid = page_cpupid_last(page);
+ /*
+ * For memory tiering mode, cpupid of slow memory page is used
+ * to record page access time. So use default value.
+ */
+ if (node_is_toptier(page_nid))
+ last_cpupid = page_cpupid_last(page);
target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
&flags);
if (prot_numa) {
struct page *page;
+ bool toptier;
/*
* Avoid trapping faults against the zero page. The read-only
* data is likely to be read-cached on the local CPU and
goto unlock;
page = pmd_page(*pmd);
+ toptier = node_is_toptier(page_to_nid(page));
/*
* Skip scanning top tier node if normal numa
* balancing is disabled
*/
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
- node_is_toptier(page_to_nid(page)))
+ toptier)
goto unlock;
+
+ if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+ !toptier)
+ xchg_page_access_time(page, jiffies_to_msecs(jiffies));
}
/*
* In case prot_numa, we are under mmap_read_lock(mm). It's critical
*
* In case we cannot clear PageAnonExclusive(), split the PMD
* only and let try_to_migrate_one() fail later.
+ *
+ * See page_try_share_anon_rmap(): invalidate PMD first.
*/
anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
bool freeze, struct folio *folio)
{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(vma->vm_mm, address);
- if (!pgd_present(*pgd))
- return;
+ pmd_t *pmd = mm_find_pmd(vma->vm_mm, address);
- p4d = p4d_offset(pgd, address);
- if (!p4d_present(*p4d))
+ if (!pmd)
return;
- pud = pud_offset(p4d, address);
- if (!pud_present(*pud))
- return;
-
- pmd = pmd_offset(pud, address);
-
__split_huge_pmd(vma, pmd, address, freeze, folio);
}
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
+ gfp_t gfp;
+
mapping = head->mapping;
/* Truncated ? */
goto out;
}
- xas_split_alloc(&xas, head, compound_order(head),
- mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+ gfp = current_gfp_context(mapping_gfp_mask(mapping) &
+ GFP_RECLAIM_MASK);
+
+ if (folio_test_private(folio) &&
+ !filemap_release_folio(folio, gfp)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ xas_split_alloc(&xas, head, compound_order(head), gfp);
if (xas_error(&xas)) {
ret = xas_error(&xas);
goto out;
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
+ /* See page_try_share_anon_rmap(): invalidate PMD first. */
anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
if (anon_exclusive && page_try_share_anon_rmap(page)) {
set_pmd_at(mm, address, pvmw->pmd, pmdval);