}
EXPORT_SYMBOL_GPL(PageHuge);
-/*
- * PageHeadHuge() only returns true for hugetlbfs head page, but not for
- * normal or transparent huge pages.
+/**
+ * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
+ * @folio: The folio to test.
+ *
+ * Context: Any context. Caller should have a reference on the folio to
+ * prevent it from being turned into a tail page.
+ * Return: True for hugetlbfs folios, false for anon folios or folios
+ * belonging to other filesystems.
*/
-int PageHeadHuge(struct page *page_head)
+bool folio_test_hugetlb(struct folio *folio)
{
- struct folio *folio = (struct folio *)page_head;
if (!folio_test_large(folio))
- return 0;
+ return false;
return folio->_folio_dtor == HUGETLB_PAGE_DTOR;
}
-EXPORT_SYMBOL_GPL(PageHeadHuge);
+EXPORT_SYMBOL_GPL(folio_test_hugetlb);
/*
* Find and lock address space (mapping) in write mode.
pgoff_t index = page_index(page_head);
unsigned long compound_idx;
- if (compound_order(page_head) >= MAX_ORDER)
+ if (compound_order(page_head) > MAX_ORDER)
compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
else
compound_idx = page - page_head;
hugetlb_register_all_nodes();
}
+#ifdef CONFIG_SYSCTL
+static void hugetlb_sysctl_init(void);
+#else
+static inline void hugetlb_sysctl_init(void) { }
+#endif
+
static int __init hugetlb_init(void)
{
int i;
hugetlb_sysfs_init();
hugetlb_cgroup_file_init();
+ hugetlb_sysctl_init();
#ifdef CONFIG_SMP
num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus());
* The number of default huge pages (for this size) could have been
* specified as the first hugetlb parameter: hugepages=X. If so,
* then default_hstate_max_huge_pages is set. If the default huge
- * page size is gigantic (>= MAX_ORDER), then the pages must be
+ * page size is gigantic (> MAX_ORDER), then the pages must be
* allocated here from bootmem allocator.
*/
if (default_hstate_max_huge_pages) {
return ret;
}
-int hugetlb_sysctl_handler(struct ctl_table *table, int write,
+static int hugetlb_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
}
#ifdef CONFIG_NUMA
-int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
+static int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
return hugetlb_sysctl_handler_common(true, table, write,
}
#endif /* CONFIG_NUMA */
-int hugetlb_overcommit_handler(struct ctl_table *table, int write,
+static int hugetlb_overcommit_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
return ret;
}
+static struct ctl_table hugetlb_table[] = {
+ {
+ .procname = "nr_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_sysctl_handler,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ },
+#endif
+ {
+ .procname = "hugetlb_shm_group",
+ .data = &sysctl_hugetlb_shm_group,
+ .maxlen = sizeof(gid_t),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "nr_overcommit_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_overcommit_handler,
+ },
+ { }
+};
+
+static void hugetlb_sysctl_init(void)
+{
+ register_sysctl_init("vm", hugetlb_table);
+}
#endif /* CONFIG_SYSCTL */
void hugetlb_report_meminfo(struct seq_file *m)
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
- struct folio *new_folio)
+ struct folio *new_folio, pte_t old)
{
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
+
__folio_mark_uptodate(new_folio);
hugepage_add_new_anon_rmap(new_folio, vma, addr);
- set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
- bool uffd_wp = huge_pte_uffd_wp(entry);
-
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
- bool uffd_wp = huge_pte_uffd_wp(entry);
+ bool uffd_wp = pte_swp_uffd_wp(entry);
if (!is_readable_migration_entry(swp_entry) && cow) {
/*
swp_offset(swp_entry));
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
- entry = huge_pte_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd_wp(entry);
set_huge_pte_at(src, addr, src_pte, entry);
}
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
ret = PTR_ERR(new_folio);
break;
}
- copy_user_huge_page(&new_folio->page, ptepage, addr, dst_vma,
- npages);
+ ret = copy_user_large_folio(new_folio,
+ page_folio(ptepage),
+ addr, dst_vma);
put_page(ptepage);
+ if (ret) {
+ folio_put(new_folio);
+ break;
+ }
/* Install the new hugetlb folio if src pte stable */
dst_ptl = huge_pte_lock(h, dst, dst_pte);
/* huge_ptep of dst_pte won't change as in child */
goto again;
}
- hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio);
+ hugetlb_install_folio(dst_vma, dst_pte, addr,
+ new_folio, src_pte_old);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
entry = huge_pte_wrprotect(entry);
}
+ if (!userfaultfd_wp(dst_vma))
+ entry = huge_pte_clear_uffd_wp(entry);
+
set_huge_pte_at(dst, addr, dst_pte, entry);
hugetlb_count_add(npages, dst);
}
goto out_release_all;
}
- copy_user_huge_page(&new_folio->page, old_page, address, vma,
- pages_per_huge_page(h));
+ if (copy_user_large_folio(new_folio, page_folio(old_page), address, vma)) {
+ ret = VM_FAULT_HWPOISON_LARGE;
+ goto out_release_all;
+ }
__folio_mark_uptodate(new_folio);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
spin_lock(ptl);
ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+
/* Break COW or unshare */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
page_remove_rmap(old_page, vma, true);
hugepage_add_new_anon_rmap(new_folio, vma, haddr);
- set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, &new_folio->page, !unshare));
+ if (huge_pte_uffd_wp(pte))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(mm, haddr, ptep, newpte);
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = page_folio(old_page);
*/
new_folio = false;
folio = filemap_lock_folio(mapping, idx);
- if (!folio) {
+ if (IS_ERR(folio)) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
goto out;
vma_end_reservation(h, vma, haddr);
pagecache_folio = filemap_lock_folio(mapping, idx);
+ if (IS_ERR(pagecache_folio))
+ pagecache_folio = NULL;
}
ptl = huge_pte_lock(h, mm, ptep);
#ifdef CONFIG_USERFAULTFD
/*
- * Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with
- * modifications for huge pages.
+ * Used by userfaultfd UFFDIO_* ioctls. Based on userfaultfd's mfill_atomic_pte
+ * with modifications for hugetlb pages.
*/
-int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
- pte_t *dst_pte,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- enum mcopy_atomic_mode mode,
- struct page **pagep,
- bool wp_copy)
-{
- bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
+int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop)
+{
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
+ bool is_continue = uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE);
+ bool wp_enabled = (flags & MFILL_ATOMIC_WP);
struct hstate *h = hstate_vma(dst_vma);
struct address_space *mapping = dst_vma->vm_file->f_mapping;
pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
if (is_continue) {
ret = -EFAULT;
folio = filemap_lock_folio(mapping, idx);
- if (!folio)
+ if (IS_ERR(folio))
goto out;
folio_in_pagecache = true;
- } else if (!*pagep) {
- /* If a page already exists, then it's UFFDIO_COPY for
+ } else if (!*foliop) {
+ /* If a folio already exists, then it's UFFDIO_COPY for
* a non-missing case. Return -EEXIST.
*/
if (vm_shared &&
goto out;
}
- ret = copy_huge_page_from_user(&folio->page,
- (const void __user *) src_addr,
- pages_per_huge_page(h), false);
+ ret = copy_folio_from_user(folio, (const void __user *) src_addr,
+ false);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOMEM;
goto out;
}
- *pagep = &folio->page;
- /* Set the outparam pagep and return to the caller to
+ *foliop = folio;
+ /* Set the outparam foliop and return to the caller to
* copy the contents outside the lock. Don't free the
- * page.
+ * folio.
*/
goto out;
}
} else {
if (vm_shared &&
hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
- put_page(*pagep);
+ folio_put(*foliop);
ret = -EEXIST;
- *pagep = NULL;
+ *foliop = NULL;
goto out;
}
folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
if (IS_ERR(folio)) {
- put_page(*pagep);
+ folio_put(*foliop);
ret = -ENOMEM;
- *pagep = NULL;
+ *foliop = NULL;
+ goto out;
+ }
+ ret = copy_user_large_folio(folio, *foliop, dst_addr, dst_vma);
+ folio_put(*foliop);
+ *foliop = NULL;
+ if (ret) {
+ folio_put(folio);
goto out;
}
- copy_user_huge_page(&folio->page, *pagep, dst_addr, dst_vma,
- pages_per_huge_page(h));
- put_page(*pagep);
- *pagep = NULL;
}
/*
* For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
* with wp flag set, don't set pte write bit.
*/
- if (wp_copy || (is_continue && !vm_shared))
+ if (wp_enabled || (is_continue && !vm_shared))
writable = 0;
else
writable = dst_vma->vm_flags & VM_WRITE;
_dst_pte = huge_pte_mkdirty(_dst_pte);
_dst_pte = pte_mkyoung(_dst_pte);
- if (wp_copy)
+ if (wp_enabled)
_dst_pte = huge_pte_mkuffd_wp(_dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);