*/
memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
huge_page_size(h) - PAGE_SIZE);
- /* Put them into a private list first because mem_map is not up yet */
+
+ /*
+ * Put them into a private list first because mem_map is not up yet.
+ *
+ * For pre-HVO to work correctly, pages need to be on the list for
+ * the node they were actually allocated from. That node may be
+ * different in the case of fallback by memblock_alloc_try_nid_raw.
+ * So, extract the actual node first.
+ */
+ if (nid == NUMA_NO_NODE)
+ node = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m)));
+
INIT_LIST_HEAD(&m->list);
list_add(&m->list, &huge_boot_pages[node]);
m->hstate = h;
}
}
-static bool __init hugetlb_bootmem_page_zones_valid(int nid,
- struct huge_bootmem_page *m)
+bool __init hugetlb_bootmem_page_zones_valid(int nid,
+ struct huge_bootmem_page *m)
{
unsigned long start_pfn;
bool valid;
__hugetlb_vmemmap_optimize_folios(h, folio_list, true);
}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
+
+/* Return true of a bootmem allocated HugeTLB page should be pre-HVO-ed */
+static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m)
+{
+ unsigned long section_size, psize, pmd_vmemmap_size;
+ phys_addr_t paddr;
+
+ if (!READ_ONCE(vmemmap_optimize_enabled))
+ return false;
+
+ if (!hugetlb_vmemmap_optimizable(m->hstate))
+ return false;
+
+ psize = huge_page_size(m->hstate);
+ paddr = virt_to_phys(m);
+
+ /*
+ * Pre-HVO only works if the bootmem huge page
+ * is aligned to the section size.
+ */
+ section_size = (1UL << PA_SECTION_SHIFT);
+ if (!IS_ALIGNED(paddr, section_size) ||
+ !IS_ALIGNED(psize, section_size))
+ return false;
+
+ /*
+ * The pre-HVO code does not deal with splitting PMDS,
+ * so the bootmem page must be aligned to the number
+ * of base pages that can be mapped with one vmemmap PMD.
+ */
+ pmd_vmemmap_size = (PMD_SIZE / (sizeof(struct page))) << PAGE_SHIFT;
+ if (!IS_ALIGNED(paddr, pmd_vmemmap_size) ||
+ !IS_ALIGNED(psize, pmd_vmemmap_size))
+ return false;
+
+ return true;
+}
+
+/*
+ * Initialize memmap section for a gigantic page, HVO-style.
+ */
+void __init hugetlb_vmemmap_init_early(int nid)
+{
+ unsigned long psize, paddr, section_size;
+ unsigned long ns, i, pnum, pfn, nr_pages;
+ unsigned long start, end;
+ struct huge_bootmem_page *m = NULL;
+ void *map;
+
+ /*
+ * Noting to do if bootmem pages were not allocated
+ * early in boot, or if HVO wasn't enabled in the
+ * first place.
+ */
+ if (!hugetlb_bootmem_allocated())
+ return;
+
+ if (!READ_ONCE(vmemmap_optimize_enabled))
+ return;
+
+ section_size = (1UL << PA_SECTION_SHIFT);
+
+ list_for_each_entry(m, &huge_boot_pages[nid], list) {
+ if (!vmemmap_should_optimize_bootmem_page(m))
+ continue;
+
+ nr_pages = pages_per_huge_page(m->hstate);
+ psize = nr_pages << PAGE_SHIFT;
+ paddr = virt_to_phys(m);
+ pfn = PHYS_PFN(paddr);
+ map = pfn_to_page(pfn);
+ start = (unsigned long)map;
+ end = start + nr_pages * sizeof(struct page);
+
+ if (vmemmap_populate_hvo(start, end, nid,
+ HUGETLB_VMEMMAP_RESERVE_SIZE) < 0)
+ continue;
+
+ memmap_boot_pages_add(HUGETLB_VMEMMAP_RESERVE_SIZE / PAGE_SIZE);
+
+ pnum = pfn_to_section_nr(pfn);
+ ns = psize / section_size;
+
+ for (i = 0; i < ns; i++) {
+ sparse_init_early_section(nid, map, pnum,
+ SECTION_IS_VMEMMAP_PREINIT);
+ map += section_map_size();
+ pnum++;
+ }
+
+ m->flags |= HUGE_BOOTMEM_HVO;
+ }
+}
+
+void __init hugetlb_vmemmap_init_late(int nid)
+{
+ struct huge_bootmem_page *m, *tm;
+ unsigned long phys, nr_pages, start, end;
+ unsigned long pfn, nr_mmap;
+ struct hstate *h;
+ void *map;
+
+ if (!hugetlb_bootmem_allocated())
+ return;
+
+ if (!READ_ONCE(vmemmap_optimize_enabled))
+ return;
+
+ list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) {
+ if (!(m->flags & HUGE_BOOTMEM_HVO))
+ continue;
+
+ phys = virt_to_phys(m);
+ h = m->hstate;
+ pfn = PHYS_PFN(phys);
+ nr_pages = pages_per_huge_page(h);
+
+ if (!hugetlb_bootmem_page_zones_valid(nid, m)) {
+ /*
+ * Oops, the hugetlb page spans multiple zones.
+ * Remove it from the list, and undo HVO.
+ */
+ list_del(&m->list);
+
+ map = pfn_to_page(pfn);
+
+ start = (unsigned long)map;
+ end = start + nr_pages * sizeof(struct page);
+
+ vmemmap_undo_hvo(start, end, nid,
+ HUGETLB_VMEMMAP_RESERVE_SIZE);
+ nr_mmap = end - start - HUGETLB_VMEMMAP_RESERVE_SIZE;
+ memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE));
+
+ memblock_phys_free(phys, huge_page_size(h));
+ continue;
+ } else
+ m->flags |= HUGE_BOOTMEM_ZONES_VALID;
+ }
+}
+#endif
+
static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
{
.procname = "hugetlb_optimize_vmemmap",
#ifndef _LINUX_HUGETLB_VMEMMAP_H
#define _LINUX_HUGETLB_VMEMMAP_H
#include <linux/hugetlb.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
/*
* Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
+void hugetlb_vmemmap_init_early(int nid);
+void hugetlb_vmemmap_init_late(int nid);
+#endif
static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
}
+static inline void hugetlb_vmemmap_init_early(int nid)
+{
+}
+
+static inline void hugetlb_vmemmap_init_late(int nid)
+{
+}
+
static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
return 0;