| 1 | /* |
| 2 | * PPC Huge TLB Page Support for Kernel. |
| 3 | * |
| 4 | * Copyright (C) 2003 David Gibson, IBM Corporation. |
| 5 | * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor |
| 6 | * |
| 7 | * Based on the IA-32 version: |
| 8 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/mm.h> |
| 12 | #include <linux/io.h> |
| 13 | #include <linux/slab.h> |
| 14 | #include <linux/hugetlb.h> |
| 15 | #include <linux/export.h> |
| 16 | #include <linux/of_fdt.h> |
| 17 | #include <linux/memblock.h> |
| 18 | #include <linux/moduleparam.h> |
| 19 | #include <linux/swap.h> |
| 20 | #include <linux/swapops.h> |
| 21 | #include <linux/kmemleak.h> |
| 22 | #include <asm/pgalloc.h> |
| 23 | #include <asm/tlb.h> |
| 24 | #include <asm/setup.h> |
| 25 | #include <asm/hugetlb.h> |
| 26 | #include <asm/pte-walk.h> |
| 27 | #include <asm/firmware.h> |
| 28 | |
/*
 * When true, hugetlbpage_init() logs that HugeTLB support is disabled and
 * returns without registering any huge page size.
 */
bool hugetlb_disabled = false;

/*
 * Difference between the 1-based lowest-set-bit positions of
 * sizeof(pte_basic_t) and sizeof(void *). When both sizes are powers of two
 * this equals log2(sizeof(pte_basic_t) / sizeof(void *)).
 * NOTE(review): not referenced by any code visible in this part of the file.
 */
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
		     __builtin_ffs(sizeof(void *)))
| 33 | |
| 34 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) |
| 35 | { |
| 36 | /* |
| 37 | * Only called for hugetlbfs pages, hence can ignore THP and the |
| 38 | * irq disabled walk. |
| 39 | */ |
| 40 | return __find_linux_pte(mm->pgd, addr, NULL, NULL); |
| 41 | } |
| 42 | |
| 43 | pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, |
| 44 | unsigned long addr, unsigned long sz) |
| 45 | { |
| 46 | p4d_t *p4d; |
| 47 | pud_t *pud; |
| 48 | pmd_t *pmd; |
| 49 | |
| 50 | addr &= ~(sz - 1); |
| 51 | |
| 52 | p4d = p4d_offset(pgd_offset(mm, addr), addr); |
| 53 | if (!mm_pud_folded(mm) && sz >= P4D_SIZE) |
| 54 | return (pte_t *)p4d; |
| 55 | |
| 56 | pud = pud_alloc(mm, p4d, addr); |
| 57 | if (!pud) |
| 58 | return NULL; |
| 59 | if (!mm_pmd_folded(mm) && sz >= PUD_SIZE) |
| 60 | return (pte_t *)pud; |
| 61 | |
| 62 | pmd = pmd_alloc(mm, pud, addr); |
| 63 | if (!pmd) |
| 64 | return NULL; |
| 65 | |
| 66 | if (sz >= PMD_SIZE) { |
| 67 | /* On 8xx, all hugepages are handled as contiguous PTEs */ |
| 68 | if (IS_ENABLED(CONFIG_PPC_8xx)) { |
| 69 | int i; |
| 70 | |
| 71 | for (i = 0; i < sz / PMD_SIZE; i++) { |
| 72 | if (!pte_alloc_huge(mm, pmd + i, addr)) |
| 73 | return NULL; |
| 74 | } |
| 75 | } |
| 76 | return (pte_t *)pmd; |
| 77 | } |
| 78 | |
| 79 | return pte_alloc_huge(mm, pmd, addr); |
| 80 | } |
| 81 | |
| 82 | #ifdef CONFIG_PPC_BOOK3S_64 |
/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready on pseries.
 *
 * gpage_freearray[] holds the addresses recorded by pseries_add_gpage() and
 * nr_gpages counts the valid entries.  pseries_alloc_bootmem_huge_page()
 * takes entries back out starting from the end of the array.
 */
#define MAX_NUMBER_GPAGES 1024
__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
__initdata static unsigned nr_gpages;
| 90 | |
| 91 | /* |
| 92 | * Build list of addresses of gigantic pages. This function is used in early |
| 93 | * boot before the buddy allocator is setup. |
| 94 | */ |
| 95 | void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) |
| 96 | { |
| 97 | if (!addr) |
| 98 | return; |
| 99 | while (number_of_pages > 0) { |
| 100 | gpage_freearray[nr_gpages] = addr; |
| 101 | nr_gpages++; |
| 102 | number_of_pages--; |
| 103 | addr += page_size; |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) |
| 108 | { |
| 109 | struct huge_bootmem_page *m; |
| 110 | if (nr_gpages == 0) |
| 111 | return 0; |
| 112 | m = phys_to_virt(gpage_freearray[--nr_gpages]); |
| 113 | gpage_freearray[nr_gpages] = 0; |
| 114 | list_add(&m->list, &huge_boot_pages[0]); |
| 115 | m->hstate = hstate; |
| 116 | m->flags = 0; |
| 117 | return 1; |
| 118 | } |
| 119 | |
/*
 * Always returns false.  Only built when CONFIG_PPC_BOOK3S_64 is set.
 * NOTE(review): presumably tells the generic hugetlb code that per-node
 * boot-time allocation is unavailable here; confirm against the caller.
 */
bool __init hugetlb_node_alloc_supported(void)
{
	return false;
}
| 124 | #endif |
| 125 | |
| 126 | |
| 127 | int __init alloc_bootmem_huge_page(struct hstate *h, int nid) |
| 128 | { |
| 129 | |
| 130 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 131 | if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) |
| 132 | return pseries_alloc_bootmem_huge_page(h); |
| 133 | #endif |
| 134 | return __alloc_bootmem_huge_page(h, nid); |
| 135 | } |
| 136 | |
| 137 | bool __init arch_hugetlb_valid_size(unsigned long size) |
| 138 | { |
| 139 | int shift = __ffs(size); |
| 140 | int mmu_psize; |
| 141 | |
| 142 | /* Check that it is a page size supported by the hardware and |
| 143 | * that it fits within pagetable and slice limits. */ |
| 144 | if (size <= PAGE_SIZE || !is_power_of_2(size)) |
| 145 | return false; |
| 146 | |
| 147 | mmu_psize = check_and_get_huge_psize(shift); |
| 148 | if (mmu_psize < 0) |
| 149 | return false; |
| 150 | |
| 151 | BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); |
| 152 | |
| 153 | return true; |
| 154 | } |
| 155 | |
| 156 | static int __init add_huge_page_size(unsigned long long size) |
| 157 | { |
| 158 | int shift = __ffs(size); |
| 159 | |
| 160 | if (!arch_hugetlb_valid_size((unsigned long)size)) |
| 161 | return -EINVAL; |
| 162 | |
| 163 | hugetlb_add_hstate(shift - PAGE_SHIFT); |
| 164 | return 0; |
| 165 | } |
| 166 | |
| 167 | static int __init hugetlbpage_init(void) |
| 168 | { |
| 169 | bool configured = false; |
| 170 | int psize; |
| 171 | |
| 172 | if (hugetlb_disabled) { |
| 173 | pr_info("HugeTLB support is disabled!\n"); |
| 174 | return 0; |
| 175 | } |
| 176 | |
| 177 | if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() && |
| 178 | !mmu_has_feature(MMU_FTR_16M_PAGE)) |
| 179 | return -ENODEV; |
| 180 | |
| 181 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
| 182 | unsigned shift; |
| 183 | |
| 184 | if (!mmu_psize_defs[psize].shift) |
| 185 | continue; |
| 186 | |
| 187 | shift = mmu_psize_to_shift(psize); |
| 188 | |
| 189 | if (add_huge_page_size(1ULL << shift) < 0) |
| 190 | continue; |
| 191 | |
| 192 | configured = true; |
| 193 | } |
| 194 | |
| 195 | if (!configured) |
| 196 | pr_info("Failed to initialize. Disabling HugeTLB"); |
| 197 | |
| 198 | return 0; |
| 199 | } |
| 200 | |
| 201 | arch_initcall(hugetlbpage_init); |
| 202 | |
| 203 | void __init gigantic_hugetlb_cma_reserve(void) |
| 204 | { |
| 205 | unsigned long order = 0; |
| 206 | |
| 207 | if (radix_enabled()) |
| 208 | order = PUD_SHIFT - PAGE_SHIFT; |
| 209 | else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) |
| 210 | /* |
| 211 | * For pseries we do use ibm,expected#pages for reserving 16G pages. |
| 212 | */ |
| 213 | order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; |
| 214 | |
| 215 | if (order) |
| 216 | hugetlb_cma_reserve(order); |
| 217 | } |