Commit | Line | Data |
---|---|---|
ab537dca AK |
1 | #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
2 | #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H | |
3 | ||
ab537dca | 4 | #define PTE_INDEX_SIZE 8 /* log2 of the entries per PTE table */ |
368ced78 AK |
5 | #define PMD_INDEX_SIZE 5 /* log2 of the entries per PMD table */ |
6 | #define PUD_INDEX_SIZE 5 /* log2 of the entries per PUD table */ | |
ab537dca AK |
7 | #define PGD_INDEX_SIZE 12 /* log2 of the entries per PGD table */ |
8 | ||
9 | #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) /* 256 entries */ | |
10 | #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) /* 32 entries */ | |
368ced78 | 11 | #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) /* 32 entries */ |
ab537dca AK |
12 | #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* 4096 entries */ |
13 | ||
14 | /* With 4k base page size, hugepage PTEs go at the PMD level */ | |
15 | #define MIN_HUGEPTE_SHIFT PAGE_SHIFT | |
16 | ||
17 | /* PMD_SHIFT determines what a second-level page table entry can map */ | |
18 | #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) | |
19 | #define PMD_SIZE (1UL << PMD_SHIFT) | |
20 | #define PMD_MASK (~(PMD_SIZE-1)) | |
21 | ||
368ced78 AK |
22 | /* PUD_SHIFT determines what a third-level page table entry can map */ |
23 | #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) | |
24 | #define PUD_SIZE (1UL << PUD_SHIFT) | |
25 | #define PUD_MASK (~(PUD_SIZE-1)) | |
26 | ||
27 | /* PGDIR_SHIFT determines what a fourth-level page table entry can map */ | |
28 | #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) | |
ab537dca AK |
29 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
30 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | |
31 | ||
945537df AK |
32 | #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ |
33 | #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ | |
bf680d51 | 34 | /* |
945537df AK |
35 | * We need to differentiate between explicit huge pages and THP huge |
36 | * pages, since THP huge pages also need to track real subpage details | |
16c2d476 | 37 | */ |
945537df AK |
38 | #define H_PAGE_THP_HUGE H_PAGE_4K_PFN |
39 | ||
40 | /* | |
41 | * Used to track subpage group valid if H_PAGE_COMBO is set | |
42 | * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND | |
43 | */ | |
44 | #define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) | |
3c726f8d BH |
45 | |
46 | /* PTE flags to conserve for HPTE identification */ | |
945537df AK |
47 | #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ |
48 | H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) | |
62607bc6 AK |
49 | /* |
50 | * we support 16 fragments per PTE page of 64K size. | |
51 | */ | |
52 | #define PTE_FRAG_NR 16 | |
53 | /* | |
54 | * We use a 2K PTE page fragment and another 2K for storing | |
55 | * real_pte_t hash index | |
56 | */ | |
57 | #define PTE_FRAG_SIZE_SHIFT 12 | |
58 | #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) | |
59 | ||
368ced78 AK |
60 | /* Bits to mask out from a PMD to get to the PTE page */ |
61 | #define PMD_MASKED_BITS 0xc0000000000000ffUL | |
62 | /* Bits to mask out from a PUD to get to the PMD page */ | |
63 | #define PUD_MASKED_BITS 0xc0000000000000ffUL | |
64 | /* Bits to mask out from a PGD to get to the PUD page */ | |
65 | #define PGD_MASKED_BITS 0xc0000000000000ffUL | |
3c726f8d | 66 | |
c605782b | 67 | #ifndef __ASSEMBLY__ |
96270b1f | 68 | #include <asm/errno.h> |
3c726f8d | 69 | |
c605782b BH |
70 | /* |
71 | * With 64K pages on hash table, we have a special PTE format that | |
72 | * uses a second "half" of the page table to encode sub-page information | |
73 | * in order to deal with 64K made of 4K HW pages. Thus we override the | |
74 | * generic accessors and iterators here. | |
75 | */ | |
85c1fafd AK |
76 | #define __real_pte __real_pte |
77 | static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) | |
78 | { /* Build a real_pte_t from pte plus the hidx word found PTRS_PER_PTE entries past ptep */ | |
79 | real_pte_t rpte; | |
506b863c | 80 | unsigned long *hidxp; |
85c1fafd AK |
81 | |
82 | rpte.pte = pte; | |
83 | rpte.hidx = 0; /* stays 0 unless H_PAGE_COMBO is set in pte */ | |
945537df | 84 | if (pte_val(pte) & H_PAGE_COMBO) { |
85c1fafd | 85 | /* |
945537df | 86 | * Make sure we order the hidx load against the H_PAGE_COMBO |
85c1fafd AK |
87 | * check. The store side ordering is done in __hash_page_4K. |
88 | */ | |
89 | smp_rmb(); | |
506b863c AK |
90 | hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); /* slot in the second half of the table */ |
91 | rpte.hidx = *hidxp; | |
85c1fafd AK |
92 | } |
93 | return rpte; | |
94 | } | |
95 | ||
96 | static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) | |
97 | { /* Return the 4-bit hash index for sub-page number index of rpte */ | |
945537df | 98 | if ((pte_val(rpte.pte) & H_PAGE_COMBO)) |
85c1fafd | 99 | return (rpte.hidx >> (index<<2)) & 0xf; /* combo page: 4-bit field number index of hidx */ |
945537df | 100 | return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; /* otherwise: 4 bits at H_PAGE_F_GIX_SHIFT in the PTE; index is unused */ |
85c1fafd AK |
101 | } |
102 | ||
3c726f8d | 103 | #define __rpte_to_pte(r) ((r).pte) |
bf680d51 | 104 | extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); |
ab537dca AK |
105 | /* |
106 | * Trick: we set __end to vpn + 64k (expressed in VPN units), which happens to work for | |
3c726f8d BH |
107 | * a 16M page as well, as we want only one iteration there. Close the loop body with pte_iterate_hashed_end(). |
108 | */ | |
5524a27d AK |
109 | #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ |
110 | do { \ | |
111 | unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ | |
112 | unsigned __split = (psize == MMU_PAGE_4K || \ | |
113 | psize == MMU_PAGE_64K_AP); \ | |
114 | shift = mmu_psize_defs[psize].shift; \ | |
115 | for (index = 0; vpn < __end; index++, \ | |
116 | vpn += (1L << (shift - VPN_SHIFT))) { \ | |
117 | if (!__split || __rpte_sub_valid(rpte, index)) \ | |
118 | do { |
3c726f8d BH |
119 | |
120 | #define pte_iterate_hashed_end() } while(0); } } while(0) | |
121 | ||
16c2d476 | 122 | #define pte_pagesize_index(mm, addr, pte) \ |
945537df | 123 | (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) |
3c726f8d | 124 | |
96270b1f AK |
125 | extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
126 | unsigned long pfn, unsigned long size, pgprot_t); | |
127 | static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, | |
128 | unsigned long pfn, pgprot_t prot) | |
129 | { /* Remap PAGE_SIZE bytes at addr to pfn with H_PAGE_4K_PFN added to prot; returns -EINVAL for an out-of-range pfn */ | |
130 | if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) { /* pfn exceeds PTE_RPN_MASK >> PAGE_SHIFT */ | |
131 | WARN(1, "remap_4k_pfn called with wrong pfn value\n"); | |
132 | return -EINVAL; | |
133 | } | |
134 | return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, | |
945537df | 135 | __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); |
96270b1f | 136 | } |
721151d0 | 137 | |
62607bc6 AK |
138 | #define PTE_TABLE_SIZE PTE_FRAG_SIZE |
139 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
140 | #define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) | |
141 | #else | |
ab537dca | 142 | #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) |
62607bc6 | 143 | #endif |
368ced78 | 144 | #define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) |
ab537dca AK |
145 | #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) |
146 | ||
26a344ae AK |
147 | #ifdef CONFIG_HUGETLB_PAGE |
148 | /* | |
149 | * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have | |
150 | * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; | |
151 | * | |
152 | * Defined in such a way that we can optimize away code block at build time | |
153 | * if CONFIG_HUGETLB_PAGE=n. | |
154 | */ | |
155 | static inline int pmd_huge(pmd_t pmd) | |
156 | { | |
157 | /* | |
6a119eae | 158 | * Returns 1 when _PAGE_PTE is set in the PMD entry, i.e. it is a leaf pte for a huge page, else 0 |
26a344ae | 159 | */ |
6a119eae | 160 | return !!(pmd_val(pmd) & _PAGE_PTE); |
26a344ae AK |
161 | } |
162 | ||
163 | static inline int pud_huge(pud_t pud) | |
164 | { | |
165 | /* | |
6a119eae | 166 | * Returns 1 when _PAGE_PTE is set in the PUD entry, i.e. it is a leaf pte for a huge page, else 0 |
26a344ae | 167 | */ |
6a119eae | 168 | return !!(pud_val(pud) & _PAGE_PTE); |
26a344ae AK |
169 | } |
170 | ||
171 | static inline int pgd_huge(pgd_t pgd) | |
172 | { | |
173 | /* | |
6a119eae | 174 | * Returns 1 when _PAGE_PTE is set in the PGD entry, i.e. it is a leaf pte for a huge page, else 0 |
26a344ae | 175 | */ |
6a119eae | 176 | return !!(pgd_val(pgd) & _PAGE_PTE); |
26a344ae AK |
177 | } |
178 | #define pgd_huge pgd_huge | |
179 | ||
180 | #ifdef CONFIG_DEBUG_VM | |
181 | extern int hugepd_ok(hugepd_t hpd); /* only declared here in this configuration */ | |
182 | #define is_hugepd(hpd) (hugepd_ok(hpd)) | |
183 | #else | |
184 | /* | |
185 | * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't | |
186 | * need to set up a hugepage directory for them; our pte and page directory formats | |
187 | * make that possible. hugepd_ok() and is_hugepd() are therefore constant 0 here. | |
188 | */ | |
189 | static inline int hugepd_ok(hugepd_t hpd) | |
190 | { | |
191 | return 0; | |
192 | } | |
193 | #define is_hugepd(pdep) 0 | |
194 | #endif /* CONFIG_DEBUG_VM */ | |
195 | ||
196 | #endif /* CONFIG_HUGETLB_PAGE */ | |
197 | ||
e34aa03c AK |
198 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
199 | extern unsigned long pmd_hugepage_update(struct mm_struct *mm, | |
200 | unsigned long addr, | |
201 | pmd_t *pmdp, | |
202 | unsigned long clr, | |
203 | unsigned long set); | |
204 | static inline char *get_hpte_slot_array(pmd_t *pmdp) | |
205 | { | |
206 | /* | |
207 | * The hpte hash index (hidx) array is stored in the pgtable whose address | |
208 | * is kept in the second half of the PMD table, PTRS_PER_PMD entries past pmdp. | |
209 | * | |
210 | * Order this load with the test for pmd_trans_huge in the caller | |
211 | */ | |
212 | smp_rmb(); | |
213 | return *(char **)(pmdp + PTRS_PER_PMD); /* read the stashed pointer */ | |
214 | ||
215 | ||
216 | } | |
217 | /* | |
218 | * The linux hugepage PMD now includes the pmd entries followed by the address | |
219 | * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. | |
849f86a6 | 220 | * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per |
e34aa03c AK |
221 | * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and |
222 | * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. | |
223 | * | |
849f86a6 | 224 | * The top three bits are intentionally left as zero. These memory locations |
e34aa03c AK |
225 | * are also used as normal page PTE pointers. So if we have any pointers |
226 | * left around while we collapse a hugepage, we need to make sure the | |
227 | * _PAGE_PRESENT bit of each is zero when we look at them. | |
228 | */ | |
229 | static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) | |
230 | { /* Return bit 0 (the valid bit) of the slot byte at index */ | |
849f86a6 | 231 | return hpte_slot_array[index] & 0x1; |
e34aa03c AK |
232 | } |
233 | ||
234 | static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, | |
235 | int index) | |
236 | { /* Return the slot byte at index with the valid bit shifted out */ | |
849f86a6 | 237 | return hpte_slot_array[index] >> 1; /* leaves the bits above bit 0 (secondary + hidx per the layout comment) */ |
e34aa03c AK |
238 | } |
239 | ||
240 | static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, | |
241 | unsigned int index, unsigned int hidx) | |
242 | { /* Overwrite the slot byte at index with hidx and mark it valid */ | |
849f86a6 | 243 | hpte_slot_array[index] = (hidx << 1) | 0x1; /* hidx goes above bit 0; bit 0 is the valid bit */ |
e34aa03c AK |
244 | } |
245 | ||
246 | /* | |
247 | * | |
248 | * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs | |
249 | * page. The hugetlbfs page table walking and mangling paths are totally | |
250 | * separated from the core VM paths and they're differentiated by | |
251 | * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. | |
252 | * | |
253 | * pmd_trans_huge() is defined as false at build time if | |
254 | * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build | |
255 | * time in such case. | |
256 | * | |
257 | * For ppc64 we need to differentiate explicit hugepages from THP, because | |
258 | * for THP we also track the subpage details at the pmd level. We don't do | |
259 | * that for explicit huge pages. | |
260 | * | |
261 | */ | |
262 | static inline int pmd_trans_huge(pmd_t pmd) | |
263 | { /* Returns 1 only when both _PAGE_PTE and H_PAGE_THP_HUGE are set in pmd, else 0 */ | |
945537df AK |
264 | return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == |
265 | (_PAGE_PTE | H_PAGE_THP_HUGE)); | |
e34aa03c AK |
266 | } |
267 | ||
e34aa03c AK |
268 | static inline int pmd_large(pmd_t pmd) |
269 | { /* Returns 1 when _PAGE_PTE is set in pmd, else 0 */ | |
6a119eae | 270 | return !!(pmd_val(pmd) & _PAGE_PTE); |
e34aa03c AK |
271 | } |
272 | ||
273 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | |
274 | { /* Return a copy of pmd with _PAGE_PRESENT cleared; the argument itself is not modified */ | |
275 | return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); | |
276 | } | |
277 | ||
e34aa03c AK |
278 | #define __HAVE_ARCH_PMD_SAME |
279 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | |
280 | { /* Returns 1 when the two raw PMD values differ at most in the _PAGE_HPTEFLAGS bits (mask passed through cpu_to_be64) */ | |
ee3caed3 | 281 | return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); |
e34aa03c AK |
282 | } |
283 | ||
284 | static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, | |
285 | unsigned long addr, pmd_t *pmdp) | |
286 | { /* Clear _PAGE_ACCESSED in *pmdp; returns 1 if it was set in the old value, else 0 */ | |
287 | unsigned long old; | |
288 | ||
945537df | 289 | if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) |
e34aa03c AK |
290 | return 0; /* neither _PAGE_ACCESSED nor H_PAGE_HASHPTE is set: skip the update */ |
291 | old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); /* clr = _PAGE_ACCESSED, set = 0 */ | |
292 | return ((old & _PAGE_ACCESSED) != 0); | |
293 | } | |
294 | ||
295 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | |
296 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |
297 | pmd_t *pmdp) | |
298 | { /* Clear _PAGE_WRITE in *pmdp via pmd_hugepage_update() */ | |
299 | ||
c7d54842 | 300 | if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0) |
e34aa03c AK |
301 | return; /* _PAGE_WRITE already clear: nothing to update */ |
302 | ||
c7d54842 | 303 | pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); /* clr = _PAGE_WRITE, set = 0 */ |
e34aa03c AK |
304 | } |
305 | ||
306 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
c605782b | 307 | #endif /* __ASSEMBLY__ */ |
ab537dca AK |
308 | |
309 | #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ |