Commit | Line | Data |
---|---|---|
ab537dca AK |
1 | #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
2 | #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H | |
3 | ||
ab537dca | 4 | #define PTE_INDEX_SIZE 8 |
368ced78 AK |
5 | #define PMD_INDEX_SIZE 5 |
6 | #define PUD_INDEX_SIZE 5 | |
ab537dca AK |
7 | #define PGD_INDEX_SIZE 12 |
8 | ||
9 | #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) | |
10 | #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) | |
368ced78 | 11 | #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) |
ab537dca AK |
12 | #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) |
13 | ||
14 | /* With 4k base page size, hugepage PTEs go at the PMD level */ | |
15 | #define MIN_HUGEPTE_SHIFT PAGE_SHIFT | |
16 | ||
17 | /* PMD_SHIFT determines what a second-level page table entry can map */ | |
18 | #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) | |
19 | #define PMD_SIZE (1UL << PMD_SHIFT) | |
20 | #define PMD_MASK (~(PMD_SIZE-1)) | |
21 | ||
368ced78 AK |
22 | /* PUD_SHIFT determines what a third-level page table entry can map */ |
23 | #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) | |
24 | #define PUD_SIZE (1UL << PUD_SHIFT) | |
25 | #define PUD_MASK (~(PUD_SIZE-1)) | |
26 | ||
27 | /* PGDIR_SHIFT determines what a fourth-level page table entry can map */ | |
28 | #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) | |
ab537dca AK |
29 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
30 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | |
31 | ||
8daf51f5 PM |
32 | #define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */ |
33 | #define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ | |
bf680d51 AK |
34 | /* |
35 | * Used to track subpage group valid if _PAGE_COMBO is set | |
36 | * This overloads _PAGE_F_GIX and _PAGE_F_SECOND | |
16c2d476 | 37 | */ |
bf680d51 | 38 | #define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND) |
3c726f8d BH |
39 | |
40 | /* PTE flags to conserve for HPTE identification */ | |
89ff7250 AK |
41 | #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \ |
42 | _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO) | |
3c726f8d BH |
43 | |
44 | /* Shift to put page number into pte. | |
45 | * | |
8daf51f5 PM |
46 | * That gives us a max RPN of 41 bits, which means a max of 57 bits |
47 | * of addressable physical space, or 53 bits for the special 4k PFNs. | |
3c726f8d | 48 | */ |
8daf51f5 PM |
49 | #define PTE_RPN_SHIFT (16) |
50 | #define PTE_RPN_SIZE (41) | |
f1a9ae03 | 51 | |
62607bc6 AK |
52 | /* |
53 | * we support 16 fragments per PTE page of 64K size. | |
54 | */ | |
55 | #define PTE_FRAG_NR 16 | |
56 | /* | |
57 | * We use a 2K PTE page fragment and another 2K for storing | |
58 | * real_pte_t hash index | |
59 | */ | |
60 | #define PTE_FRAG_SIZE_SHIFT 12 | |
61 | #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) | |
62 | ||
368ced78 AK |
63 | /* Bits to mask out from a PMD to get to the PTE page */ |
64 | #define PMD_MASKED_BITS 0xc0000000000000ffUL | |
65 | /* Bits to mask out from a PUD to get to the PMD page */ | |
66 | #define PUD_MASKED_BITS 0xc0000000000000ffUL | |
67 | /* Bits to mask out from a PGD to get to the PUD page */ | |
68 | #define PGD_MASKED_BITS 0xc0000000000000ffUL | |
3c726f8d | 69 | |
c605782b | 70 | #ifndef __ASSEMBLY__ |
3c726f8d | 71 | |
c605782b BH |
72 | /* |
73 | * With 64K pages on hash table, we have a special PTE format that | |
74 | * uses a second "half" of the page table to encode sub-page information | |
75 | * in order to deal with 64K made of 4K HW pages. Thus we override the | |
76 | * generic accessors and iterators here | |
77 | */ | |
85c1fafd AK |
78 | #define __real_pte __real_pte |
79 | static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) | |
80 | { | |
81 | real_pte_t rpte; | |
506b863c | 82 | unsigned long *hidxp; |
85c1fafd AK |
83 | |
84 | rpte.pte = pte; | |
85 | rpte.hidx = 0; | |
86 | if (pte_val(pte) & _PAGE_COMBO) { | |
87 | /* | |
88 | * Make sure we order the hidx load against the _PAGE_COMBO | |
89 | * check. The store side ordering is done in __hash_page_4K | |
90 | */ | |
91 | smp_rmb(); | |
506b863c AK |
92 | hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); |
93 | rpte.hidx = *hidxp; | |
85c1fafd AK |
94 | } |
95 | return rpte; | |
96 | } | |
97 | ||
98 | static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) | |
99 | { | |
100 | if ((pte_val(rpte.pte) & _PAGE_COMBO)) | |
101 | return (rpte.hidx >> (index<<2)) & 0xf; | |
4d9057c3 | 102 | return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf; |
85c1fafd AK |
103 | } |
104 | ||
/* Yield the pte member of a real_pte_t. */
3c726f8d | 105 | #define __rpte_to_pte(r) ((r).pte) |
/*
 * Declared here, defined elsewhere. pte_iterate_hashed_subpages() calls it
 * with the current sub-page index to decide whether the loop body runs.
 */
bf680d51 | 106 | extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); |
/*
 * pte_iterate_hashed_subpages() and pte_iterate_hashed_end() bracket a loop
 * body and must always be used as a pair: the first macro opens an outer
 * do-block, a for loop and an inner do-block, and the second one closes all
 * three.
 *
 * The caller's vpn, index and shift are written by the macro:
 *  - shift is loaded from mmu_psize_defs[psize].shift;
 *  - index starts at 0 and is incremented once per iteration;
 *  - vpn is advanced by 1L << (shift - VPN_SHIFT) per iteration until it
 *    reaches the starting vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)).
 *
 * When psize is MMU_PAGE_4K or MMU_PAGE_64K_AP, the body is skipped for
 * every index for which __rpte_sub_valid(rpte, index) is false; for any
 * other psize the body runs on every iteration.
 *
 * psize and vpn are evaluated more than once, so pass side-effect-free
 * expressions.
 */
ab537dca AK |
107 | /* |
108 | * Trick: we set __end to va + 64k, which happens to work for |
3c726f8d BH |
109 | * a 16M page as well as we want only one iteration |
110 | */ | |
5524a27d AK |
111 | #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ |
112 | do { \ | |
113 | unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ | |
114 | unsigned __split = (psize == MMU_PAGE_4K || \ | |
115 | psize == MMU_PAGE_64K_AP); \ | |
116 | shift = mmu_psize_defs[psize].shift; \ | |
117 | for (index = 0; vpn < __end; index++, \ | |
118 | vpn += (1L << (shift - VPN_SHIFT))) { \ | |
119 | if (!__split || __rpte_sub_valid(rpte, index)) \ | |
120 | do { | |
3c726f8d BH |
121 | |
122 | #define pte_iterate_hashed_end() } while(0); } } while(0) | |
123 | ||
/*
 * Pick the MMU page size index for a PTE value: MMU_PAGE_4K when
 * _PAGE_COMBO is set in pte, MMU_PAGE_64K otherwise. The mm and addr
 * arguments are accepted but not used by this definition.
 */
16c2d476 | 124 | #define pte_pagesize_index(mm, addr, pte) \ |
bf72aeba | 125 | (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) |
3c726f8d | 126 | |
/*
 * Wrapper around remap_pfn_range() that passes PAGE_SIZE and ORs
 * _PAGE_4K_PFN into the protection bits.
 *
 * If pfn is >= (1UL << PTE_RPN_SIZE) the WARN_ON condition is true and the
 * expression evaluates to -EINVAL without calling remap_pfn_range();
 * otherwise it evaluates to the remap_pfn_range() result.
 *
 * Note that pfn is evaluated twice (range check, then the call).
 */
721151d0 | 127 | #define remap_4k_pfn(vma, addr, pfn, prot) \ |
f1a9ae03 | 128 | (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \ |
eeb03a6e MK |
129 | remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ |
130 | __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) | |
721151d0 | 131 | |
62607bc6 AK |
132 | #define PTE_TABLE_SIZE PTE_FRAG_SIZE |
133 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
134 | #define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) | |
135 | #else | |
ab537dca | 136 | #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) |
62607bc6 | 137 | #endif |
368ced78 | 138 | #define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) |
ab537dca AK |
139 | #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) |
140 | ||
26a344ae AK |
141 | #ifdef CONFIG_HUGETLB_PAGE |
142 | /* | |
143 | * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have | |
144 | * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; | |
145 | * | |
146 | * Defined in such a way that we can optimize away code block at build time | |
147 | * if CONFIG_HUGETLB_PAGE=n. | |
148 | */ | |
149 | static inline int pmd_huge(pmd_t pmd) | |
150 | { | |
151 | /* | |
6a119eae | 152 | * leaf pte for huge page |
26a344ae | 153 | */ |
6a119eae | 154 | return !!(pmd_val(pmd) & _PAGE_PTE); |
26a344ae AK |
155 | } |
156 | ||
157 | static inline int pud_huge(pud_t pud) | |
158 | { | |
159 | /* | |
6a119eae | 160 | * leaf pte for huge page |
26a344ae | 161 | */ |
6a119eae | 162 | return !!(pud_val(pud) & _PAGE_PTE); |
26a344ae AK |
163 | } |
164 | ||
165 | static inline int pgd_huge(pgd_t pgd) | |
166 | { | |
167 | /* | |
6a119eae | 168 | * leaf pte for huge page |
26a344ae | 169 | */ |
6a119eae | 170 | return !!(pgd_val(pgd) & _PAGE_PTE); |
26a344ae AK |
171 | } |
172 | #define pgd_huge pgd_huge | |
173 | ||
/*
 * hugepd_ok() / is_hugepd(): with CONFIG_DEBUG_VM, hugepd_ok() is only
 * declared here (defined elsewhere) and is_hugepd() forwards to it; without
 * CONFIG_DEBUG_VM both are compile-time constant 0.
 */
174 | #ifdef CONFIG_DEBUG_VM | |
175 | extern int hugepd_ok(hugepd_t hpd); | |
176 | #define is_hugepd(hpd) (hugepd_ok(hpd)) | |
177 | #else | |
178 | /* | |
179 | * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't | |
180 | * need to set up a hugepage directory for them. Our pte and page directory | |
181 | * format allows this. | |
182 | */ | |
183 | static inline int hugepd_ok(hugepd_t hpd) | |
184 | { | |
185 | return 0; | |
186 | } | |
187 | #define is_hugepd(pdep) 0 | |
188 | #endif /* CONFIG_DEBUG_VM */ | |
189 | ||
190 | #endif /* CONFIG_HUGETLB_PAGE */ | |
191 | ||
e34aa03c AK |
192 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
193 | extern unsigned long pmd_hugepage_update(struct mm_struct *mm, | |
194 | unsigned long addr, | |
195 | pmd_t *pmdp, | |
196 | unsigned long clr, | |
197 | unsigned long set); | |
198 | static inline char *get_hpte_slot_array(pmd_t *pmdp) | |
199 | { | |
200 | /* | |
201 | * The hpte hindex is stored in the pgtable whose address is in the | |
202 | * second half of the PMD | |
203 | * | |
204 | * Order this load with the test for pmd_trans_huge in the caller | |
205 | */ | |
206 | smp_rmb(); | |
207 | return *(char **)(pmdp + PTRS_PER_PMD); | |
208 | ||
209 | ||
210 | } | |
211 | /* | |
212 | * The linux hugepage PMD now includes the pmd entries followed by the address | |
213 | * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. | |
849f86a6 | 214 | * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per |
e34aa03c AK |
215 | * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and |
216 | * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. | |
217 | * | |
849f86a6 | 218 | * The top three bits are intentionally left as zero. This memory location |
e34aa03c AK |
219 | * is also used as normal page PTE pointers. So if we have any pointers |
220 | * left around while we collapse a hugepage, we need to make sure | |
221 | * _PAGE_PRESENT bit of that is zero when we look at them | |
222 | */ | |
/*
 * Test the valid bit (bit 0) of one HPTE slot byte.
 *
 * @hpte_slot_array: array holding one byte per HPTE entry; only read here,
 *                   hence const-qualified
 * @index:           entry to inspect
 *
 * Returns 1 if the entry is marked valid, 0 otherwise.
 */
static inline unsigned int hpte_valid(const unsigned char *hpte_slot_array,
				      int index)
{
	return hpte_slot_array[index] & 0x1;
}
227 | ||
/*
 * Return the bits stored above the valid bit of one HPTE slot byte, i.e.
 * the byte shifted right by one. With the slot layout documented in this
 * file that is the secondary bit followed by the 3 bit hidx.
 *
 * @hpte_slot_array: array holding one byte per HPTE entry; only read here,
 *                   hence const-qualified
 * @index:           entry to inspect
 */
static inline unsigned int hpte_hash_index(const unsigned char *hpte_slot_array,
					   int index)
{
	return hpte_slot_array[index] >> 1;
}
233 | ||
/*
 * Record @hidx for entry @index and mark the entry valid: the byte written
 * is @hidx shifted left by one with bit 0 (the valid bit) set.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	unsigned char *slot = &hpte_slot_array[index];

	*slot = (hidx << 1) | 0x1;
}
239 | ||
240 | /* | |
241 | * | |
242 | * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs | |
243 | * page. The hugetlbfs page table walking and mangling paths are totally | |
244 | * separated from the core VM paths and they're differentiated by | |
245 | * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. | |
246 | * | |
247 | * pmd_trans_huge() is defined as false at build time if | |
248 | * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build | |
249 | * time in such case. | |
250 | * | |
251 | * For ppc64 we need to differentiate explicit hugepages from THP, because | |
252 | * for THP we also track the subpage details at the pmd level. We don't do | |
253 | * that for explicit huge pages. | |
254 | * | |
255 | */ | |
256 | static inline int pmd_trans_huge(pmd_t pmd) | |
257 | { | |
6a119eae AK |
258 | return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) == |
259 | (_PAGE_PTE | _PAGE_THP_HUGE)); | |
e34aa03c AK |
260 | } |
261 | ||
e34aa03c AK |
262 | static inline int pmd_large(pmd_t pmd) |
263 | { | |
6a119eae | 264 | return !!(pmd_val(pmd) & _PAGE_PTE); |
e34aa03c AK |
265 | } |
266 | ||
267 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | |
268 | { | |
269 | return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); | |
270 | } | |
271 | ||
e34aa03c AK |
272 | #define __HAVE_ARCH_PMD_SAME |
273 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | |
274 | { | |
275 | return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); | |
276 | } | |
277 | ||
278 | static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, | |
279 | unsigned long addr, pmd_t *pmdp) | |
280 | { | |
281 | unsigned long old; | |
282 | ||
283 | if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) | |
284 | return 0; | |
285 | old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); | |
286 | return ((old & _PAGE_ACCESSED) != 0); | |
287 | } | |
288 | ||
289 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | |
290 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |
291 | pmd_t *pmdp) | |
292 | { | |
293 | ||
294 | if ((pmd_val(*pmdp) & _PAGE_RW) == 0) | |
295 | return; | |
296 | ||
297 | pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); | |
298 | } | |
299 | ||
300 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
c605782b | 301 | #endif /* __ASSEMBLY__ */ |
ab537dca AK |
302 | |
303 | #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ |