Commit | Line | Data |
---|---|---|
ab537dca AK |
1 | #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H |
2 | #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H | |
3 | ||
dd1842a2 AK |
/*
 * Index sizes for the PTE, PMD, PUD and PGD levels of the page table in
 * this (64K hash) configuration.
 * NOTE(review): presumably log2 of the number of entries per table at each
 * level -- confirm against the users of these macros.
 */
4 | #define H_PTE_INDEX_SIZE 8 |
5 | #define H_PMD_INDEX_SIZE 5 | |
6 | #define H_PUD_INDEX_SIZE 5 | |
7 | #define H_PGD_INDEX_SIZE 12 | |
ab537dca AK |
8 | |
/*
 * NOTE(review): the include guard names this the 64K hash header, yet the
 * comment below speaks of a 4k base page size -- confirm the wording is not
 * a leftover from the 4K variant of this header.
 */
9 | /* With 4k base page size, hugepage PTEs go at the PMD level */ | |
10 | #define MIN_HUGEPTE_SHIFT PAGE_SHIFT | |
11 | ||
945537df AK |
12 | #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ |
13 | #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ | |
bf680d51 | 14 | /* |
945537df AK |
15 | * We need to differentiate between explicit huge pages and THP huge |
16 | * pages, since a THP huge page also needs to track real subpage details. | |
 * H_PAGE_THP_HUGE reuses the H_PAGE_4K_PFN bit for that purpose.
16c2d476 | 17 | */ |
945537df AK |
18 | #define H_PAGE_THP_HUGE H_PAGE_4K_PFN |
19 | ||
20 | /* | |
21 | * Used to track which subpage groups are valid when H_PAGE_COMBO is set. | |
22 | * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND. | |
23 | */ | |
24 | #define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) | |
3c726f8d BH |
25 | |
26 | /* PTE flags to conserve for HPTE identification */ | |
945537df AK |
27 | #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ |
28 | H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) | |
62607bc6 AK |
29 | /* |
30 | * We support 16 fragments per PTE page of 64K size. | |
31 | */ | |
5ed7ecd0 | 32 | #define H_PTE_FRAG_NR 16 |
62607bc6 AK |
33 | /* |
34 | * We use a 2K PTE page fragment and another 2K for storing the | |
35 | * real_pte_t hash index, i.e. 4K (1 << 12) per fragment in total. | |
36 | */ | |
5ed7ecd0 | 37 | #define H_PTE_FRAG_SIZE_SHIFT 12 |
62607bc6 AK |
/*
 * NOTE(review): PTE_FRAG_SIZE is built from PTE_FRAG_SIZE_SHIFT, which is
 * not defined in the visible part of this header (only
 * H_PTE_FRAG_SIZE_SHIFT is) -- confirm it is provided elsewhere and resolves
 * to the shift above.
 */
38 | #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) |
39 | ||
c605782b | 40 | #ifndef __ASSEMBLY__ |
96270b1f | 41 | #include <asm/errno.h> |
3c726f8d | 42 | |
c605782b BH |
43 | /* |
44 | * With 64K pages on hash table, we have a special PTE format that | |
45 | * uses a second "half" of the page table to encode sub-page information | |
46 | * in order to deal with 64K made of 4K HW pages. Thus we override the | |
47 | * generic accessors and iterators here | |
48 | */ | |
85c1fafd AK |
49 | #define __real_pte __real_pte |
50 | static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) | |
51 | { | |
52 | real_pte_t rpte; | |
506b863c | 53 | unsigned long *hidxp; |
85c1fafd AK |
54 | |
55 | rpte.pte = pte; | |
56 | rpte.hidx = 0; | |
945537df | 57 | if (pte_val(pte) & H_PAGE_COMBO) { |
85c1fafd | 58 | /* |
945537df | 59 | * Make sure we order the hidx load against the H_PAGE_COMBO |
85c1fafd AK |
60 | * check. The store side ordering is done in __hash_page_4K |
61 | */ | |
62 | smp_rmb(); | |
506b863c AK |
63 | hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); |
64 | rpte.hidx = *hidxp; | |
85c1fafd AK |
65 | } |
66 | return rpte; | |
67 | } | |
68 | ||
69 | static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) | |
70 | { | |
945537df | 71 | if ((pte_val(rpte.pte) & H_PAGE_COMBO)) |
85c1fafd | 72 | return (rpte.hidx >> (index<<2)) & 0xf; |
945537df | 73 | return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; |
85c1fafd AK |
74 | } |
75 | ||
/* Extract the pte member from a real_pte_t. */
3c726f8d | 76 | #define __rpte_to_pte(r) ((r).pte) |
/*
 * Defined elsewhere.
 * NOTE(review): presumably reports whether the sub-page at @index of @rpte
 * is valid -- confirm against the definition.
 */
bf680d51 | 77 | extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); |
ab537dca AK |
/*
 * Open a loop over the hashed sub-pages of a PTE; the loop must be closed
 * with pte_iterate_hashed_end(), which supplies the matching braces.
 *
 * @shift is assigned mmu_psize_defs[psize].shift, @index counts from 0 and
 * @vpn is advanced by one sub-page per iteration, so all three must be
 * assignable. When @psize is MMU_PAGE_4K or MMU_PAGE_64K_AP the body only
 * runs for indexes where __rpte_sub_valid(rpte, index) is true.
 */
78 | /* |
79 | * Trick: we set __end to va + 64k, which happens to work for | |
3c726f8d BH |
80 | * a 16M page as well, as we want only one iteration |
81 | */ | |
5524a27d AK |
82 | #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ |
83 | do { \ | |
84 | unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ | |
85 | unsigned __split = (psize == MMU_PAGE_4K || \ | |
86 | psize == MMU_PAGE_64K_AP); \ | |
87 | shift = mmu_psize_defs[psize].shift; \ | |
88 | for (index = 0; vpn < __end; index++, \ | |
89 | vpn += (1L << (shift - VPN_SHIFT))) { \ | |
90 | if (!__split || __rpte_sub_valid(rpte, index)) \ | |
91 | do { |
3c726f8d BH |
92 | |
93 | #define pte_iterate_hashed_end() } while(0); } } while(0) | |
94 | ||
/*
 * Select the MMU page size index for a PTE value: MMU_PAGE_4K when
 * H_PAGE_COMBO is set in @pte, MMU_PAGE_64K otherwise. @mm and @addr are
 * accepted but not used.
 */
#define pte_pagesize_index(mm, addr, pte)	\
	((((pte) & H_PAGE_COMBO) != 0) ? MMU_PAGE_4K : MMU_PAGE_64K)
3c726f8d | 97 | |
96270b1f AK |
98 | extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
99 | unsigned long pfn, unsigned long size, pgprot_t); | |
6cc1a0ee AK |
100 | static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, |
101 | unsigned long pfn, pgprot_t prot) | |
96270b1f AK |
102 | { |
103 | if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) { | |
104 | WARN(1, "remap_4k_pfn called with wrong pfn value\n"); | |
105 | return -EINVAL; | |
106 | } | |
107 | return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, | |
945537df | 108 | __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); |
96270b1f | 109 | } |
721151d0 | 110 | |
/*
 * Table sizes in bytes for each page table level. A PTE table is one PTE
 * fragment. With CONFIG_TRANSPARENT_HUGEPAGE the PMD table reserves one
 * extra unsigned long per PMD entry after the pmd_t array.
 * NOTE(review): get_hpte_slot_array() reads at pmdp + PTRS_PER_PMD, which
 * looks like the consumer of that second half -- confirm.
 */
dd1842a2 | 111 | #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE |
62607bc6 | 112 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
dd1842a2 AK |
113 | #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ |
114 | (sizeof(unsigned long) << PMD_INDEX_SIZE)) | |
62607bc6 | 115 | #else |
dd1842a2 | 116 | #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) |
62607bc6 | 117 | #endif |
dd1842a2 AK |
118 | #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) |
119 | #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) | |
ab537dca | 120 | |
e34aa03c AK |
121 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
/*
 * Defined elsewhere.
 * NOTE(review): presumably clears the @clr bits and sets the @set bits in
 * the hugepage PMD at @pmdp and returns the previous value -- confirm
 * against the definition.
 */
122 | extern unsigned long pmd_hugepage_update(struct mm_struct *mm, | |
123 | unsigned long addr, | |
124 | pmd_t *pmdp, | |
125 | unsigned long clr, | |
126 | unsigned long set); | |
127 | static inline char *get_hpte_slot_array(pmd_t *pmdp) | |
128 | { | |
129 | /* | |
130 | * The hpte hindex is stored in the pgtable whose address is in the | |
131 | * second half of the PMD | |
132 | * | |
133 | * Order this load with the test for pmd_trans_huge in the caller | |
134 | */ | |
135 | smp_rmb(); | |
136 | return *(char **)(pmdp + PTRS_PER_PMD); | |
137 | ||
138 | ||
139 | } | |
/*
 * The linux hugepage PMD now includes the pmd entries followed by the address
 * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits:
 * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
 * HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and with
 * 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
 *
 * The top three bits are intentionally left as zero. These memory locations
 * are also used as normal page PTE pointers. So if we have any pointers
 * left around while we collapse a hugepage, we need to make sure the
 * _PAGE_PRESENT bit of that is zero when we look at them.
 */

/* Return the valid bit (bit 0) of the slot byte at @index. */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	unsigned char entry = hpte_slot_array[index];

	return entry & 0x1;
}
156 | ||
/*
 * Return the slot byte at @index with the valid bit (bit 0) shifted out,
 * i.e. the hash index bits stored above it.
 */
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	unsigned char entry = hpte_slot_array[index];

	return entry >> 1;
}
162 | ||
/*
 * Record @hidx in the slot byte at @index and mark the slot valid.
 *
 * The slot byte layout is [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid ]
 * and the top three bits must stay zero, so @hidx is masked to its low four
 * bits before being stored. Values in the range 0..0xf are stored unchanged.
 *
 * @hpte_slot_array: slot array, one byte per HPTE entry.
 * @index:           entry to update.
 * @hidx:            hash index (secondary bit plus 3-bit slot) to store.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	hpte_slot_array[index] = ((hidx & 0xf) << 1) | 0x1;
}
168 | ||
169 | /* | |
170 | * | |
171 | * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs | |
172 | * page. The hugetlbfs page table walking and mangling paths are totally | |
173 | * separated form the core VM paths and they're differentiated by | |
174 | * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. | |
175 | * | |
176 | * pmd_trans_huge() is defined as false at build time if | |
177 | * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build | |
178 | * time in such case. | |
179 | * | |
180 | * For ppc64 we need to differntiate from explicit hugepages from THP, because | |
181 | * for THP we also track the subpage details at the pmd level. We don't do | |
182 | * that for explicit huge pages. | |
183 | * | |
184 | */ | |
6cc1a0ee | 185 | static inline int hash__pmd_trans_huge(pmd_t pmd) |
e34aa03c | 186 | { |
945537df AK |
187 | return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == |
188 | (_PAGE_PTE | H_PAGE_THP_HUGE)); | |
e34aa03c AK |
189 | } |
190 | ||
6cc1a0ee | 191 | static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b) |
e34aa03c | 192 | { |
ee3caed3 | 193 | return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); |
e34aa03c AK |
194 | } |
195 | ||
e34aa03c | 196 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
c605782b | 197 | #endif /* __ASSEMBLY__ */ |
ab537dca AK |
198 | |
199 | #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ |