Commit | Line | Data |
---|---|---|
caab277b | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
4f04d8f0 CM |
2 | /* |
3 | * Copyright (C) 2012 ARM Ltd. | |
4f04d8f0 CM |
4 | */ |
5 | #ifndef __ASM_PGTABLE_H | |
6 | #define __ASM_PGTABLE_H | |
7 | ||
2f4b829c | 8 | #include <asm/bug.h> |
4f04d8f0 CM |
9 | #include <asm/proc-fns.h> |
10 | ||
11 | #include <asm/memory.h> | |
34bfeea4 | 12 | #include <asm/mte.h> |
4f04d8f0 | 13 | #include <asm/pgtable-hwdef.h> |
3eca86e7 | 14 | #include <asm/pgtable-prot.h> |
3403e56b | 15 | #include <asm/tlbflush.h> |
4f04d8f0 CM |
16 | |
17 | /* | |
3e1907d5 | 18 | * VMALLOC range. |
08375198 | 19 | * |
f9040773 | 20 | * VMALLOC_START: beginning of the kernel vmalloc space |
d432b8d5 | 21 | * VMALLOC_END: extends to the available space below vmemmap |
4f04d8f0 | 22 | */ |
f9040773 | 23 | #define VMALLOC_START (MODULES_END) |
d432b8d5 | 24 | #if VA_BITS == VA_BITS_MIN |
b730b0f2 | 25 | #define VMALLOC_END (VMEMMAP_START - SZ_8M) |
d432b8d5 AB |
26 | #else |
27 | #define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT) | |
28 | #define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M) | |
29 | #endif | |
4f04d8f0 | 30 | |
7bc1a0f9 AB |
31 | #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) |
32 | ||
4f04d8f0 | 33 | #ifndef __ASSEMBLY__ |
2f4b829c | 34 | |
3bbf7157 | 35 | #include <asm/cmpxchg.h> |
961faac1 | 36 | #include <asm/fixmap.h> |
7f955be9 | 37 | #include <asm/por.h> |
2f4b829c | 38 | #include <linux/mmdebug.h> |
86c9e812 WD |
39 | #include <linux/mm_types.h> |
40 | #include <linux/sched.h> | |
42b25471 | 41 | #include <linux/page_table_check.h> |
2f4b829c | 42 | |
5fdd05ef RR |
43 | static inline void emit_pte_barriers(void) |
44 | { | |
45 | /* | |
46 | * These barriers are emitted under certain conditions after a pte entry | |
47 | * was modified (see e.g. __set_pte_complete()). The dsb makes the store | |
48 | * visible to the table walker. The isb ensures that any previous | |
49 | * speculative "invalid translation" marker that is in the CPU's | |
50 | * pipeline gets cleared, so that any access to that address after | |
51 | * setting the pte to valid won't cause a spurious fault. If the thread | |
52 | * gets preempted after storing to the pgtable but before emitting these | |
53 | * barriers, __switch_to() emits a dsb which ensure the walker gets to | |
54 | * see the store. There is no guarantee of an isb being issued though. | |
55 | * This is safe because it will still get issued (albeit on a | |
56 | * potentially different CPU) when the thread starts running again, | |
57 | * before any access to the address. | |
58 | */ | |
59 | dsb(ishst); | |
60 | isb(); | |
61 | } | |
62 | ||
63 | static inline void queue_pte_barriers(void) | |
64 | { | |
65 | unsigned long flags; | |
66 | ||
b81c6884 RR |
67 | if (in_interrupt()) { |
68 | emit_pte_barriers(); | |
69 | return; | |
70 | } | |
71 | ||
5fdd05ef RR |
72 | flags = read_thread_flags(); |
73 | ||
74 | if (flags & BIT(TIF_LAZY_MMU)) { | |
75 | /* Avoid the atomic op if already set. */ | |
76 | if (!(flags & BIT(TIF_LAZY_MMU_PENDING))) | |
77 | set_thread_flag(TIF_LAZY_MMU_PENDING); | |
78 | } else { | |
79 | emit_pte_barriers(); | |
80 | } | |
81 | } | |
82 | ||
83 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | |
84 | static inline void arch_enter_lazy_mmu_mode(void) | |
85 | { | |
1ef3095b RR |
86 | /* |
87 | * lazy_mmu_mode is not supposed to permit nesting. But in practice this | |
88 | * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation | |
89 | * inside a lazy_mmu_mode section (such as zap_pte_range()) will change | |
90 | * permissions on the linear map with apply_to_page_range(), which | |
91 | * re-enters lazy_mmu_mode. So we tolerate nesting in our | |
92 | * implementation. The first call to arch_leave_lazy_mmu_mode() will | |
93 | * flush and clear the flag such that the remainder of the work in the | |
94 | * outer nest behaves as if outside of lazy mmu mode. This is safe and | |
95 | * keeps tracking simple. | |
96 | */ | |
97 | ||
b81c6884 RR |
98 | if (in_interrupt()) |
99 | return; | |
100 | ||
5fdd05ef RR |
101 | set_thread_flag(TIF_LAZY_MMU); |
102 | } | |
103 | ||
104 | static inline void arch_flush_lazy_mmu_mode(void) | |
105 | { | |
b81c6884 RR |
106 | if (in_interrupt()) |
107 | return; | |
108 | ||
5fdd05ef RR |
109 | if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING)) |
110 | emit_pte_barriers(); | |
111 | } | |
112 | ||
113 | static inline void arch_leave_lazy_mmu_mode(void) | |
114 | { | |
b81c6884 RR |
115 | if (in_interrupt()) |
116 | return; | |
117 | ||
5fdd05ef RR |
118 | arch_flush_lazy_mmu_mode(); |
119 | clear_thread_flag(TIF_LAZY_MMU); | |
120 | } | |
121 | ||
a7ac1cfa ZY |
122 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
123 | #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE | |
124 | ||
125 | /* Set stride and tlb_level in flush_*_tlb_range */ | |
126 | #define flush_pmd_tlb_range(vma, addr, end) \ | |
127 | __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2) | |
128 | #define flush_pud_tlb_range(vma, addr, end) \ | |
129 | __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) | |
130 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
131 | ||
6a1bdb17 WD |
132 | /* |
133 | * Outside of a few very special situations (e.g. hibernation), we always | |
134 | * use broadcast TLB invalidation instructions, therefore a spurious page | |
135 | * fault on one CPU which has been handled concurrently by another CPU | |
136 | * does not need to perform additional invalidation. | |
137 | */ | |
99c29133 | 138 | #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) |
6a1bdb17 | 139 | |
4f04d8f0 CM |
140 | /* |
141 | * ZERO_PAGE is a global shared page that is always zero: used | |
142 | * for zero-mapped memory areas etc. | |
143 | */ | |
5227cfa7 | 144 | extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; |
2077be67 | 145 | #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) |
4f04d8f0 | 146 | |
2cf660eb GS |
147 | #define pte_ERROR(e) \ |
148 | pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) | |
7078db46 | 149 | |
75387b92 | 150 | #ifdef CONFIG_ARM64_PA_BITS_52 |
c7c386fb AB |
151 | static inline phys_addr_t __pte_to_phys(pte_t pte) |
152 | { | |
925a0eb4 | 153 | pte_val(pte) &= ~PTE_MAYBE_SHARED; |
c7c386fb | 154 | return (pte_val(pte) & PTE_ADDR_LOW) | |
a4ee2861 | 155 | ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); |
c7c386fb AB |
156 | } |
157 | static inline pteval_t __phys_to_pte_val(phys_addr_t phys) | |
158 | { | |
925a0eb4 | 159 | return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK; |
c7c386fb | 160 | } |
75387b92 | 161 | #else |
2d7872f3 AK |
162 | static inline phys_addr_t __pte_to_phys(pte_t pte) |
163 | { | |
164 | return pte_val(pte) & PTE_ADDR_LOW; | |
165 | } | |
166 | ||
167 | static inline pteval_t __phys_to_pte_val(phys_addr_t phys) | |
168 | { | |
169 | return phys; | |
170 | } | |
75387b92 | 171 | #endif |
4f04d8f0 | 172 | |
75387b92 KM |
173 | #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) |
174 | #define pfn_pte(pfn,prot) \ | |
175 | __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) | |
4f04d8f0 CM |
176 | |
177 | #define pte_none(pte) (!pte_val(pte)) | |
5a00bfd6 RR |
178 | #define __pte_clear(mm, addr, ptep) \ |
179 | __set_pte(ptep, __pte(0)) | |
4f04d8f0 | 180 | #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) |
7078db46 | 181 | |
4f04d8f0 CM |
182 | /* |
183 | * The following only work if pte_present(). Undefined behaviour otherwise. | |
184 | */ | |
f0f5863a | 185 | #define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) |
84fe6826 SC |
186 | #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) |
187 | #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) | |
188 | #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) | |
d0ba9612 | 189 | #define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY)) |
42b25471 | 190 | #define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) |
ec663d96 | 191 | #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) |
93ef666a | 192 | #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) |
73b20c84 | 193 | #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) |
34bfeea4 CM |
194 | #define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ |
195 | PTE_ATTRINDX(MT_NORMAL_TAGGED)) | |
4f04d8f0 | 196 | |
/*
 * Return the next CONT_PTE_SIZE-aligned boundary after @addr, clamped to
 * @end.
 *
 * Both sides of the comparison have 1 subtracted, so a boundary that wrapped
 * around to 0 compares as the largest unsigned long value and @end is
 * returned instead.
 *
 * @end is captured in a local so that it is evaluated exactly once, even if
 * the caller passes an expression with side effects.
 */
#define pte_cont_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
	__typeof__(end) __cont_end = (end);					\
	(__boundary - 1 < __cont_end - 1) ? __boundary : __cont_end;		\
})
201 | ||
/*
 * Return the next CONT_PMD_SIZE-aligned boundary after @addr, clamped to
 * @end.
 *
 * Both sides of the comparison have 1 subtracted, so a boundary that wrapped
 * around to 0 compares as the largest unsigned long value and @end is
 * returned instead.
 *
 * @end is captured in a local so that it is evaluated exactly once, even if
 * the caller passes an expression with side effects.
 */
#define pmd_cont_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;	\
	__typeof__(end) __cont_end = (end);					\
	(__boundary - 1 < __cont_end - 1) ? __boundary : __cont_end;		\
})
206 | ||
d0ba9612 | 207 | #define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte)) |
2f4b829c CM |
208 | #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) |
209 | #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) | |
210 | ||
766ffb69 | 211 | #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) |
b28c74e2 RR |
212 | #define pte_present_invalid(pte) \ |
213 | ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID) | |
18107f8a VM |
214 | /* |
215 | * Execute-only user mappings do not have the PTE_USER bit set. All valid | |
216 | * kernel mappings have the PTE_UXN bit set. | |
217 | */ | |
ec663d96 | 218 | #define pte_valid_not_user(pte) \ |
18107f8a | 219 | ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) |
4602e575 RR |
220 | /* |
221 | * Returns true if the pte is valid and has the contiguous bit set. | |
222 | */ | |
223 | #define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte)) | |
76c714be WD |
224 | /* |
225 | * Could the pte be present in the TLB? We must check mm_tlb_flush_pending | |
226 | * so that we don't erroneously return false for pages that have been | |
227 | * remapped as PROT_NONE but are yet to be flushed from the TLB. | |
07509e10 | 228 | * Note that we can't make any assumptions based on the state of the access |
5a00bfd6 | 229 | * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the |
07509e10 | 230 | * TLB. |
76c714be WD |
231 | */ |
232 | #define pte_accessible(mm, pte) \ | |
07509e10 | 233 | (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) |
4f04d8f0 | 234 | |
7f955be9 JG |
235 | static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute) |
236 | { | |
237 | u64 por; | |
238 | ||
239 | if (!system_supports_poe()) | |
240 | return true; | |
241 | ||
242 | por = read_sysreg_s(SYS_POR_EL0); | |
243 | ||
244 | if (write) | |
245 | return por_elx_allows_write(por, pkey); | |
246 | ||
247 | if (execute) | |
248 | return por_elx_allows_exec(por, pkey); | |
249 | ||
250 | return por_elx_allows_read(por, pkey); | |
251 | } | |
252 | ||
6218f96c | 253 | /* |
18107f8a VM |
254 | * p??_access_permitted() is true for valid user mappings (PTE_USER |
255 | * bit set, subject to the write permission check). For execute-only | |
256 | * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits | |
257 | * not set) must return false. PROT_NONE mappings do not have the | |
258 | * PTE_VALID bit set. | |
6218f96c | 259 | */ |
fc2d9cd3 | 260 | #define pte_access_permitted_no_overlay(pte, write) \ |
18107f8a | 261 | (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) |
fc2d9cd3 | 262 | #define pte_access_permitted(pte, write) \ |
7f955be9 JG |
263 | (pte_access_permitted_no_overlay(pte, write) && \ |
264 | por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false)) | |
6218f96c CM |
265 | #define pmd_access_permitted(pmd, write) \ |
266 | (pte_access_permitted(pmd_pte(pmd), (write))) | |
267 | #define pud_access_permitted(pud, write) \ | |
268 | (pte_access_permitted(pud_pte(pud), (write))) | |
269 | ||
b6d4f280 | 270 | static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) |
44b6dfc5 | 271 | { |
b6d4f280 | 272 | pte_val(pte) &= ~pgprot_val(prot); |
44b6dfc5 SC |
273 | return pte; |
274 | } | |
275 | ||
b6d4f280 | 276 | static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) |
44b6dfc5 | 277 | { |
b6d4f280 | 278 | pte_val(pte) |= pgprot_val(prot); |
44b6dfc5 SC |
279 | return pte; |
280 | } | |
281 | ||
b65399f6 AK |
282 | static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) |
283 | { | |
284 | pmd_val(pmd) &= ~pgprot_val(prot); | |
285 | return pmd; | |
286 | } | |
287 | ||
288 | static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) | |
289 | { | |
290 | pmd_val(pmd) |= pgprot_val(prot); | |
291 | return pmd; | |
292 | } | |
293 | ||
2f0584f3 | 294 | static inline pte_t pte_mkwrite_novma(pte_t pte) |
b6d4f280 | 295 | { |
73e86cb0 CM |
296 | pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); |
297 | pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); | |
298 | return pte; | |
b6d4f280 LA |
299 | } |
300 | ||
44b6dfc5 SC |
301 | static inline pte_t pte_mkclean(pte_t pte) |
302 | { | |
8781bcbc SC |
303 | pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY)); |
304 | pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); | |
305 | ||
306 | return pte; | |
44b6dfc5 SC |
307 | } |
308 | ||
309 | static inline pte_t pte_mkdirty(pte_t pte) | |
310 | { | |
8781bcbc SC |
311 | pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); |
312 | ||
313 | if (pte_write(pte)) | |
314 | pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); | |
315 | ||
316 | return pte; | |
44b6dfc5 SC |
317 | } |
318 | ||
ff1712f9 WD |
319 | static inline pte_t pte_wrprotect(pte_t pte) |
320 | { | |
321 | /* | |
322 | * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY | |
323 | * clear), set the PTE_DIRTY bit. | |
324 | */ | |
325 | if (pte_hw_dirty(pte)) | |
6477c388 | 326 | pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); |
ff1712f9 WD |
327 | |
328 | pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); | |
329 | pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); | |
330 | return pte; | |
331 | } | |
332 | ||
44b6dfc5 SC |
333 | static inline pte_t pte_mkold(pte_t pte) |
334 | { | |
b6d4f280 | 335 | return clear_pte_bit(pte, __pgprot(PTE_AF)); |
44b6dfc5 SC |
336 | } |
337 | ||
338 | static inline pte_t pte_mkyoung(pte_t pte) | |
339 | { | |
b6d4f280 | 340 | return set_pte_bit(pte, __pgprot(PTE_AF)); |
44b6dfc5 SC |
341 | } |
342 | ||
343 | static inline pte_t pte_mkspecial(pte_t pte) | |
344 | { | |
b6d4f280 | 345 | return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); |
44b6dfc5 | 346 | } |
4f04d8f0 | 347 | |
93ef666a JL |
348 | static inline pte_t pte_mkcont(pte_t pte) |
349 | { | |
ced84170 | 350 | return set_pte_bit(pte, __pgprot(PTE_CONT)); |
93ef666a JL |
351 | } |
352 | ||
353 | static inline pte_t pte_mknoncont(pte_t pte) | |
354 | { | |
355 | return clear_pte_bit(pte, __pgprot(PTE_CONT)); | |
356 | } | |
357 | ||
16922658 | 358 | static inline pte_t pte_mkvalid(pte_t pte) |
5ebe3a44 JM |
359 | { |
360 | return set_pte_bit(pte, __pgprot(PTE_VALID)); | |
361 | } | |
362 | ||
b28c74e2 RR |
363 | static inline pte_t pte_mkinvalid(pte_t pte) |
364 | { | |
365 | pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); | |
366 | pte = clear_pte_bit(pte, __pgprot(PTE_VALID)); | |
367 | return pte; | |
368 | } | |
369 | ||
66b3923a DW |
370 | static inline pmd_t pmd_mkcont(pmd_t pmd) |
371 | { | |
372 | return __pmd(pmd_val(pmd) | PMD_SECT_CONT); | |
373 | } | |
374 | ||
73b20c84 RM |
375 | static inline pte_t pte_mkdevmap(pte_t pte) |
376 | { | |
30e23538 | 377 | return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); |
73b20c84 RM |
378 | } |
379 | ||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
/* Return 1 if PTE_UFFD_WP is set in @pte, 0 otherwise. */
static inline int pte_uffd_wp(pte_t pte)
{
	return (pte_val(pte) & PTE_UFFD_WP) != 0;
}

/* Set PTE_UFFD_WP in @pte and write-protect the result. */
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
	pte_t marked = set_pte_bit(pte, __pgprot(PTE_UFFD_WP));

	return pte_wrprotect(marked);
}

/* Return @pte with PTE_UFFD_WP cleared. */
static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
	pgprot_t uffd_wp = __pgprot(PTE_UFFD_WP);

	return clear_pte_bit(pte, uffd_wp);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
396 | ||
1fcb7cea | 397 | static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) |
4f04d8f0 | 398 | { |
20a004e7 | 399 | WRITE_ONCE(*ptep, pte); |
1fcb7cea RR |
400 | } |
401 | ||
f89b399e | 402 | static inline void __set_pte_complete(pte_t pte) |
1fcb7cea | 403 | { |
7f0b1bf0 CM |
404 | /* |
405 | * Only if the new pte is valid and kernel, otherwise TLB maintenance | |
f89b399e | 406 | * has the necessary barriers. |
7f0b1bf0 | 407 | */ |
5fdd05ef RR |
408 | if (pte_valid_not_user(pte)) |
409 | queue_pte_barriers(); | |
4f04d8f0 CM |
410 | } |
411 | ||
f89b399e RR |
412 | static inline void __set_pte(pte_t *ptep, pte_t pte) |
413 | { | |
414 | __set_pte_nosync(ptep, pte); | |
415 | __set_pte_complete(pte); | |
416 | } | |
417 | ||
5a00bfd6 | 418 | static inline pte_t __ptep_get(pte_t *ptep) |
53273655 RR |
419 | { |
420 | return READ_ONCE(*ptep); | |
421 | } | |
422 | ||
907e21c1 | 423 | extern void __sync_icache_dcache(pte_t pteval); |
8ef41786 | 424 | bool pgattr_change_is_safe(pteval_t old, pteval_t new); |
4f04d8f0 | 425 | |
2f4b829c CM |
426 | /* |
427 | * PTE bits configuration in the presence of hardware Dirty Bit Management | |
428 | * (PTE_WRITE == PTE_DBM): | |
429 | * | |
430 | * Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw) | |
431 | * 0 0 | 1 0 0 | |
432 | * 0 1 | 1 1 0 | |
433 | * 1 0 | 1 0 1 | |
434 | * 1 1 | 0 1 x | |
435 | * | |
436 | * When hardware DBM is not present, the software PTE_DIRTY bit is updated via | |
437 | * the page fault mechanism. Checking the dirty status of a pte becomes: | |
438 | * | |
b847415c | 439 | * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) |
2f4b829c | 440 | */ |
9b604722 | 441 | |
004fc58f | 442 | static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, |
9b604722 | 443 | pte_t pte) |
4f04d8f0 | 444 | { |
20a004e7 WD |
445 | pte_t old_pte; |
446 | ||
9b604722 MR |
447 | if (!IS_ENABLED(CONFIG_DEBUG_VM)) |
448 | return; | |
449 | ||
5a00bfd6 | 450 | old_pte = __ptep_get(ptep); |
9b604722 MR |
451 | |
452 | if (!pte_valid(old_pte) || !pte_valid(pte)) | |
453 | return; | |
454 | if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) | |
455 | return; | |
02522463 | 456 | |
2f4b829c | 457 | /* |
9b604722 | 458 | * Check for potential race with hardware updates of the pte |
5a00bfd6 | 459 | * (__ptep_set_access_flags safely changes valid ptes without going |
9b604722 | 460 | * through an invalid entry). |
2f4b829c | 461 | */ |
9b604722 MR |
462 | VM_WARN_ONCE(!pte_young(pte), |
463 | "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", | |
464 | __func__, pte_val(old_pte), pte_val(pte)); | |
465 | VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), | |
466 | "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", | |
467 | __func__, pte_val(old_pte), pte_val(pte)); | |
004fc58f AK |
468 | VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)), |
469 | "%s: unsafe attribute change: 0x%016llx -> 0x%016llx", | |
470 | __func__, pte_val(old_pte), pte_val(pte)); | |
9b604722 MR |
471 | } |
472 | ||
3425cec4 | 473 | static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) |
9b604722 MR |
474 | { |
475 | if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) | |
476 | __sync_icache_dcache(pte); | |
477 | ||
69e3b846 SP |
478 | /* |
479 | * If the PTE would provide user space access to the tags associated | |
480 | * with it then ensure that the MTE tags are synchronised. Although | |
fc2d9cd3 JG |
481 | * pte_access_permitted_no_overlay() returns false for exec only |
482 | * mappings, they don't expose tags (instruction fetches don't check | |
483 | * tags). | |
69e3b846 | 484 | */ |
fc2d9cd3 | 485 | if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) && |
332c151c | 486 | !pte_special(pte) && pte_tagged(pte)) |
3425cec4 | 487 | mte_sync_tags(pte, nr_pages); |
4f04d8f0 CM |
488 | } |
489 | ||
6e8f5887 RR |
490 | /* |
491 | * Select all bits except the pfn | |
492 | */ | |
0515e022 | 493 | #define pte_pgprot pte_pgprot |
6e8f5887 RR |
494 | static inline pgprot_t pte_pgprot(pte_t pte) |
495 | { | |
496 | unsigned long pfn = pte_pfn(pte); | |
497 | ||
498 | return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); | |
499 | } | |
500 | ||
c1bd2b40 RR |
501 | #define pte_advance_pfn pte_advance_pfn |
502 | static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) | |
6e8f5887 | 503 | { |
c1bd2b40 | 504 | return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); |
6e8f5887 RR |
505 | } |
506 | ||
084bd298 SC |
507 | /* |
508 | * Hugetlb definitions. | |
509 | */ | |
66b3923a | 510 | #define HUGE_MAX_HSTATE 4 |
084bd298 SC |
511 | #define HPAGE_SHIFT PMD_SHIFT |
512 | #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) | |
513 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | |
514 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | |
4f04d8f0 | 515 | |
75387b92 KM |
516 | static inline pte_t pgd_pte(pgd_t pgd) |
517 | { | |
518 | return __pte(pgd_val(pgd)); | |
519 | } | |
520 | ||
e9f63768 MR |
521 | static inline pte_t p4d_pte(p4d_t p4d) |
522 | { | |
523 | return __pte(p4d_val(p4d)); | |
524 | } | |
525 | ||
29e56940 SC |
526 | static inline pte_t pud_pte(pud_t pud) |
527 | { | |
528 | return __pte(pud_val(pud)); | |
529 | } | |
530 | ||
eb3f0624 PA |
531 | static inline pud_t pte_pud(pte_t pte) |
532 | { | |
533 | return __pud(pte_val(pte)); | |
534 | } | |
535 | ||
29e56940 SC |
536 | static inline pmd_t pud_pmd(pud_t pud) |
537 | { | |
538 | return __pmd(pud_val(pud)); | |
539 | } | |
540 | ||
9c7e535f SC |
541 | static inline pte_t pmd_pte(pmd_t pmd) |
542 | { | |
543 | return __pte(pmd_val(pmd)); | |
544 | } | |
af074848 | 545 | |
9c7e535f SC |
546 | static inline pmd_t pte_pmd(pte_t pte) |
547 | { | |
548 | return __pmd(pte_val(pte)); | |
549 | } | |
af074848 | 550 | |
f7f0097a | 551 | static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) |
8ce837ce | 552 | { |
dba95480 | 553 | return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT); |
f7f0097a AK |
554 | } |
555 | ||
556 | static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) | |
8ce837ce | 557 | { |
dba95480 | 558 | return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT); |
8ce837ce AB |
559 | } |
560 | ||
570ef363 DH |
561 | static inline pte_t pte_swp_mkexclusive(pte_t pte) |
562 | { | |
563 | return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); | |
564 | } | |
565 | ||
403d1338 | 566 | static inline bool pte_swp_exclusive(pte_t pte) |
570ef363 DH |
567 | { |
568 | return pte_val(pte) & PTE_SWP_EXCLUSIVE; | |
569 | } | |
570 | ||
571 | static inline pte_t pte_swp_clear_exclusive(pte_t pte) | |
572 | { | |
573 | return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); | |
574 | } | |
575 | ||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
/* Return @pte with PTE_SWP_UFFD_WP set. */
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
	pgprot_t swp_uffd_wp = __pgprot(PTE_SWP_UFFD_WP);

	return set_pte_bit(pte, swp_uffd_wp);
}

/* Return 1 if PTE_SWP_UFFD_WP is set in @pte, 0 otherwise. */
static inline int pte_swp_uffd_wp(pte_t pte)
{
	return (pte_val(pte) & PTE_SWP_UFFD_WP) != 0;
}

/* Return @pte with PTE_SWP_UFFD_WP cleared. */
static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
	pgprot_t swp_uffd_wp = __pgprot(PTE_SWP_UFFD_WP);

	return clear_pte_bit(pte, swp_uffd_wp);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
592 | ||
#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
 *
 * pte_present_invalid() tells us that the pte is invalid from HW perspective
 * but present from SW perspective, so the fields are to be interpreted as per
 * the HW layout. The other two checks are the unique encoding that we use for
 * PROT_NONE. It is insufficient to only use the first check because we share
 * the same encoding scheme with pmds which support pmd_mkinvalid(), so can be
 * present-invalid without being PROT_NONE.
 */
static inline int pte_protnone(pte_t pte)
{
	if (!pte_present_invalid(pte))
		return 0;

	return !(pte_user(pte) || pte_user_exec(pte));
}

/* pmd flavour of pte_protnone(): applies the same test to the pmd's bits. */
static inline int pmd_protnone(pmd_t pmd)
{
	pte_t as_pte = pmd_pte(pmd);

	return pte_protnone(as_pte);
}
#endif
616 | ||
f0f5863a | 617 | #define pmd_present(pmd) pte_present(pmd_pte(pmd)) |
c164e038 | 618 | #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) |
9c7e535f | 619 | #define pmd_young(pmd) pte_young(pmd_pte(pmd)) |
0795edaf | 620 | #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) |
42b25471 KW |
621 | #define pmd_user(pmd) pte_user(pmd_pte(pmd)) |
622 | #define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd)) | |
d55863db | 623 | #define pmd_cont(pmd) pte_cont(pmd_pte(pmd)) |
9c7e535f | 624 | #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) |
9c7e535f | 625 | #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) |
2f0584f3 | 626 | #define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))) |
ab4db1f2 | 627 | #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) |
9c7e535f SC |
628 | #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) |
629 | #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) | |
b28c74e2 | 630 | #define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) |
5b32510a RR |
631 | #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP |
632 | #define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) | |
633 | #define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) | |
634 | #define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) | |
635 | #define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) | |
636 | #define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) | |
637 | #define pmd_swp_clear_uffd_wp(pmd) \ | |
638 | pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) | |
639 | #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ | |
af074848 | 640 | |
9c7e535f | 641 | #define pmd_write(pmd) pte_write(pmd_pte(pmd)) |
af074848 | 642 | |
1601df9e AK |
643 | static inline pmd_t pmd_mkhuge(pmd_t pmd) |
644 | { | |
645 | /* | |
646 | * It's possible that the pmd is present-invalid on entry | |
647 | * and in that case it needs to remain present-invalid on | |
648 | * exit. So ensure the VALID bit does not get modified. | |
649 | */ | |
650 | pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID; | |
651 | pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID; | |
652 | ||
653 | return __pmd((pmd_val(pmd) & ~mask) | val); | |
654 | } | |
af074848 | 655 | |
73b20c84 RM |
656 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
657 | #define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) | |
658 | #endif | |
30e23538 JH |
659 | static inline pmd_t pmd_mkdevmap(pmd_t pmd) |
660 | { | |
661 | return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); | |
662 | } | |
73b20c84 | 663 | |
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
/* Evaluates to 1 if PTE_SPECIAL is set in the pmd, 0 otherwise. */
#define pmd_special(pmd)	(!!(pmd_val(pmd) & PTE_SPECIAL))

/* Return @pmd with PTE_SPECIAL set. */
static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
	pgprot_t special = __pgprot(PTE_SPECIAL);

	return set_pmd_bit(pmd, special);
}
#endif
671 | ||
75387b92 KM |
672 | #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) |
673 | #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) | |
674 | #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) | |
675 | #define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) | |
af074848 | 676 | |
35a63966 | 677 | #define pud_young(pud) pte_young(pud_pte(pud)) |
eb3f0624 | 678 | #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) |
29e56940 | 679 | #define pud_write(pud) pte_write(pud_pte(pud)) |
75387b92 | 680 | |
1601df9e AK |
681 | static inline pud_t pud_mkhuge(pud_t pud) |
682 | { | |
683 | /* | |
684 | * It's possible that the pud is present-invalid on entry | |
685 | * and in that case it needs to remain present-invalid on | |
686 | * exit. So ensure the VALID bit does not get modified. | |
687 | */ | |
688 | pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID; | |
689 | pudval_t val = PUD_TYPE_SECT & ~PTE_VALID; | |
690 | ||
691 | return __pud((pud_val(pud) & ~mask) | val); | |
692 | } | |
b8e0ba7c | 693 | |
75387b92 KM |
694 | #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) |
695 | #define __phys_to_pud_val(phys) __phys_to_pte_val(phys) | |
696 | #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) | |
697 | #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) | |
af074848 | 698 | |
3e509c9b PX |
699 | #define pmd_pgprot pmd_pgprot |
700 | static inline pgprot_t pmd_pgprot(pmd_t pmd) | |
701 | { | |
702 | unsigned long pfn = pmd_pfn(pmd); | |
703 | ||
704 | return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); | |
705 | } | |
706 | ||
707 | #define pud_pgprot pud_pgprot | |
708 | static inline pgprot_t pud_pgprot(pud_t pud) | |
709 | { | |
710 | unsigned long pfn = pud_pfn(pud); | |
711 | ||
712 | return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); | |
713 | } | |
714 | ||
ef493d23 RR |
715 | static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep, |
716 | pte_t pte, unsigned int nr, | |
717 | unsigned long pgsize) | |
718 | { | |
719 | unsigned long stride = pgsize >> PAGE_SHIFT; | |
720 | ||
721 | switch (pgsize) { | |
722 | case PAGE_SIZE: | |
723 | page_table_check_ptes_set(mm, ptep, pte, nr); | |
724 | break; | |
725 | case PMD_SIZE: | |
726 | page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr); | |
727 | break; | |
728 | #ifndef __PAGETABLE_PMD_FOLDED | |
729 | case PUD_SIZE: | |
730 | page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr); | |
731 | break; | |
732 | #endif | |
733 | default: | |
734 | VM_WARN_ON(1); | |
735 | } | |
736 | ||
737 | __sync_cache_and_tags(pte, nr * stride); | |
738 | ||
739 | for (;;) { | |
740 | __check_safe_pte_update(mm, ptep, pte); | |
f89b399e | 741 | __set_pte_nosync(ptep, pte); |
ef493d23 RR |
742 | if (--nr == 0) |
743 | break; | |
744 | ptep++; | |
745 | pte = pte_advance_pfn(pte, stride); | |
746 | } | |
f89b399e RR |
747 | |
748 | __set_pte_complete(pte); | |
ef493d23 RR |
749 | } |
750 | ||
751 | static inline void __set_ptes(struct mm_struct *mm, | |
752 | unsigned long __always_unused addr, | |
753 | pte_t *ptep, pte_t pte, unsigned int nr) | |
3425cec4 | 754 | { |
ef493d23 | 755 | __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE); |
3425cec4 RR |
756 | } |
757 | ||
ef493d23 RR |
758 | static inline void __set_pmds(struct mm_struct *mm, |
759 | unsigned long __always_unused addr, | |
760 | pmd_t *pmdp, pmd_t pmd, unsigned int nr) | |
42b25471 | 761 | { |
ef493d23 | 762 | __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE); |
42b25471 | 763 | } |
ef493d23 | 764 | #define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1) |
42b25471 | 765 | |
ef493d23 RR |
766 | static inline void __set_puds(struct mm_struct *mm, |
767 | unsigned long __always_unused addr, | |
768 | pud_t *pudp, pud_t pud, unsigned int nr) | |
42b25471 | 769 | { |
ef493d23 | 770 | __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE); |
42b25471 | 771 | } |
ef493d23 | 772 | #define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1) |
af074848 | 773 | |
e9f63768 MR |
774 | #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) |
775 | #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) | |
776 | ||
75387b92 KM |
777 | #define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd)) |
778 | #define __phys_to_pgd_val(phys) __phys_to_pte_val(phys) | |
779 | ||
a501e324 CM |
/*
 * Derive a new pgprot from 'prot': clear every bit in 'mask', then OR in
 * 'bits'.
 */
#define __pgprot_modify(prot,mask,bits)					\
	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))

/* Clear PTE_MAYBE_GP and set PTE_PXN. */
#define pgprot_nx(prot)							\
	__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)

/* Set (decrypted) or clear (encrypted) PROT_NS_SHARED. */
#define pgprot_decrypted(prot)						\
	__pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
#define pgprot_encrypted(prot)						\
	__pgprot_modify(prot, PROT_NS_SHARED, 0)
790 | ||
4f04d8f0 CM |
/*
 * Memory-type selectors. Each one replaces the PTE_ATTRINDX_MASK field of
 * 'prot' with the named MAIR index; all but pgprot_tagged() also set
 * PTE_PXN and PTE_UXN.
 *
 * pgprot_noncached() marks the prot value as uncacheable and unbufferable.
 */
#define pgprot_noncached(prot)						\
	__pgprot_modify(prot, PTE_ATTRINDX_MASK,			\
			PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot)					\
	__pgprot_modify(prot, PTE_ATTRINDX_MASK,			\
			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot)						\
	__pgprot_modify(prot, PTE_ATTRINDX_MASK,			\
			PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
#define pgprot_tagged(prot)						\
	__pgprot_modify(prot, PTE_ATTRINDX_MASK,			\
			PTE_ATTRINDX(MT_NORMAL_TAGGED))
#define pgprot_mhp	pgprot_tagged
/*
 * Non-coherent DMA allocations are mapped as what the Arm architecture calls
 * "Normal non-cacheable" memory: speculation, unaligned accesses and write
 * merging are all permitted. "Device-nGnR[nE]" memory, by contrast, is meant
 * for MMIO: it forbids speculation, preserves access size, requires strict
 * alignment and can also force write responses to come from the endpoint.
 */
#define pgprot_dmacoherent(prot)					\
	__pgprot_modify(prot, PTE_ATTRINDX_MASK,			\
			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
814 | ||
4f04d8f0 CM |
815 | #define __HAVE_PHYS_MEM_ACCESS_PROT |
816 | struct file; | |
817 | extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | |
818 | unsigned long size, pgprot_t vma_prot); | |
819 | ||
/* An all-zero pmd is an empty entry. */
#define pmd_none(pmd)		(!pmd_val(pmd))

/* Classify a pmd by its PMD_TYPE_MASK field. */
#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
				 PMD_TYPE_TABLE)
#define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
				 PMD_TYPE_SECT)
/* A leaf is a present entry that is not a table; "bad" means "not a table". */
#define pmd_leaf(pmd)		(pmd_present(pmd) && !pmd_table(pmd))
#define pmd_bad(pmd)		(!pmd_table(pmd))

/* Mapping size of a leaf entry, taking the contiguous hint into account. */
#define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
831 | ||
d1770e90 RR |
832 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
833 | static inline int pmd_trans_huge(pmd_t pmd) | |
834 | { | |
835 | /* | |
836 | * If pmd is present-invalid, pmd_table() won't detect it | |
837 | * as a table, so force the valid bit for the comparison. | |
838 | */ | |
13c63ce3 | 839 | return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID)); |
d1770e90 RR |
840 | } |
841 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
842 | ||
cac4b8cd | 843 | #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 |
7d4e2dcf QC |
844 | static inline bool pud_sect(pud_t pud) { return false; } |
845 | static inline bool pud_table(pud_t pud) { return true; } | |
206a2a73 SC |
846 | #else |
847 | #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ | |
848 | PUD_TYPE_SECT) | |
523d6e9f | 849 | #define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ |
850 | PUD_TYPE_TABLE) | |
206a2a73 | 851 | #endif |
36311607 | 852 | |
6ed8a3a0 AB |
853 | extern pgd_t swapper_pg_dir[]; |
854 | extern pgd_t idmap_pg_dir[]; | |
855 | extern pgd_t tramp_pg_dir[]; | |
856 | extern pgd_t reserved_pg_dir[]; | |
2330b7ca JY |
857 | |
858 | extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); | |
859 | ||
860 | static inline bool in_swapper_pgdir(void *addr) | |
861 | { | |
862 | return ((unsigned long)addr & PAGE_MASK) == | |
863 | ((unsigned long)swapper_pg_dir & PAGE_MASK); | |
864 | } | |
865 | ||
4f04d8f0 CM |
866 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) |
867 | { | |
e9ed821b JM |
868 | #ifdef __PAGETABLE_PMD_FOLDED |
869 | if (in_swapper_pgdir(pmdp)) { | |
2330b7ca JY |
870 | set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); |
871 | return; | |
872 | } | |
e9ed821b | 873 | #endif /* __PAGETABLE_PMD_FOLDED */ |
2330b7ca | 874 | |
20a004e7 | 875 | WRITE_ONCE(*pmdp, pmd); |
0795edaf | 876 | |
5fdd05ef RR |
877 | if (pmd_valid(pmd)) |
878 | queue_pte_barriers(); | |
4f04d8f0 CM |
879 | } |
880 | ||
881 | static inline void pmd_clear(pmd_t *pmdp) | |
882 | { | |
883 | set_pmd(pmdp, __pmd(0)); | |
884 | } | |
885 | ||
dca56dca | 886 | static inline phys_addr_t pmd_page_paddr(pmd_t pmd) |
4f04d8f0 | 887 | { |
75387b92 | 888 | return __pmd_to_phys(pmd); |
4f04d8f0 CM |
889 | } |
890 | ||
974b9b2c MR |
891 | static inline unsigned long pmd_page_vaddr(pmd_t pmd) |
892 | { | |
893 | return (unsigned long)__va(pmd_page_paddr(pmd)); | |
894 | } | |
74dd022f | 895 | |
/*
 * Find an entry in the third-level page table: physical address of the pte
 * slot for 'addr' within the table referenced by the pmd at 'dir'.
 */
#define pte_offset_phys(dir,addr)					\
	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))

/* Access a pte table through the FIX_PTE fixmap slot. */
#define pte_set_fixmap(addr)						\
	((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr)				\
	pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap()		clear_fixmap(FIX_PTE)

/* struct page for the physical address encoded in 'pmd'. */
#define pmd_page(pmd)			phys_to_page(__pmd_to_phys(pmd))

/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr)					\
	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
907 | ||
9f25e6ad | 908 | #if CONFIG_PGTABLE_LEVELS > 2 |
4f04d8f0 | 909 | |
2cf660eb GS |
/* Log a bad pmd value together with the reporting source location. */
#define pmd_ERROR(e)							\
	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
7078db46 | 912 | |
/* An all-zero pud is an empty entry. */
#define pud_none(pud)		(!pud_val(pud))
/* Anything whose type field is not PUD_TYPE_TABLE is considered bad. */
#define pud_bad(pud)		((pud_val(pud) & PUD_TYPE_MASK) != \
				 PUD_TYPE_TABLE)
#define pud_present(pud)	pte_present(pud_pte(pud))
/* With the pmd level folded there are no pud leaves. */
#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud)		(pud_present(pud) && !pud_table(pud))
#else
#define pud_leaf(pud)		false
#endif
/* The remaining predicates are the pte ones applied to the pud value. */
#define pud_valid(pud)		pte_valid(pud_pte(pud))
#define pud_user(pud)		pte_user(pud_pte(pud))
#define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))
4f04d8f0 | 925 | |
90e636f6 AB |
926 | static inline bool pgtable_l4_enabled(void); |
927 | ||
4f04d8f0 CM |
928 | static inline void set_pud(pud_t *pudp, pud_t pud) |
929 | { | |
90e636f6 | 930 | if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) { |
2330b7ca JY |
931 | set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); |
932 | return; | |
933 | } | |
934 | ||
20a004e7 | 935 | WRITE_ONCE(*pudp, pud); |
0795edaf | 936 | |
5fdd05ef RR |
937 | if (pud_valid(pud)) |
938 | queue_pte_barriers(); | |
4f04d8f0 CM |
939 | } |
940 | ||
941 | static inline void pud_clear(pud_t *pudp) | |
942 | { | |
943 | set_pud(pudp, __pud(0)); | |
944 | } | |
945 | ||
dca56dca | 946 | static inline phys_addr_t pud_page_paddr(pud_t pud) |
4f04d8f0 | 947 | { |
75387b92 | 948 | return __pud_to_phys(pud); |
4f04d8f0 CM |
949 | } |
950 | ||
9cf6fa24 | 951 | static inline pmd_t *pud_pgtable(pud_t pud) |
974b9b2c | 952 | { |
9cf6fa24 | 953 | return (pmd_t *)__va(pud_page_paddr(pud)); |
974b9b2c | 954 | } |
7078db46 | 955 | |
/*
 * Find an entry in the second-level page table: physical address of the pmd
 * slot for 'addr' within the table referenced by the pud at 'dir'.
 */
#define pmd_offset_phys(dir, addr)					\
	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))

/* Access a pmd table through the FIX_PMD fixmap slot. */
#define pmd_set_fixmap(addr)						\
	((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
#define pmd_set_fixmap_offset(pud, addr)				\
	pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)

/* struct page for the physical address encoded in 'pud'. */
#define pud_page(pud)			phys_to_page(__pud_to_phys(pud))

/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr)					\
	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
967 | ||
dca56dca MR |
968 | #else |
969 | ||
/*
 * Stand-ins used when CONFIG_PGTABLE_LEVELS is 2 or less. Any use of
 * pud_page_paddr() in this configuration is a build error.
 */
#define pud_valid(pud)			false
#define pud_page_paddr(pud)		({ BUILD_BUG(); 0; })
#define pud_user_exec(pud)		pud_user(pud) /* Always 0 with folding */

/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr)		NULL
#define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
#define pmd_clear_fixmap()

#define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)
980 | ||
9f25e6ad | 981 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
4f04d8f0 | 982 | |
9f25e6ad | 983 | #if CONFIG_PGTABLE_LEVELS > 3 |
c79b954b | 984 | |
0dd4f60a AB |
985 | static __always_inline bool pgtable_l4_enabled(void) |
986 | { | |
987 | if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2)) | |
988 | return true; | |
989 | if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) | |
990 | return vabits_actual == VA_BITS; | |
991 | return alternative_has_cap_unlikely(ARM64_HAS_VA52); | |
992 | } | |
993 | ||
994 | static inline bool mm_pud_folded(const struct mm_struct *mm) | |
995 | { | |
996 | return !pgtable_l4_enabled(); | |
997 | } | |
998 | #define mm_pud_folded mm_pud_folded | |
999 | ||
2cf660eb GS |
/* Log a bad pud value together with the reporting source location. */
#define pud_ERROR(e)							\
	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))

/*
 * p4d predicates. When the fourth level is not enabled, a p4d is never
 * "none" or "bad", and therefore always "present".
 */
#define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d))
#define p4d_bad(p4d)		(pgtable_l4_enabled() &&		\
				 ((p4d_val(p4d) & P4D_TYPE_MASK) !=	\
				  P4D_TYPE_TABLE))
#define p4d_present(p4d)	(!p4d_none(p4d))
c79b954b | 1008 | |
e9f63768 | 1009 | static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) |
c79b954b | 1010 | { |
e9f63768 MR |
1011 | if (in_swapper_pgdir(p4dp)) { |
1012 | set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); | |
2330b7ca JY |
1013 | return; |
1014 | } | |
1015 | ||
e9f63768 | 1016 | WRITE_ONCE(*p4dp, p4d); |
5fdd05ef | 1017 | queue_pte_barriers(); |
c79b954b JL |
1018 | } |
1019 | ||
e9f63768 | 1020 | static inline void p4d_clear(p4d_t *p4dp) |
c79b954b | 1021 | { |
0dd4f60a AB |
1022 | if (pgtable_l4_enabled()) |
1023 | set_p4d(p4dp, __p4d(0)); | |
c79b954b JL |
1024 | } |
1025 | ||
e9f63768 | 1026 | static inline phys_addr_t p4d_page_paddr(p4d_t p4d) |
c79b954b | 1027 | { |
e9f63768 | 1028 | return __p4d_to_phys(p4d); |
c79b954b JL |
1029 | } |
1030 | ||
0dd4f60a AB |
1031 | #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) |
1032 | ||
1033 | static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr) | |
1034 | { | |
baec2397 AB |
1035 | /* Ensure that 'p4dp' indexes a page table according to 'addr' */ |
1036 | VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D); | |
1037 | ||
0dd4f60a AB |
1038 | return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr); |
1039 | } | |
1040 | ||
dc4875f0 | 1041 | static inline pud_t *p4d_pgtable(p4d_t p4d) |
974b9b2c | 1042 | { |
dc4875f0 | 1043 | return (pud_t *)__va(p4d_page_paddr(p4d)); |
974b9b2c | 1044 | } |
7078db46 | 1045 | |
0dd4f60a AB |
1046 | static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr) |
1047 | { | |
1048 | BUG_ON(!pgtable_l4_enabled()); | |
7078db46 | 1049 | |
0dd4f60a AB |
1050 | return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t); |
1051 | } | |
7078db46 | 1052 | |
0dd4f60a AB |
1053 | static inline |
1054 | pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr) | |
1055 | { | |
1056 | if (!pgtable_l4_enabled()) | |
1057 | return p4d_to_folded_pud(p4dp, addr); | |
1058 | return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr); | |
1059 | } | |
1060 | #define pud_offset_lockless pud_offset_lockless | |
1061 | ||
1062 | static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr) | |
1063 | { | |
1064 | return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr); | |
1065 | } | |
1066 | #define pud_offset pud_offset | |
1067 | ||
1068 | static inline pud_t *pud_set_fixmap(unsigned long addr) | |
1069 | { | |
1070 | if (!pgtable_l4_enabled()) | |
1071 | return NULL; | |
1072 | return (pud_t *)set_fixmap_offset(FIX_PUD, addr); | |
1073 | } | |
1074 | ||
1075 | static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr) | |
1076 | { | |
1077 | if (!pgtable_l4_enabled()) | |
1078 | return p4d_to_folded_pud(p4dp, addr); | |
1079 | return pud_set_fixmap(pud_offset_phys(p4dp, addr)); | |
1080 | } | |
1081 | ||
1082 | static inline void pud_clear_fixmap(void) | |
1083 | { | |
1084 | if (pgtable_l4_enabled()) | |
1085 | clear_fixmap(FIX_PUD); | |
1086 | } | |
5d96e0cb | 1087 | |
6533945a | 1088 | /* use ONLY for statically allocated translation tables */ |
0dd4f60a AB |
1089 | static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr) |
1090 | { | |
1091 | if (!pgtable_l4_enabled()) | |
1092 | return p4d_to_folded_pud(p4dp, addr); | |
1093 | return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr)); | |
1094 | } | |
1095 | ||
1096 | #define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) | |
6533945a | 1097 | |
dca56dca MR |
1098 | #else |
1099 | ||
0dd4f60a AB |
1100 | static inline bool pgtable_l4_enabled(void) { return false; } |
1101 | ||
e9f63768 | 1102 | #define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) |
dca56dca | 1103 | |
961faac1 MR |
1104 | /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ |
1105 | #define pud_set_fixmap(addr) NULL | |
1106 | #define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp) | |
1107 | #define pud_clear_fixmap() | |
1108 | ||
6533945a AB |
1109 | #define pud_offset_kimg(dir,addr) ((pud_t *)dir) |
1110 | ||
9f25e6ad | 1111 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
c79b954b | 1112 | |
a6bbf5d4 AB |
1113 | #if CONFIG_PGTABLE_LEVELS > 4 |
1114 | ||
1115 | static __always_inline bool pgtable_l5_enabled(void) | |
1116 | { | |
1117 | if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) | |
1118 | return vabits_actual == VA_BITS; | |
1119 | return alternative_has_cap_unlikely(ARM64_HAS_VA52); | |
1120 | } | |
1121 | ||
1122 | static inline bool mm_p4d_folded(const struct mm_struct *mm) | |
1123 | { | |
1124 | return !pgtable_l5_enabled(); | |
1125 | } | |
1126 | #define mm_p4d_folded mm_p4d_folded | |
1127 | ||
/* Log a bad p4d value together with the reporting source location. */
#define p4d_ERROR(e)							\
	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))

/*
 * pgd predicates. When the fifth level is not enabled, a pgd is never
 * "none" or "bad", and therefore always "present".
 */
#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
#define pgd_bad(pgd)		(pgtable_l5_enabled() &&		\
				 ((pgd_val(pgd) & PGD_TYPE_MASK) !=	\
				  PGD_TYPE_TABLE))
#define pgd_present(pgd)	(!pgd_none(pgd))
1136 | ||
1137 | static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) | |
1138 | { | |
1139 | if (in_swapper_pgdir(pgdp)) { | |
1140 | set_swapper_pgd(pgdp, __pgd(pgd_val(pgd))); | |
1141 | return; | |
1142 | } | |
1143 | ||
1144 | WRITE_ONCE(*pgdp, pgd); | |
5fdd05ef | 1145 | queue_pte_barriers(); |
a6bbf5d4 AB |
1146 | } |
1147 | ||
1148 | static inline void pgd_clear(pgd_t *pgdp) | |
1149 | { | |
1150 | if (pgtable_l5_enabled()) | |
1151 | set_pgd(pgdp, __pgd(0)); | |
1152 | } | |
1153 | ||
1154 | static inline phys_addr_t pgd_page_paddr(pgd_t pgd) | |
1155 | { | |
1156 | return __pgd_to_phys(pgd); | |
1157 | } | |
1158 | ||
1159 | #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) | |
1160 | ||
1161 | static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) | |
1162 | { | |
baec2397 AB |
1163 | /* Ensure that 'pgdp' indexes a page table according to 'addr' */ |
1164 | VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD); | |
1165 | ||
a6bbf5d4 AB |
1166 | return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr); |
1167 | } | |
1168 | ||
1169 | static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr) | |
1170 | { | |
1171 | BUG_ON(!pgtable_l5_enabled()); | |
1172 | ||
1173 | return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t); | |
1174 | } | |
1175 | ||
1176 | static inline | |
1177 | p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr) | |
1178 | { | |
1179 | if (!pgtable_l5_enabled()) | |
1180 | return pgd_to_folded_p4d(pgdp, addr); | |
1181 | return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr); | |
1182 | } | |
1183 | #define p4d_offset_lockless p4d_offset_lockless | |
1184 | ||
1185 | static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr) | |
1186 | { | |
1187 | return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr); | |
1188 | } | |
1189 | ||
6ed8a3a0 AB |
1190 | static inline p4d_t *p4d_set_fixmap(unsigned long addr) |
1191 | { | |
1192 | if (!pgtable_l5_enabled()) | |
1193 | return NULL; | |
1194 | return (p4d_t *)set_fixmap_offset(FIX_P4D, addr); | |
1195 | } | |
1196 | ||
1197 | static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr) | |
1198 | { | |
1199 | if (!pgtable_l5_enabled()) | |
1200 | return pgd_to_folded_p4d(pgdp, addr); | |
1201 | return p4d_set_fixmap(p4d_offset_phys(pgdp, addr)); | |
1202 | } | |
1203 | ||
1204 | static inline void p4d_clear_fixmap(void) | |
1205 | { | |
1206 | if (pgtable_l5_enabled()) | |
1207 | clear_fixmap(FIX_P4D); | |
1208 | } | |
1209 | ||
1210 | /* use ONLY for statically allocated translation tables */ | |
1211 | static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) | |
1212 | { | |
1213 | if (!pgtable_l5_enabled()) | |
1214 | return pgd_to_folded_p4d(pgdp, addr); | |
1215 | return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr)); | |
1216 | } | |
1217 | ||
a6bbf5d4 AB |
1218 | #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) |
1219 | ||
1220 | #else | |
1221 | ||
1222 | static inline bool pgtable_l5_enabled(void) { return false; } | |
1223 | ||
0e9df1c9 RR |
1224 | #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) |
1225 | ||
6ed8a3a0 AB |
1226 | /* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ |
1227 | #define p4d_set_fixmap(addr) NULL | |
1228 | #define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) | |
1229 | #define p4d_clear_fixmap() | |
1230 | ||
1231 | #define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) | |
1232 | ||
36639013 WD |
1233 | static inline |
1234 | p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) | |
1235 | { | |
1236 | /* | |
1237 | * With runtime folding of the pud, pud_offset_lockless() passes | |
1238 | * the 'pgd_t *' we return here to p4d_to_folded_pud(), which | |
1239 | * will offset the pointer assuming that it points into | |
1240 | * a page-table page. However, the fast GUP path passes us a | |
1241 | * pgd_t allocated on the stack and so we must use the original | |
1242 | * pointer in 'pgdp' to construct the p4d pointer instead of | |
1243 | * using the generic p4d_offset_lockless() implementation. | |
1244 | * | |
1245 | * Note: reusing the original pointer means that we may | |
1246 | * dereference the same (live) page-table entry multiple times. | |
1247 | * This is safe because it is still only loaded once in the | |
1248 | * context of each level and the CPU guarantees same-address | |
1249 | * read-after-read ordering. | |
1250 | */ | |
1251 | return p4d_offset(pgdp, addr); | |
1252 | } | |
1253 | #define p4d_offset_lockless p4d_offset_lockless_folded | |
1254 | ||
a6bbf5d4 AB |
1255 | #endif /* CONFIG_PGTABLE_LEVELS > 4 */ |
1256 | ||
2cf660eb GS |
/* Log a bad pgd value together with the reporting source location. */
#define pgd_ERROR(e)							\
	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))

/* Access a pgd table through the FIX_PGD fixmap slot. */
#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
1262 | ||
4f04d8f0 CM |
1263 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
1264 | { | |
9f341931 CM |
1265 | /* |
1266 | * Normal and Normal-Tagged are two different memory types and indices | |
1267 | * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. | |
1268 | */ | |
a6fadf7e | 1269 | const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | |
f0f5863a | 1270 | PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | |
6580a36d JG |
1271 | PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK; |
1272 | ||
2f4b829c CM |
1273 | /* preserve the hardware dirty information */ |
1274 | if (pte_hw_dirty(pte)) | |
6477c388 AK |
1275 | pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); |
1276 | ||
4f04d8f0 | 1277 | pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); |
3c069607 JH |
1278 | /* |
1279 | * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware | |
1280 | * dirtiness again. | |
1281 | */ | |
1282 | if (pte_sw_dirty(pte)) | |
1283 | pte = pte_mkdirty(pte); | |
4f04d8f0 CM |
1284 | return pte; |
1285 | } | |
1286 | ||
9c7e535f SC |
1287 | static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) |
1288 | { | |
1289 | return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); | |
1290 | } | |
1291 | ||
5a00bfd6 | 1292 | extern int __ptep_set_access_flags(struct vm_area_struct *vma, |
66dbd6e6 CM |
1293 | unsigned long address, pte_t *ptep, |
1294 | pte_t entry, int dirty); | |
1295 | ||
282aa705 CM |
1296 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1297 | #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS | |
1298 | static inline int pmdp_set_access_flags(struct vm_area_struct *vma, | |
1299 | unsigned long address, pmd_t *pmdp, | |
1300 | pmd_t entry, int dirty) | |
1301 | { | |
5a00bfd6 RR |
1302 | return __ptep_set_access_flags(vma, address, (pte_t *)pmdp, |
1303 | pmd_pte(entry), dirty); | |
282aa705 | 1304 | } |
73b20c84 RM |
1305 | |
1306 | static inline int pud_devmap(pud_t pud) | |
1307 | { | |
1308 | return 0; | |
1309 | } | |
1310 | ||
1311 | static inline int pgd_devmap(pgd_t pgd) | |
1312 | { | |
1313 | return 0; | |
1314 | } | |
282aa705 CM |
1315 | #endif |
1316 | ||
ed928a34 TT |
1317 | #ifdef CONFIG_PAGE_TABLE_CHECK |
1318 | static inline bool pte_user_accessible_page(pte_t pte) | |
1319 | { | |
f0f5863a | 1320 | return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); |
ed928a34 TT |
1321 | } |
1322 | ||
1323 | static inline bool pmd_user_accessible_page(pmd_t pmd) | |
1324 | { | |
f0f5863a | 1325 | return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); |
ed928a34 TT |
1326 | } |
1327 | ||
1328 | static inline bool pud_user_accessible_page(pud_t pud) | |
1329 | { | |
f0f5863a | 1330 | return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); |
ed928a34 TT |
1331 | } |
1332 | #endif | |
1333 | ||
2f4b829c CM |
1334 | /* |
1335 | * Atomic pte/pmd modifications. | |
1336 | */ | |
5a00bfd6 RR |
1337 | static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, |
1338 | unsigned long address, | |
1339 | pte_t *ptep) | |
2f4b829c | 1340 | { |
3bbf7157 | 1341 | pte_t old_pte, pte; |
2f4b829c | 1342 | |
5a00bfd6 | 1343 | pte = __ptep_get(ptep); |
3bbf7157 CM |
1344 | do { |
1345 | old_pte = pte; | |
1346 | pte = pte_mkold(pte); | |
1347 | pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), | |
1348 | pte_val(old_pte), pte_val(pte)); | |
1349 | } while (pte_val(pte) != pte_val(old_pte)); | |
2f4b829c | 1350 | |
3bbf7157 | 1351 | return pte_young(pte); |
2f4b829c CM |
1352 | } |
1353 | ||
5a00bfd6 | 1354 | static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, |
3403e56b AVB |
1355 | unsigned long address, pte_t *ptep) |
1356 | { | |
5a00bfd6 | 1357 | int young = __ptep_test_and_clear_young(vma, address, ptep); |
3403e56b AVB |
1358 | |
1359 | if (young) { | |
1360 | /* | |
1361 | * We can elide the trailing DSB here since the worst that can | |
1362 | * happen is that a CPU continues to use the young entry in its | |
1363 | * TLB and we mistakenly reclaim the associated page. The | |
1364 | * window for such an event is bounded by the next | |
1365 | * context-switch, which provides a DSB to complete the TLB | |
1366 | * invalidation. | |
1367 | */ | |
1368 | flush_tlb_page_nosync(vma, address); | |
1369 | } | |
1370 | ||
1371 | return young; | |
1372 | } | |
1373 | ||
62df5870 | 1374 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) |
2f4b829c CM |
1375 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG |
1376 | static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, | |
1377 | unsigned long address, | |
1378 | pmd_t *pmdp) | |
1379 | { | |
b349a5a2 YY |
1380 | /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ |
1381 | VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); | |
5a00bfd6 | 1382 | return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); |
2f4b829c | 1383 | } |
62df5870 | 1384 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ |
2f4b829c | 1385 | |
ef493d23 RR |
1386 | static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm, |
1387 | pte_t *ptep, | |
1388 | unsigned long pgsize) | |
2f4b829c | 1389 | { |
42b25471 KW |
1390 | pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); |
1391 | ||
ef493d23 RR |
1392 | switch (pgsize) { |
1393 | case PAGE_SIZE: | |
1394 | page_table_check_pte_clear(mm, pte); | |
1395 | break; | |
1396 | case PMD_SIZE: | |
1397 | page_table_check_pmd_clear(mm, pte_pmd(pte)); | |
1398 | break; | |
1399 | #ifndef __PAGETABLE_PMD_FOLDED | |
1400 | case PUD_SIZE: | |
1401 | page_table_check_pud_clear(mm, pte_pud(pte)); | |
1402 | break; | |
1403 | #endif | |
1404 | default: | |
1405 | VM_WARN_ON(1); | |
1406 | } | |
42b25471 KW |
1407 | |
1408 | return pte; | |
2f4b829c CM |
1409 | } |
1410 | ||
ef493d23 RR |
1411 | static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, |
1412 | unsigned long address, pte_t *ptep) | |
1413 | { | |
1414 | return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE); | |
1415 | } | |
1416 | ||
6b1e4efb RR |
1417 | static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr, |
1418 | pte_t *ptep, unsigned int nr, int full) | |
1419 | { | |
1420 | for (;;) { | |
1421 | __ptep_get_and_clear(mm, addr, ptep); | |
1422 | if (--nr == 0) | |
1423 | break; | |
1424 | ptep++; | |
1425 | addr += PAGE_SIZE; | |
1426 | } | |
1427 | } | |
1428 | ||
1429 | static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, | |
1430 | unsigned long addr, pte_t *ptep, | |
1431 | unsigned int nr, int full) | |
1432 | { | |
1433 | pte_t pte, tmp_pte; | |
1434 | ||
1435 | pte = __ptep_get_and_clear(mm, addr, ptep); | |
1436 | while (--nr) { | |
1437 | ptep++; | |
1438 | addr += PAGE_SIZE; | |
1439 | tmp_pte = __ptep_get_and_clear(mm, addr, ptep); | |
1440 | if (pte_dirty(tmp_pte)) | |
1441 | pte = pte_mkdirty(pte); | |
1442 | if (pte_young(tmp_pte)) | |
1443 | pte = pte_mkyoung(pte); | |
1444 | } | |
1445 | return pte; | |
1446 | } | |
1447 | ||
2f4b829c | 1448 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
911f56ee CM |
1449 | #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR |
1450 | static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, | |
1451 | unsigned long address, pmd_t *pmdp) | |
2f4b829c | 1452 | { |
ef493d23 | 1453 | return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE)); |
2f4b829c CM |
1454 | } |
1455 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
1456 | ||
311a6cf2 RR |
1457 | static inline void ___ptep_set_wrprotect(struct mm_struct *mm, |
1458 | unsigned long address, pte_t *ptep, | |
1459 | pte_t pte) | |
2f4b829c | 1460 | { |
311a6cf2 | 1461 | pte_t old_pte; |
3bbf7157 | 1462 | |
3bbf7157 CM |
1463 | do { |
1464 | old_pte = pte; | |
3bbf7157 CM |
1465 | pte = pte_wrprotect(pte); |
1466 | pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), | |
1467 | pte_val(old_pte), pte_val(pte)); | |
1468 | } while (pte_val(pte) != pte_val(old_pte)); | |
2f4b829c CM |
1469 | } |
1470 | ||
311a6cf2 | 1471 | /* |
e281bd22 | 1472 | * __ptep_set_wrprotect - mark read-only while transferring potential hardware |
311a6cf2 RR |
1473 | * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. |
1474 | */ | |
1475 | static inline void __ptep_set_wrprotect(struct mm_struct *mm, | |
1476 | unsigned long address, pte_t *ptep) | |
1477 | { | |
1478 | ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep)); | |
1479 | } | |
1480 | ||
1481 | static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address, | |
1482 | pte_t *ptep, unsigned int nr) | |
1483 | { | |
1484 | unsigned int i; | |
1485 | ||
1486 | for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++) | |
1487 | __ptep_set_wrprotect(mm, address, ptep); | |
1488 | } | |
1489 | ||
89e86854 LY |
1490 | static inline void __clear_young_dirty_pte(struct vm_area_struct *vma, |
1491 | unsigned long addr, pte_t *ptep, | |
1492 | pte_t pte, cydp_t flags) | |
1493 | { | |
1494 | pte_t old_pte; | |
1495 | ||
1496 | do { | |
1497 | old_pte = pte; | |
1498 | ||
1499 | if (flags & CYDP_CLEAR_YOUNG) | |
1500 | pte = pte_mkold(pte); | |
1501 | if (flags & CYDP_CLEAR_DIRTY) | |
1502 | pte = pte_mkclean(pte); | |
1503 | ||
1504 | pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), | |
1505 | pte_val(old_pte), pte_val(pte)); | |
1506 | } while (pte_val(pte) != pte_val(old_pte)); | |
1507 | } | |
1508 | ||
1509 | static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma, | |
1510 | unsigned long addr, pte_t *ptep, | |
1511 | unsigned int nr, cydp_t flags) | |
1512 | { | |
1513 | pte_t pte; | |
1514 | ||
1515 | for (;;) { | |
1516 | pte = __ptep_get(ptep); | |
1517 | ||
1518 | if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY)) | |
1519 | __set_pte(ptep, pte_mkclean(pte_mkold(pte))); | |
1520 | else | |
1521 | __clear_young_dirty_pte(vma, addr, ptep, pte, flags); | |
1522 | ||
1523 | if (--nr == 0) | |
1524 | break; | |
1525 | ptep++; | |
1526 | addr += PAGE_SIZE; | |
1527 | } | |
1528 | } | |
1529 | ||
2f4b829c CM |
1530 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1531 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | |
1532 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, | |
1533 | unsigned long address, pmd_t *pmdp) | |
1534 | { | |
5a00bfd6 | 1535 | __ptep_set_wrprotect(mm, address, (pte_t *)pmdp); |
2f4b829c | 1536 | } |
1d78a62c CM |
1537 | |
1538 | #define pmdp_establish pmdp_establish | |
1539 | static inline pmd_t pmdp_establish(struct vm_area_struct *vma, | |
1540 | unsigned long address, pmd_t *pmdp, pmd_t pmd) | |
1541 | { | |
a3b83713 | 1542 | page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); |
1d78a62c CM |
1543 | return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); |
1544 | } | |
2f4b829c | 1545 | #endif |
2f4b829c | 1546 | |
4f04d8f0 CM |
/*
 * Encode and decode a swap entry:
 *	bits 0-1:	present (must be zero)
 *	bit  2:		remember PG_anon_exclusive
 *	bit  3:		remember uffd-wp state
 *	bits 6-10:	swap type
 *	bit  11:	PTE_PRESENT_INVALID (must be zero)
 *	bits 12-61:	swap offset
 */
#define __SWP_TYPE_SHIFT	6
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	12
#define __SWP_OFFSET_BITS	50
#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)

/* Extract the type and offset fields from a swp_entry_t. */
#define __swp_type(x)							\
	(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)							\
	(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
/* Pack a type and an offset into a swp_entry_t; neither input is masked. */
#define __swp_entry(type,offset)					\
	((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) |			\
			 ((offset) << __SWP_OFFSET_SHIFT) })

/* A swap pte and a swp_entry_t carry the same raw value. */
#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
1569 | ||
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/*
 * PMD-level conversions to and from swp_entry_t. The raw value is re-wrapped
 * without modification in either direction.
 */
#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(swp)		__pmd((swp).val)
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
1574 | ||
/*
 * Ensure that there are not more swap files than can be encoded in the kernel
 * PTEs: the build fails if MAX_SWAPFILES_SHIFT exceeds the number of bits
 * reserved for the swap type (__SWP_TYPE_BITS).
 */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
1580 | ||
36943aba SP |
1581 | #ifdef CONFIG_ARM64_MTE |
1582 | ||
#define __HAVE_ARCH_PREPARE_TO_SWAP
/*
 * Prototype only; the definition is not in this header. Declared only when
 * CONFIG_ARM64_MTE is enabled.
 * NOTE(review): presumably saves the MTE tags of @folio ahead of swap-out and
 * returns 0 or a negative errno - confirm against the definition.
 */
extern int arch_prepare_to_swap(struct folio *folio);
36943aba SP |
1585 | |
1586 | #define __HAVE_ARCH_SWAP_INVALIDATE | |
1587 | static inline void arch_swap_invalidate_page(int type, pgoff_t offset) | |
1588 | { | |
1589 | if (system_supports_mte()) | |
1590 | mte_invalidate_tags(type, offset); | |
1591 | } | |
1592 | ||
/*
 * Forward swap area @type to mte_invalidate_tags_area(). Does nothing when
 * the system does not support MTE.
 */
static inline void arch_swap_invalidate_area(int type)
{
	if (!system_supports_mte())
		return;

	mte_invalidate_tags_area(type);
}
1598 | ||
#define __HAVE_ARCH_SWAP_RESTORE
/*
 * Prototype only; the definition is not in this header. Declared only when
 * CONFIG_ARM64_MTE is enabled.
 * NOTE(review): presumably restores the MTE tags saved for swap @entry into
 * @folio on swap-in - confirm against the definition.
 */
extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);
36943aba SP |
1601 | |
1602 | #endif /* CONFIG_ARM64_MTE */ | |
1603 | ||
/*
 * On AArch64, the cache coherency is handled via the __set_ptes() function,
 * so this hook has an empty body and none of its arguments are used.
 */
static inline void update_mmu_cache_range(struct vm_fault *vmf,
		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
		unsigned int nr)
{
	/*
	 * We don't do anything here, so there's a very small chance of
	 * us retaking a user fault which we just fixed up. The alternative
	 * is doing a dsb(ishst), but that penalises the fastpath.
	 */
}
1617 | ||
/*
 * Single-entry form: expands to update_mmu_cache_range() with a NULL vm_fault
 * and a count of 1.
 */
#define update_mmu_cache(vma, addr, ptep) \
	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
/* PMD variant: expands to an empty statement. */
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
1621 | ||
/*
 * Convert a physical address to the form used for a TTBR base address.
 *
 * With CONFIG_ARM64_PA_BITS_52 the address is OR-ed with a copy of itself
 * shifted right by 46 (which brings address bits 48-51 down to bits 2-5) and
 * the result is masked with TTBR_BADDR_MASK_52. Note that this variant
 * evaluates @addr twice. Otherwise the address is passed through unchanged.
 */
#ifdef CONFIG_ARM64_PA_BITS_52
#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#else
#define phys_to_ttbr(addr)	(addr)
#endif
1627 | ||
/*
 * On arm64 without hardware Access Flag, copying from user will fail because
 * the pte is old and cannot be marked young. So we always end up with zeroed
 * page after fork() + CoW for pfn mappings. We don't always have a
 * hardware-managed access flag on arm64, so the answer is delegated to
 * cpu_has_hw_af rather than being a constant.
 */
#define arch_has_hw_pte_young		cpu_has_hw_af
0388f9c7 | 1635 | |
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
/* Delegates to system_supports_haft. */
#define arch_has_hw_nonleaf_pmd_young	system_supports_haft
#endif
1639 | ||
/*
 * Experimentally, it's cheap to set the access flag in hardware and we
 * benefit from prefaulting mappings as 'old' to start with. The decision is
 * delegated to cpu_has_hw_af.
 */
#define arch_wants_old_prefaulted_pte	cpu_has_hw_af
6af31226 | 1645 | |
f8b46c4b AK |
1646 | static inline bool pud_sect_supported(void) |
1647 | { | |
1648 | return PAGE_SIZE == SZ_4K; | |
1649 | } | |
1650 | ||
18107f8a | 1651 | |
/*
 * Advertise architecture-specific ptep_modify_prot_start() and
 * ptep_modify_prot_commit(). Only the prototypes live in this header.
 * NOTE(review): presumably start() returns the current pte for the caller to
 * modify and commit() installs @new_pte - confirm against the definitions.
 */
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
#define ptep_modify_prot_start ptep_modify_prot_start
extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep);

#define ptep_modify_prot_commit ptep_modify_prot_commit
extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t new_pte);
5a00bfd6 | 1661 | |
4602e575 RR |
1662 | #ifdef CONFIG_ARM64_CONTPTE |
1663 | ||
/*
 * The contpte APIs are used to transparently manage the contiguous bit in ptes
 * where it is possible and makes sense to do so. The PTE_CONT bit is considered
 * a private implementation detail of the public ptep API (see below).
 *
 * Everything declared here is a prototype only. The inline wrappers later in
 * this header call these helpers when they see a valid contiguous entry or
 * are asked to operate on more than one entry.
 */
extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, unsigned int nr);
extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full);
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full);
extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep);
extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty);
extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags);
4602e575 | 1694 | |
f0c22649 RR |
1695 | static __always_inline void contpte_try_fold(struct mm_struct *mm, |
1696 | unsigned long addr, pte_t *ptep, pte_t pte) | |
1697 | { | |
1698 | /* | |
1699 | * Only bother trying if both the virtual and physical addresses are | |
1700 | * aligned and correspond to the last entry in a contig range. The core | |
1701 | * code mostly modifies ranges from low to high, so this is the likely | |
1702 | * the last modification in the contig range, so a good time to fold. | |
1703 | * We can't fold special mappings, because there is no associated folio. | |
1704 | */ | |
1705 | ||
1706 | const unsigned long contmask = CONT_PTES - 1; | |
1707 | bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask; | |
1708 | ||
1709 | if (unlikely(valign)) { | |
1710 | bool palign = (pte_pfn(pte) & contmask) == contmask; | |
1711 | ||
1712 | if (unlikely(palign && | |
1713 | pte_valid(pte) && !pte_cont(pte) && !pte_special(pte))) | |
1714 | __contpte_try_fold(mm, addr, ptep, pte); | |
1715 | } | |
1716 | } | |
1717 | ||
b972fc6a RR |
1718 | static __always_inline void contpte_try_unfold(struct mm_struct *mm, |
1719 | unsigned long addr, pte_t *ptep, pte_t pte) | |
4602e575 RR |
1720 | { |
1721 | if (unlikely(pte_valid_cont(pte))) | |
1722 | __contpte_try_unfold(mm, addr, ptep, pte); | |
1723 | } | |
1724 | ||
fb5451e5 RR |
1725 | #define pte_batch_hint pte_batch_hint |
1726 | static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) | |
1727 | { | |
1728 | if (!pte_valid_cont(pte)) | |
1729 | return 1; | |
1730 | ||
1731 | return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1)); | |
1732 | } | |
1733 | ||
4602e575 RR |
1734 | /* |
1735 | * The below functions constitute the public API that arm64 presents to the | |
1736 | * core-mm to manipulate PTE entries within their page tables (or at least this | |
1737 | * is the subset of the API that arm64 needs to implement). These public | |
1738 | * versions will automatically and transparently apply the contiguous bit where | |
1739 | * it makes sense to do so. Therefore any users that are contig-aware (e.g. | |
1740 | * hugetlb, kernel mapper) should NOT use these APIs, but instead use the | |
1741 | * private versions, which are prefixed with double underscore. All of these | |
1742 | * APIs except for ptep_get_lockless() are expected to be called with the PTL | |
1743 | * held. Although the contiguous bit is considered private to the | |
1744 | * implementation, it is deliberately allowed to leak through the getters (e.g. | |
1745 | * ptep_get()), back to core code. This is required so that pte_leaf_size() can | |
1746 | * provide an accurate size for perf_get_pgtable_size(). But this leakage means | |
1747 | * it's possible a pte will be passed to a setter with the contiguous bit set, so | |
1748 | * we explicitly clear the contiguous bit in those cases to prevent accidentally | |
1749 | * setting it in the pgtable. | |
1750 | */ | |
1751 | ||
1752 | #define ptep_get ptep_get | |
1753 | static inline pte_t ptep_get(pte_t *ptep) | |
1754 | { | |
1755 | pte_t pte = __ptep_get(ptep); | |
1756 | ||
1757 | if (likely(!pte_valid_cont(pte))) | |
1758 | return pte; | |
1759 | ||
1760 | return contpte_ptep_get(ptep, pte); | |
1761 | } | |
1762 | ||
1763 | #define ptep_get_lockless ptep_get_lockless | |
1764 | static inline pte_t ptep_get_lockless(pte_t *ptep) | |
1765 | { | |
1766 | pte_t pte = __ptep_get(ptep); | |
1767 | ||
1768 | if (likely(!pte_valid_cont(pte))) | |
1769 | return pte; | |
1770 | ||
1771 | return contpte_ptep_get_lockless(ptep); | |
1772 | } | |
1773 | ||
1774 | static inline void set_pte(pte_t *ptep, pte_t pte) | |
1775 | { | |
1776 | /* | |
1777 | * We don't have the mm or vaddr so cannot unfold contig entries (since | |
1778 | * it requires tlb maintenance). set_pte() is not used in core code, so | |
1779 | * this should never even be called. Regardless do our best to service | |
1780 | * any call and emit a warning if there is any attempt to set a pte on | |
1781 | * top of an existing contig range. | |
1782 | */ | |
1783 | pte_t orig_pte = __ptep_get(ptep); | |
1784 | ||
1785 | WARN_ON_ONCE(pte_valid_cont(orig_pte)); | |
1786 | __set_pte(ptep, pte_mknoncont(pte)); | |
1787 | } | |
1788 | ||
1789 | #define set_ptes set_ptes | |
b972fc6a | 1790 | static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr, |
4602e575 RR |
1791 | pte_t *ptep, pte_t pte, unsigned int nr) |
1792 | { | |
1793 | pte = pte_mknoncont(pte); | |
1794 | ||
1795 | if (likely(nr == 1)) { | |
1796 | contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); | |
1797 | __set_ptes(mm, addr, ptep, pte, 1); | |
f0c22649 | 1798 | contpte_try_fold(mm, addr, ptep, pte); |
4602e575 RR |
1799 | } else { |
1800 | contpte_set_ptes(mm, addr, ptep, pte, nr); | |
1801 | } | |
1802 | } | |
1803 | ||
1804 | static inline void pte_clear(struct mm_struct *mm, | |
1805 | unsigned long addr, pte_t *ptep) | |
1806 | { | |
1807 | contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); | |
1808 | __pte_clear(mm, addr, ptep); | |
1809 | } | |
1810 | ||
6b1e4efb RR |
1811 | #define clear_full_ptes clear_full_ptes |
1812 | static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, | |
1813 | pte_t *ptep, unsigned int nr, int full) | |
1814 | { | |
1815 | if (likely(nr == 1)) { | |
1816 | contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); | |
1817 | __clear_full_ptes(mm, addr, ptep, nr, full); | |
1818 | } else { | |
1819 | contpte_clear_full_ptes(mm, addr, ptep, nr, full); | |
1820 | } | |
1821 | } | |
1822 | ||
1823 | #define get_and_clear_full_ptes get_and_clear_full_ptes | |
1824 | static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, | |
1825 | unsigned long addr, pte_t *ptep, | |
1826 | unsigned int nr, int full) | |
1827 | { | |
1828 | pte_t pte; | |
1829 | ||
1830 | if (likely(nr == 1)) { | |
1831 | contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); | |
1832 | pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full); | |
1833 | } else { | |
1834 | pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full); | |
1835 | } | |
1836 | ||
1837 | return pte; | |
1838 | } | |
1839 | ||
4602e575 RR |
1840 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
1841 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, | |
1842 | unsigned long addr, pte_t *ptep) | |
1843 | { | |
1844 | contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); | |
1845 | return __ptep_get_and_clear(mm, addr, ptep); | |
1846 | } | |
1847 | ||
1848 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | |
1849 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | |
1850 | unsigned long addr, pte_t *ptep) | |
1851 | { | |
1852 | pte_t orig_pte = __ptep_get(ptep); | |
1853 | ||
1854 | if (likely(!pte_valid_cont(orig_pte))) | |
1855 | return __ptep_test_and_clear_young(vma, addr, ptep); | |
1856 | ||
1857 | return contpte_ptep_test_and_clear_young(vma, addr, ptep); | |
1858 | } | |
1859 | ||
1860 | #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | |
1861 | static inline int ptep_clear_flush_young(struct vm_area_struct *vma, | |
1862 | unsigned long addr, pte_t *ptep) | |
1863 | { | |
1864 | pte_t orig_pte = __ptep_get(ptep); | |
1865 | ||
1866 | if (likely(!pte_valid_cont(orig_pte))) | |
1867 | return __ptep_clear_flush_young(vma, addr, ptep); | |
1868 | ||
1869 | return contpte_ptep_clear_flush_young(vma, addr, ptep); | |
1870 | } | |
1871 | ||
311a6cf2 | 1872 | #define wrprotect_ptes wrprotect_ptes |
b972fc6a RR |
1873 | static __always_inline void wrprotect_ptes(struct mm_struct *mm, |
1874 | unsigned long addr, pte_t *ptep, unsigned int nr) | |
311a6cf2 RR |
1875 | { |
1876 | if (likely(nr == 1)) { | |
1877 | /* | |
1878 | * Optimization: wrprotect_ptes() can only be called for present | |
1879 | * ptes so we only need to check contig bit as condition for | |
1880 | * unfold, and we can remove the contig bit from the pte we read | |
1881 | * to avoid re-reading. This speeds up fork() which is sensitive | |
1882 | * for order-0 folios. Equivalent to contpte_try_unfold(). | |
1883 | */ | |
1884 | pte_t orig_pte = __ptep_get(ptep); | |
1885 | ||
1886 | if (unlikely(pte_cont(orig_pte))) { | |
1887 | __contpte_try_unfold(mm, addr, ptep, orig_pte); | |
1888 | orig_pte = pte_mknoncont(orig_pte); | |
1889 | } | |
1890 | ___ptep_set_wrprotect(mm, addr, ptep, orig_pte); | |
1891 | } else { | |
1892 | contpte_wrprotect_ptes(mm, addr, ptep, nr); | |
1893 | } | |
1894 | } | |
1895 | ||
4602e575 RR |
1896 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT |
1897 | static inline void ptep_set_wrprotect(struct mm_struct *mm, | |
1898 | unsigned long addr, pte_t *ptep) | |
1899 | { | |
311a6cf2 | 1900 | wrprotect_ptes(mm, addr, ptep, 1); |
4602e575 RR |
1901 | } |
1902 | ||
1903 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | |
1904 | static inline int ptep_set_access_flags(struct vm_area_struct *vma, | |
1905 | unsigned long addr, pte_t *ptep, | |
1906 | pte_t entry, int dirty) | |
1907 | { | |
1908 | pte_t orig_pte = __ptep_get(ptep); | |
1909 | ||
1910 | entry = pte_mknoncont(entry); | |
1911 | ||
1912 | if (likely(!pte_valid_cont(orig_pte))) | |
1913 | return __ptep_set_access_flags(vma, addr, ptep, entry, dirty); | |
1914 | ||
1915 | return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty); | |
1916 | } | |
1917 | ||
89e86854 LY |
1918 | #define clear_young_dirty_ptes clear_young_dirty_ptes |
1919 | static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, | |
1920 | unsigned long addr, pte_t *ptep, | |
1921 | unsigned int nr, cydp_t flags) | |
1922 | { | |
1923 | if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) | |
1924 | __clear_young_dirty_ptes(vma, addr, ptep, nr, flags); | |
1925 | else | |
1926 | contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags); | |
1927 | } | |
1928 | ||
4602e575 RR |
1929 | #else /* CONFIG_ARM64_CONTPTE */ |
1930 | ||
/*
 * Without CONFIG_ARM64_CONTPTE each public name is a plain alias of its
 * double-underscore implementation. The __HAVE_ARCH_* defines are kept
 * alongside the aliases they belong to.
 */
#define ptep_get				__ptep_get
#define set_pte					__set_pte
#define set_ptes				__set_ptes
#define pte_clear				__pte_clear
#define clear_full_ptes				__clear_full_ptes
#define get_and_clear_full_ptes			__get_and_clear_full_ptes
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear			__ptep_get_and_clear
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young		__ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young			__ptep_clear_flush_young
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect			__ptep_set_wrprotect
#define wrprotect_ptes				__wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags
#define clear_young_dirty_ptes			__clear_young_dirty_ptes
5a00bfd6 | 1949 | |
4602e575 RR |
1950 | #endif /* CONFIG_ARM64_CONTPTE */ |
1951 | ||
4f04d8f0 CM |
1952 | #endif /* !__ASSEMBLY__ */ |
1953 | ||
1954 | #endif /* __ASM_PGTABLE_H */ |