Commit | Line | Data |
---|---|---|
b886d83c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
926e5392 AV |
2 | /* |
3 | * Debug helper to dump the current kernel pagetables of the system | |
4 | * so that we can see what the various memory ranges are set to. | |
5 | * | |
6 | * (C) Copyright 2008 Intel Corporation | |
7 | * | |
8 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
926e5392 AV |
9 | */ |
10 | ||
fe770bf0 | 11 | #include <linux/debugfs.h> |
04b67022 | 12 | #include <linux/kasan.h> |
fe770bf0 | 13 | #include <linux/mm.h> |
84e629b6 | 14 | #include <linux/init.h> |
146fbb76 | 15 | #include <linux/sched.h> |
926e5392 | 16 | #include <linux/seq_file.h> |
d6ef1f19 | 17 | #include <linux/highmem.h> |
c200dac7 | 18 | #include <linux/pci.h> |
926e5392 | 19 | |
c200dac7 | 20 | #include <asm/e820/types.h> |
926e5392 AV |
21 | #include <asm/pgtable.h> |
22 | ||
23 | /* | |
24 | * The dumper groups pagetable entries of the same type into one, and for | |
25 | * that it needs to keep some state when walking, and flush this state | |
26 | * when a "break" in the continuity is found. | |
27 | */ | |
28 | struct pg_state { | |
29 | int level; | |
30 | pgprot_t current_prot; | |
672c0ae0 | 31 | pgprotval_t effective_prot; |
926e5392 AV |
32 | unsigned long start_address; |
33 | unsigned long current_address; | |
fe770bf0 | 34 | const struct addr_marker *marker; |
3891a04a | 35 | unsigned long lines; |
ef6bea6d | 36 | bool to_dmesg; |
e1a58320 SS |
37 | bool check_wx; |
38 | unsigned long wx_pages; | |
926e5392 AV |
39 | }; |
40 | ||
fe770bf0 PA |
/*
 * One labelled boundary in the address space. The dump prints the name as a
 * "---[ name ]---" header once the walk reaches start_address.
 */
struct addr_marker {
	/* First address covered by this marker. */
	unsigned long start_address;
	/* Label printed in the header line. */
	const char *name;
	/* Cap on the lines printed for this region; 0 means no cap. */
	unsigned long max_lines;
};
46 | ||
146122e2 TG |
47 | /* Address space markers hints */ |
48 | ||
49 | #ifdef CONFIG_X86_64 | |
50 | ||
92851e2f AS |
/*
 * Indices into address_markers[] for the 64-bit layout. Entries guarded by a
 * config option only exist when that option is enabled.
 */
enum address_markers_idx {
	USER_SPACE_NR,
	KERNEL_SPACE_NR,
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};
77 | ||
78 | static struct addr_marker address_markers[] = { | |
79 | [USER_SPACE_NR] = { 0, "User Space" }, | |
80 | [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, | |
81 | [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, | |
82 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, | |
83 | [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, | |
84 | #ifdef CONFIG_KASAN | |
09e61a77 KS |
85 | /* |
86 | * These fields get initialized with the (dynamic) | |
87 | * KASAN_SHADOW_{START,END} values in pt_dump_init(). | |
88 | */ | |
89 | [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" }, | |
90 | [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" }, | |
f55f0501 AL |
91 | #endif |
92 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | |
5c7919bb | 93 | [LDT_NR] = { 0UL, "LDT remap" }, |
146122e2 | 94 | #endif |
92a0f81d | 95 | [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, |
146122e2 TG |
96 | #ifdef CONFIG_X86_ESPFIX64 |
97 | [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, | |
98 | #endif | |
99 | #ifdef CONFIG_EFI | |
100 | [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, | |
101 | #endif | |
102 | [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, | |
103 | [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, | |
104 | [MODULES_END_NR] = { MODULES_END, "End Modules" }, | |
105 | [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, | |
106 | [END_OF_SPACE_NR] = { -1, NULL } | |
107 | }; | |
108 | ||
4e8537e4 JR |
/* Top-level page table walked when the caller does not supply one (64-bit). */
#define INIT_PGD	((pgd_t *)&init_top_pgt)
110 | ||
146122e2 TG |
111 | #else /* CONFIG_X86_64 */ |
112 | ||
/*
 * Indices into address_markers[] for the 32-bit layout. Entries guarded by a
 * config option only exist when that option is enabled.
 */
enum address_markers_idx {
	USER_SPACE_NR,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};
128 | ||
fe770bf0 | 129 | static struct addr_marker address_markers[] = { |
146122e2 TG |
130 | [USER_SPACE_NR] = { 0, "User Space" }, |
131 | [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, | |
132 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, | |
133 | [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, | |
134 | #ifdef CONFIG_HIGHMEM | |
135 | [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, | |
f3e48e54 JR |
136 | #endif |
137 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | |
138 | [LDT_NR] = { 0UL, "LDT remap" }, | |
fe770bf0 | 139 | #endif |
92a0f81d | 140 | [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, |
146122e2 TG |
141 | [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, |
142 | [END_OF_SPACE_NR] = { -1, NULL } | |
fe770bf0 | 143 | }; |
926e5392 | 144 | |
4e8537e4 JR |
/* Top-level page table walked when the caller does not supply one (32-bit). */
#define INIT_PGD	(swapper_pg_dir)
146 | ||
146122e2 TG |
147 | #endif /* !CONFIG_X86_64 */ |
148 | ||
fe770bf0 PA |
/*
 * Address stride of a single entry at each page-table level. A PTE entry
 * advances the address by one page; every higher level multiplies the stride
 * of the level below it by that lower level's number of entries.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT	(PTRS_PER_P4D * P4D_LEVEL_MULT)
926e5392 | 155 | |
ef6bea6d BP |
/*
 * Emit one formatted message: through printk() at KERN_INFO when to_dmesg is
 * set, otherwise into seq_file m if m is non-NULL. When to_dmesg is clear and
 * m is NULL nothing is printed. fmt must be a string literal because it is
 * pasted after the log-level prefix.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
164 | ||
/*
 * Same as pt_dump_seq_printf(), except that the printk() path uses KERN_CONT
 * instead of KERN_INFO. fmt must be a string literal because it is pasted
 * after the log-level prefix.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
173 | ||
926e5392 AV |
174 | /* |
175 | * Print a readable form of a pgprot_t to the seq_file | |
176 | */ | |
ef6bea6d | 177 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 178 | { |
fe770bf0 PA |
179 | pgprotval_t pr = pgprot_val(prot); |
180 | static const char * const level_name[] = | |
45dcd209 | 181 | { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; |
fe770bf0 | 182 | |
c0534494 | 183 | if (!(pr & _PAGE_PRESENT)) { |
fe770bf0 | 184 | /* Not present */ |
f439c429 | 185 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
186 | } else { |
187 | if (pr & _PAGE_USER) | |
ef6bea6d | 188 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 189 | else |
ef6bea6d | 190 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 191 | if (pr & _PAGE_RW) |
ef6bea6d | 192 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 193 | else |
ef6bea6d | 194 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 195 | if (pr & _PAGE_PWT) |
ef6bea6d | 196 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 197 | else |
ef6bea6d | 198 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 199 | if (pr & _PAGE_PCD) |
ef6bea6d | 200 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 201 | else |
ef6bea6d | 202 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 203 | |
f439c429 | 204 | /* Bit 7 has a different meaning on level 3 vs 4 */ |
45dcd209 | 205 | if (level <= 4 && pr & _PAGE_PSE) |
f439c429 JG |
206 | pt_dump_cont_printf(m, dmsg, "PSE "); |
207 | else | |
208 | pt_dump_cont_printf(m, dmsg, " "); | |
45dcd209 KS |
209 | if ((level == 5 && pr & _PAGE_PAT) || |
210 | ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE)) | |
da25e628 | 211 | pt_dump_cont_printf(m, dmsg, "PAT "); |
f439c429 JG |
212 | else |
213 | pt_dump_cont_printf(m, dmsg, " "); | |
fe770bf0 | 214 | if (pr & _PAGE_GLOBAL) |
ef6bea6d | 215 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 216 | else |
ef6bea6d | 217 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 218 | if (pr & _PAGE_NX) |
ef6bea6d | 219 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 220 | else |
ef6bea6d | 221 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 222 | } |
ef6bea6d | 223 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
224 | } |
225 | ||
226 | /* | |
fe770bf0 | 227 | * On 64 bits, sign-extend the 48 bit address to 64 bit |
926e5392 | 228 | */ |
fe770bf0 | 229 | static unsigned long normalize_addr(unsigned long u) |
926e5392 | 230 | { |
3a366f79 KS |
231 | int shift; |
232 | if (!IS_ENABLED(CONFIG_X86_64)) | |
233 | return u; | |
234 | ||
235 | shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); | |
236 | return (signed long)(u << shift) >> shift; | |
926e5392 AV |
237 | } |
238 | ||
c200dac7 TG |
239 | static void note_wx(struct pg_state *st) |
240 | { | |
241 | unsigned long npages; | |
242 | ||
243 | npages = (st->current_address - st->start_address) / PAGE_SIZE; | |
244 | ||
245 | #ifdef CONFIG_PCI_BIOS | |
246 | /* | |
247 | * If PCI BIOS is enabled, the PCI BIOS area is forced to WX. | |
248 | * Inform about it, but avoid the warning. | |
249 | */ | |
250 | if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && | |
251 | st->current_address <= PAGE_OFFSET + BIOS_END) { | |
252 | pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); | |
253 | return; | |
254 | } | |
255 | #endif | |
256 | /* Account the WX pages */ | |
257 | st->wx_pages += npages; | |
510bb96f TG |
258 | WARN_ONCE(__supported_pte_mask & _PAGE_NX, |
259 | "x86/mm: Found insecure W+X mapping at address %pS\n", | |
c200dac7 TG |
260 | (void *)st->start_address); |
261 | } | |
262 | ||
926e5392 AV |
263 | /* |
264 | * This function gets called on a break in a continuous series | |
265 | * of PTE entries; the next one is different so we need to | |
266 | * print what we collected so far. | |
267 | */ | |
268 | static void note_page(struct seq_file *m, struct pg_state *st, | |
672c0ae0 | 269 | pgprot_t new_prot, pgprotval_t new_eff, int level) |
926e5392 | 270 | { |
672c0ae0 | 271 | pgprotval_t prot, cur, eff; |
3891a04a | 272 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
273 | |
274 | /* | |
275 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
276 | * we have now. "break" is either changing perms, levels or |
277 | * address space marker. | |
926e5392 | 278 | */ |
da25e628 TK |
279 | prot = pgprot_val(new_prot); |
280 | cur = pgprot_val(st->current_prot); | |
672c0ae0 | 281 | eff = st->effective_prot; |
926e5392 | 282 | |
fe770bf0 PA |
283 | if (!st->level) { |
284 | /* First entry */ | |
285 | st->current_prot = new_prot; | |
672c0ae0 | 286 | st->effective_prot = new_eff; |
fe770bf0 PA |
287 | st->level = level; |
288 | st->marker = address_markers; | |
3891a04a | 289 | st->lines = 0; |
ef6bea6d BP |
290 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
291 | st->marker->name); | |
672c0ae0 | 292 | } else if (prot != cur || new_eff != eff || level != st->level || |
fe770bf0 PA |
293 | st->current_address >= st->marker[1].start_address) { |
294 | const char *unit = units; | |
926e5392 | 295 | unsigned long delta; |
6424fb38 | 296 | int width = sizeof(unsigned long) * 2; |
e1a58320 | 297 | |
c200dac7 TG |
298 | if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) |
299 | note_wx(st); | |
926e5392 | 300 | |
926e5392 AV |
301 | /* |
302 | * Now print the actual finished series | |
303 | */ | |
3891a04a PA |
304 | if (!st->marker->max_lines || |
305 | st->lines < st->marker->max_lines) { | |
306 | pt_dump_seq_printf(m, st->to_dmesg, | |
307 | "0x%0*lx-0x%0*lx ", | |
308 | width, st->start_address, | |
309 | width, st->current_address); | |
926e5392 | 310 | |
3891a04a PA |
311 | delta = st->current_address - st->start_address; |
312 | while (!(delta & 1023) && unit[1]) { | |
313 | delta >>= 10; | |
314 | unit++; | |
315 | } | |
316 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
317 | delta, *unit); | |
318 | printk_prot(m, st->current_prot, st->level, | |
319 | st->to_dmesg); | |
926e5392 | 320 | } |
3891a04a | 321 | st->lines++; |
fe770bf0 PA |
322 | |
323 | /* | |
324 | * We print markers for special areas of address space, | |
325 | * such as the start of vmalloc space etc. | |
326 | * This helps in the interpretation. | |
327 | */ | |
328 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
329 | if (st->marker->max_lines && |
330 | st->lines > st->marker->max_lines) { | |
331 | unsigned long nskip = | |
332 | st->lines - st->marker->max_lines; | |
333 | pt_dump_seq_printf(m, st->to_dmesg, | |
334 | "... %lu entr%s skipped ... \n", | |
335 | nskip, | |
336 | nskip == 1 ? "y" : "ies"); | |
337 | } | |
fe770bf0 | 338 | st->marker++; |
3891a04a | 339 | st->lines = 0; |
ef6bea6d BP |
340 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
341 | st->marker->name); | |
926e5392 | 342 | } |
fe770bf0 | 343 | |
926e5392 AV |
344 | st->start_address = st->current_address; |
345 | st->current_prot = new_prot; | |
672c0ae0 | 346 | st->effective_prot = new_eff; |
926e5392 | 347 | st->level = level; |
fe770bf0 | 348 | } |
926e5392 AV |
349 | } |
350 | ||
672c0ae0 JB |
351 | static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) |
352 | { | |
353 | return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | | |
354 | ((prot1 | prot2) & _PAGE_NX); | |
355 | } | |
356 | ||
357 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, | |
358 | pgprotval_t eff_in, unsigned long P) | |
926e5392 AV |
359 | { |
360 | int i; | |
d6ef1f19 | 361 | pte_t *pte; |
672c0ae0 | 362 | pgprotval_t prot, eff; |
926e5392 | 363 | |
926e5392 | 364 | for (i = 0; i < PTRS_PER_PTE; i++) { |
fe770bf0 | 365 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
d6ef1f19 JR |
366 | pte = pte_offset_map(&addr, st->current_address); |
367 | prot = pte_flags(*pte); | |
368 | eff = effective_prot(eff_in, prot); | |
672c0ae0 | 369 | note_page(m, st, __pgprot(prot), eff, 5); |
d6ef1f19 | 370 | pte_unmap(pte); |
926e5392 AV |
371 | } |
372 | } | |
04b67022 AR |
373 | #ifdef CONFIG_KASAN |
374 | ||
375 | /* | |
376 | * This is an optimization for KASAN=y case. Since all kasan page tables | |
9577dd74 | 377 | * eventually point to the kasan_early_shadow_page we could call note_page() |
04b67022 AR |
378 | * right away without walking through lower level page tables. This saves |
379 | * us dozens of seconds (minutes for 5-level config) while checking for | |
380 | * W+X mapping or reading kernel_page_tables debugfs file. | |
381 | */ | |
382 | static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |
383 | void *pt) | |
384 | { | |
9577dd74 AK |
385 | if (__pa(pt) == __pa(kasan_early_shadow_pmd) || |
386 | (pgtable_l5_enabled() && | |
387 | __pa(pt) == __pa(kasan_early_shadow_p4d)) || | |
388 | __pa(pt) == __pa(kasan_early_shadow_pud)) { | |
389 | pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); | |
672c0ae0 | 390 | note_page(m, st, __pgprot(prot), 0, 5); |
04b67022 AR |
391 | return true; |
392 | } | |
393 | return false; | |
394 | } | |
395 | #else | |
396 | static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |
397 | void *pt) | |
398 | { | |
399 | return false; | |
400 | } | |
401 | #endif | |
926e5392 | 402 | |
#if PTRS_PER_PMD > 1

/*
 * Walk the pmd table below the pud entry @addr. Empty, large and non-present
 * entries are reported to note_page() as level-4 entries; everything else is
 * descended into, unless it belongs to a kasan early shadow table.
 *
 * @eff_in: effective protection accumulated from the upper levels
 * @P:      un-normalized address covered by the first pmd entry
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   pgprotval_t eff_in, unsigned long P)
{
	pmd_t *const table = (pmd_t *)pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		const unsigned long base = P + idx * PMD_LEVEL_MULT;
		pmd_t *pmd = table + idx;
		pgprotval_t flags, eff;

		st->current_address = normalize_addr(base);
		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 0, 4);
			continue;
		}

		flags = pmd_flags(*pmd);
		eff = effective_prot(eff_in, flags);
		if (pmd_large(*pmd) || !pmd_present(*pmd))
			note_page(m, st, __pgprot(flags), eff, 4);
		else if (!kasan_page_table(m, st, table))
			walk_pte_level(m, st, *pmd, eff, base);
	}
}

#else
/* The pmd level is folded: treat the pud entry as a pmd entry. */
#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 435 | |
fe770bf0 PA |
#if PTRS_PER_PUD > 1

/*
 * Walk the pud table below the p4d entry @addr. Empty, large and non-present
 * entries are reported to note_page() as level-3 entries; everything else is
 * descended into, unless it belongs to a kasan early shadow table.
 *
 * @eff_in: effective protection accumulated from the upper levels
 * @P:      un-normalized address covered by the first pud entry
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
			   pgprotval_t eff_in, unsigned long P)
{
	pud_t *const table = (pud_t *)p4d_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		const unsigned long base = P + idx * PUD_LEVEL_MULT;
		pud_t *pud = table + idx;
		pgprotval_t flags, eff;

		st->current_address = normalize_addr(base);
		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 0, 3);
			continue;
		}

		flags = pud_flags(*pud);
		eff = effective_prot(eff_in, flags);
		if (pud_large(*pud) || !pud_present(*pud))
			note_page(m, st, __pgprot(flags), eff, 3);
		else if (!kasan_page_table(m, st, table))
			walk_pmd_level(m, st, *pud, eff, base);
	}
}

#else
/* The pud level is folded: treat the p4d entry as a pud entry. */
#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
#define p4d_none(a)  pud_none(__pud(p4d_val(a)))
#endif
470 | ||
672c0ae0 JB |
471 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, |
472 | pgprotval_t eff_in, unsigned long P) | |
fdd3d8ce KS |
473 | { |
474 | int i; | |
04b67022 | 475 | p4d_t *start, *p4d_start; |
672c0ae0 | 476 | pgprotval_t prot, eff; |
fdd3d8ce | 477 | |
c65e774f | 478 | if (PTRS_PER_P4D == 1) |
672c0ae0 | 479 | return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); |
c65e774f | 480 | |
04b67022 | 481 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); |
fdd3d8ce KS |
482 | |
483 | for (i = 0; i < PTRS_PER_P4D; i++) { | |
484 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); | |
485 | if (!p4d_none(*start)) { | |
672c0ae0 JB |
486 | prot = p4d_flags(*start); |
487 | eff = effective_prot(eff_in, prot); | |
fdd3d8ce | 488 | if (p4d_large(*start) || !p4d_present(*start)) { |
672c0ae0 | 489 | note_page(m, st, __pgprot(prot), eff, 2); |
04b67022 | 490 | } else if (!kasan_page_table(m, st, p4d_start)) { |
672c0ae0 | 491 | walk_pud_level(m, st, *start, eff, |
fdd3d8ce KS |
492 | P + i * P4D_LEVEL_MULT); |
493 | } | |
494 | } else | |
672c0ae0 | 495 | note_page(m, st, __pgprot(0), 0, 2); |
fdd3d8ce KS |
496 | |
497 | start++; | |
498 | } | |
499 | } | |
500 | ||
ed7588d5 KS |
/*
 * Local overrides: when 5-level paging is not enabled at runtime, evaluate
 * the pgd entry with the p4d helpers instead. The inner pgd_large() and
 * pgd_none() are not re-expanded by the preprocessor, so they resolve to the
 * definitions that were visible before these macros.
 */
#define pgd_large(a)						\
	(pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
#define pgd_none(a)						\
	(pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
fe770bf0 | 503 | |
f4e342c8 BO |
504 | static inline bool is_hypervisor_range(int idx) |
505 | { | |
b176862f | 506 | #ifdef CONFIG_X86_64 |
f4e342c8 | 507 | /* |
16877a55 KS |
508 | * A hole in the beginning of kernel address space reserved |
509 | * for a hypervisor. | |
f4e342c8 | 510 | */ |
16877a55 KS |
511 | return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) && |
512 | (idx < pgd_index(GUARD_HOLE_END_ADDR)); | |
f4e342c8 | 513 | #else |
b176862f | 514 | return false; |
f4e342c8 | 515 | #endif |
b176862f | 516 | } |
f4e342c8 | 517 | |
e1a58320 | 518 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, |
b4bf4f92 | 519 | bool checkwx, bool dmesg) |
926e5392 | 520 | { |
4e8537e4 | 521 | pgd_t *start = INIT_PGD; |
672c0ae0 | 522 | pgprotval_t prot, eff; |
926e5392 | 523 | int i; |
ef6bea6d | 524 | struct pg_state st = {}; |
926e5392 | 525 | |
ef6bea6d BP |
526 | if (pgd) { |
527 | start = pgd; | |
b4bf4f92 | 528 | st.to_dmesg = dmesg; |
ef6bea6d | 529 | } |
926e5392 | 530 | |
e1a58320 SS |
531 | st.check_wx = checkwx; |
532 | if (checkwx) | |
533 | st.wx_pages = 0; | |
534 | ||
926e5392 | 535 | for (i = 0; i < PTRS_PER_PGD; i++) { |
fe770bf0 | 536 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
f4e342c8 | 537 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
672c0ae0 JB |
538 | prot = pgd_flags(*start); |
539 | #ifdef CONFIG_X86_PAE | |
540 | eff = _PAGE_USER | _PAGE_RW; | |
541 | #else | |
542 | eff = prot; | |
543 | #endif | |
da25e628 | 544 | if (pgd_large(*start) || !pgd_present(*start)) { |
672c0ae0 | 545 | note_page(m, &st, __pgprot(prot), eff, 1); |
da25e628 | 546 | } else { |
672c0ae0 | 547 | walk_p4d_level(m, &st, *start, eff, |
fe770bf0 | 548 | i * PGD_LEVEL_MULT); |
da25e628 | 549 | } |
fe770bf0 | 550 | } else |
672c0ae0 | 551 | note_page(m, &st, __pgprot(0), 0, 1); |
fe770bf0 | 552 | |
146fbb76 | 553 | cond_resched(); |
926e5392 AV |
554 | start++; |
555 | } | |
fe770bf0 PA |
556 | |
557 | /* Flush out the last page */ | |
558 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
672c0ae0 | 559 | note_page(m, &st, __pgprot(0), 0, 0); |
e1a58320 SS |
560 | if (!checkwx) |
561 | return; | |
562 | if (st.wx_pages) | |
563 | pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", | |
564 | st.wx_pages); | |
565 | else | |
566 | pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); | |
567 | } | |
568 | ||
569 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) | |
570 | { | |
b4bf4f92 TG |
571 | ptdump_walk_pgd_level_core(m, pgd, false, true); |
572 | } | |
573 | ||
a4b51ef6 | 574 | void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) |
b4bf4f92 | 575 | { |
a4b51ef6 | 576 | #ifdef CONFIG_PAGE_TABLE_ISOLATION |
28e3ace7 | 577 | if (user && boot_cpu_has(X86_FEATURE_PTI)) |
a4b51ef6 TG |
578 | pgd = kernel_to_user_pgdp(pgd); |
579 | #endif | |
b4bf4f92 TG |
580 | ptdump_walk_pgd_level_core(m, pgd, false, false); |
581 | } | |
582 | EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); | |
583 | ||
/*
 * Run the W+X check on the user-space copy of INIT_PGD. Does nothing unless
 * CONFIG_PAGE_TABLE_ISOLATION is enabled, NX is in __supported_pte_mask and
 * PTI is active on the boot CPU.
 */
void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pgd_t *user_pgd;

	if (!(__supported_pte_mask & _PAGE_NX))
		return;
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	pr_info("x86/mm: Checking user space page tables\n");
	user_pgd = kernel_to_user_pgdp(INIT_PGD);
	ptdump_walk_pgd_level_core(NULL, user_pgd, true, false);
#endif
}
598 | ||
e1a58320 SS |
599 | void ptdump_walk_pgd_level_checkwx(void) |
600 | { | |
b4bf4f92 | 601 | ptdump_walk_pgd_level_core(NULL, NULL, true, false); |
e1a58320 SS |
602 | } |
603 | ||
8609d1b5 | 604 | static int __init pt_dump_init(void) |
926e5392 | 605 | { |
0483e1fa TG |
606 | /* |
607 | * Various markers are not compile-time constants, so assign them | |
608 | * here. | |
609 | */ | |
610 | #ifdef CONFIG_X86_64 | |
611 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; | |
612 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | |
613 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; | |
5c7919bb KS |
614 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
615 | address_markers[LDT_NR].start_address = LDT_BASE_ADDR; | |
616 | #endif | |
09e61a77 KS |
617 | #ifdef CONFIG_KASAN |
618 | address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; | |
619 | address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; | |
620 | #endif | |
0483e1fa | 621 | #endif |
fe770bf0 | 622 | #ifdef CONFIG_X86_32 |
92851e2f AS |
623 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
624 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 625 | # ifdef CONFIG_HIGHMEM |
92851e2f | 626 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 627 | # endif |
92851e2f | 628 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
92a0f81d | 629 | address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; |
f3e48e54 JR |
630 | # ifdef CONFIG_MODIFY_LDT_SYSCALL |
631 | address_markers[LDT_NR].start_address = LDT_BASE_ADDR; | |
632 | # endif | |
fe770bf0 | 633 | #endif |
926e5392 AV |
634 | return 0; |
635 | } | |
926e5392 | 636 | __initcall(pt_dump_init); |