Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
3891a04a | 33 | unsigned long lines; |
ef6bea6d | 34 | bool to_dmesg; |
926e5392 AV |
35 | }; |
36 | ||
fe770bf0 PA |
37 | struct addr_marker { |
38 | unsigned long start_address; | |
39 | const char *name; | |
3891a04a | 40 | unsigned long max_lines; |
fe770bf0 PA |
41 | }; |
42 | ||
92851e2f AS |
/*
 * Indices for address_markers; keep sync'd w/ address_markers below.
 *
 * Every row of address_markers[] (except the terminating { -1, NULL })
 * needs an enumerator here, in the same order and guarded by the same
 * #ifdef, otherwise all following indices point at the wrong row.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
	KERNEL_SPACE_NR,
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
# ifdef CONFIG_EFI
	/* Matches the "EFI Runtime Services" row */
	EFI_END_NR,
# endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
67 | ||
fe770bf0 PA |
68 | /* Address space markers hints */ |
69 | static struct addr_marker address_markers[] = { | |
70 | { 0, "User Space" }, | |
71 | #ifdef CONFIG_X86_64 | |
72 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 73 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 74 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 | 75 | { VMEMMAP_START, "Vmemmap" }, |
8a5a5d15 | 76 | # ifdef CONFIG_X86_ESPFIX64 |
3891a04a | 77 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
8266e31e MK |
78 | # endif |
79 | # ifdef CONFIG_EFI | |
80 | { EFI_VA_END, "EFI Runtime Services" }, | |
8a5a5d15 | 81 | # endif |
fe770bf0 | 82 | { __START_KERNEL_map, "High Kernel Mapping" }, |
9a79cf9c YL |
83 | { MODULES_VADDR, "Modules" }, |
84 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
85 | #else |
86 | { PAGE_OFFSET, "Kernel Mapping" }, | |
87 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
88 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
89 | # ifdef CONFIG_HIGHMEM | |
90 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
91 | # endif | |
92 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
93 | #endif | |
94 | { -1, NULL } /* End of list */ | |
95 | }; | |
926e5392 | 96 | |
fe770bf0 PA |
/*
 * Multipliers for offsets within the PTEs: the number of bytes of
 * virtual address space covered by a single entry at each level.
 * Each level is the level below it times that lower table's entry count.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)			/* one pte entry */
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)	/* one pmd entry */
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)	/* one pud entry */
#define PGD_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)	/* one pgd entry */
926e5392 | 102 | |
ef6bea6d BP |
/*
 * Start a new output line.  With to_dmesg set the text goes to the kernel
 * log at KERN_INFO; otherwise it is appended to seq_file m, provided m is
 * non-NULL.  fmt must be a string literal because it is pasted onto the
 * log-level prefix.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})

/*
 * Same routing as pt_dump_seq_printf(), but the kernel-log variant uses
 * KERN_CONT so the text continues the line that is already open.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
120 | ||
926e5392 AV |
121 | /* |
122 | * Print a readable form of a pgprot_t to the seq_file | |
123 | */ | |
ef6bea6d | 124 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 125 | { |
fe770bf0 PA |
126 | pgprotval_t pr = pgprot_val(prot); |
127 | static const char * const level_name[] = | |
128 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
129 | ||
130 | if (!pgprot_val(prot)) { | |
131 | /* Not present */ | |
f439c429 | 132 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
133 | } else { |
134 | if (pr & _PAGE_USER) | |
ef6bea6d | 135 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 136 | else |
ef6bea6d | 137 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 138 | if (pr & _PAGE_RW) |
ef6bea6d | 139 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 140 | else |
ef6bea6d | 141 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 142 | if (pr & _PAGE_PWT) |
ef6bea6d | 143 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 144 | else |
ef6bea6d | 145 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 146 | if (pr & _PAGE_PCD) |
ef6bea6d | 147 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 148 | else |
ef6bea6d | 149 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 150 | |
f439c429 JG |
151 | /* Bit 7 has a different meaning on level 3 vs 4 */ |
152 | if (level <= 3 && pr & _PAGE_PSE) | |
153 | pt_dump_cont_printf(m, dmsg, "PSE "); | |
154 | else | |
155 | pt_dump_cont_printf(m, dmsg, " "); | |
156 | if ((level == 4 && pr & _PAGE_PAT) || | |
157 | ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) | |
da25e628 | 158 | pt_dump_cont_printf(m, dmsg, "PAT "); |
f439c429 JG |
159 | else |
160 | pt_dump_cont_printf(m, dmsg, " "); | |
fe770bf0 | 161 | if (pr & _PAGE_GLOBAL) |
ef6bea6d | 162 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 163 | else |
ef6bea6d | 164 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 165 | if (pr & _PAGE_NX) |
ef6bea6d | 166 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 167 | else |
ef6bea6d | 168 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 169 | } |
ef6bea6d | 170 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
171 | } |
172 | ||
/*
 * On 64 bits, sign-extend the 48 bit address to 64 bit; on 32 bits the
 * address is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	/*
	 * Move bit 47 into the sign position, then shift back.  This relies
	 * on >> of a negative signed long replicating the sign bit.
	 */
	signed long sext = (signed long)(u << 16);

	return sext >> 16;
#else
	return u;
#endif
}
184 | ||
185 | /* | |
186 | * This function gets called on a break in a continuous series | |
187 | * of PTE entries; the next one is different so we need to | |
188 | * print what we collected so far. | |
189 | */ | |
190 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 191 | pgprot_t new_prot, int level) |
926e5392 | 192 | { |
fe770bf0 | 193 | pgprotval_t prot, cur; |
3891a04a | 194 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
195 | |
196 | /* | |
197 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
198 | * we have now. "break" is either changing perms, levels or |
199 | * address space marker. | |
926e5392 | 200 | */ |
da25e628 TK |
201 | prot = pgprot_val(new_prot); |
202 | cur = pgprot_val(st->current_prot); | |
926e5392 | 203 | |
fe770bf0 PA |
204 | if (!st->level) { |
205 | /* First entry */ | |
206 | st->current_prot = new_prot; | |
207 | st->level = level; | |
208 | st->marker = address_markers; | |
3891a04a | 209 | st->lines = 0; |
ef6bea6d BP |
210 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
211 | st->marker->name); | |
fe770bf0 PA |
212 | } else if (prot != cur || level != st->level || |
213 | st->current_address >= st->marker[1].start_address) { | |
214 | const char *unit = units; | |
926e5392 | 215 | unsigned long delta; |
6424fb38 | 216 | int width = sizeof(unsigned long) * 2; |
926e5392 | 217 | |
926e5392 AV |
218 | /* |
219 | * Now print the actual finished series | |
220 | */ | |
3891a04a PA |
221 | if (!st->marker->max_lines || |
222 | st->lines < st->marker->max_lines) { | |
223 | pt_dump_seq_printf(m, st->to_dmesg, | |
224 | "0x%0*lx-0x%0*lx ", | |
225 | width, st->start_address, | |
226 | width, st->current_address); | |
926e5392 | 227 | |
3891a04a PA |
228 | delta = st->current_address - st->start_address; |
229 | while (!(delta & 1023) && unit[1]) { | |
230 | delta >>= 10; | |
231 | unit++; | |
232 | } | |
233 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
234 | delta, *unit); | |
235 | printk_prot(m, st->current_prot, st->level, | |
236 | st->to_dmesg); | |
926e5392 | 237 | } |
3891a04a | 238 | st->lines++; |
fe770bf0 PA |
239 | |
240 | /* | |
241 | * We print markers for special areas of address space, | |
242 | * such as the start of vmalloc space etc. | |
243 | * This helps in the interpretation. | |
244 | */ | |
245 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
246 | if (st->marker->max_lines && |
247 | st->lines > st->marker->max_lines) { | |
248 | unsigned long nskip = | |
249 | st->lines - st->marker->max_lines; | |
250 | pt_dump_seq_printf(m, st->to_dmesg, | |
251 | "... %lu entr%s skipped ... \n", | |
252 | nskip, | |
253 | nskip == 1 ? "y" : "ies"); | |
254 | } | |
fe770bf0 | 255 | st->marker++; |
3891a04a | 256 | st->lines = 0; |
ef6bea6d BP |
257 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
258 | st->marker->name); | |
926e5392 | 259 | } |
fe770bf0 | 260 | |
926e5392 AV |
261 | st->start_address = st->current_address; |
262 | st->current_prot = new_prot; | |
263 | st->level = level; | |
fe770bf0 | 264 | } |
926e5392 AV |
265 | } |
266 | ||
fe770bf0 | 267 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
268 | unsigned long P) |
269 | { | |
270 | int i; | |
271 | pte_t *start; | |
da25e628 | 272 | pgprotval_t prot; |
926e5392 AV |
273 | |
274 | start = (pte_t *) pmd_page_vaddr(addr); | |
275 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
da25e628 | 276 | prot = pte_flags(*start); |
fe770bf0 | 277 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
da25e628 | 278 | note_page(m, st, __pgprot(prot), 4); |
926e5392 AV |
279 | start++; |
280 | } | |
281 | } | |
282 | ||
#if PTRS_PER_PMD > 1

/*
 * Visit every entry of the pmd table that pud entry @addr points to.
 * Empty, large or non-present entries are reported to note_page() as
 * level 3 entries; anything else is a pointer to a pte table, which is
 * walked in turn.
 *
 * @P: virtual address mapped by the first entry of the table
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
							unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
		unsigned long base = P + i * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd))
			note_page(m, st, __pgprot(pmd_flags(*pmd)), 3);
		else
			walk_pte_level(m, st, *pmd, base);
	}
}

#else
/* The pmd level is folded: treat the pud entry as a pmd entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 314 | |
fe770bf0 PA |
#if PTRS_PER_PUD > 1

/*
 * Visit every entry of the pud table that pgd entry @addr points to.
 * Empty, large or non-present entries are reported to note_page() as
 * level 2 entries; anything else is handed to walk_pmd_level().
 *
 * @P: virtual address mapped by the first entry of the table
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
							unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		unsigned long base = P + i * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud))
			note_page(m, st, __pgprot(pud_flags(*pud)), 2);
		else
			walk_pmd_level(m, st, *pud, base);
	}
}

#else
/* The pud level is folded: treat the pgd entry as a pud entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
348 | ||
ef6bea6d | 349 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) |
926e5392 | 350 | { |
fe770bf0 | 351 | #ifdef CONFIG_X86_64 |
926e5392 | 352 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
353 | #else |
354 | pgd_t *start = swapper_pg_dir; | |
355 | #endif | |
da25e628 | 356 | pgprotval_t prot; |
926e5392 | 357 | int i; |
ef6bea6d | 358 | struct pg_state st = {}; |
926e5392 | 359 | |
ef6bea6d BP |
360 | if (pgd) { |
361 | start = pgd; | |
362 | st.to_dmesg = true; | |
363 | } | |
926e5392 AV |
364 | |
365 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
366 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
367 | if (!pgd_none(*start)) { | |
da25e628 TK |
368 | if (pgd_large(*start) || !pgd_present(*start)) { |
369 | prot = pgd_flags(*start); | |
fe770bf0 | 370 | note_page(m, &st, __pgprot(prot), 1); |
da25e628 | 371 | } else { |
fe770bf0 PA |
372 | walk_pud_level(m, &st, *start, |
373 | i * PGD_LEVEL_MULT); | |
da25e628 | 374 | } |
fe770bf0 | 375 | } else |
926e5392 | 376 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 377 | |
926e5392 AV |
378 | start++; |
379 | } | |
fe770bf0 PA |
380 | |
381 | /* Flush out the last page */ | |
382 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
383 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
384 | } |
385 | ||
386 | static int ptdump_show(struct seq_file *m, void *v) | |
387 | { | |
ef6bea6d | 388 | ptdump_walk_pgd_level(m, NULL); |
926e5392 AV |
389 | return 0; |
390 | } | |
391 | ||
392 | static int ptdump_open(struct inode *inode, struct file *filp) | |
393 | { | |
394 | return single_open(filp, ptdump_show, NULL); | |
395 | } | |
396 | ||
397 | static const struct file_operations ptdump_fops = { | |
398 | .open = ptdump_open, | |
399 | .read = seq_read, | |
400 | .llseek = seq_lseek, | |
401 | .release = single_release, | |
402 | }; | |
403 | ||
a4928cff | 404 | static int pt_dump_init(void) |
926e5392 AV |
405 | { |
406 | struct dentry *pe; | |
407 | ||
fe770bf0 PA |
408 | #ifdef CONFIG_X86_32 |
409 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
410 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
411 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 412 | # ifdef CONFIG_HIGHMEM |
92851e2f | 413 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 414 | # endif |
92851e2f | 415 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
416 | #endif |
417 | ||
926e5392 AV |
418 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
419 | &ptdump_fops); | |
420 | if (!pe) | |
421 | return -ENOMEM; | |
422 | ||
423 | return 0; | |
424 | } | |
425 | ||
426 | __initcall(pt_dump_init); | |
427 | MODULE_LICENSE("GPL"); | |
428 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
429 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |