powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
arch/powerpc/mm/ptdump/ptdump.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_page_tables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <linux/const.h>
#include <asm/page.h>
#include <asm/hugetlb.h>

#include <mm/mmu_decl.h>

#include "ptdump.h"

/*
 * To visualise what is happening,
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how many bits of the address we use to index into the
 *    corresponding P**
 *  - P**_SIZE is how much memory we can access through the table - not the
 *    size of the table itself.
 * P** = {PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - ie PTEs that are virtually contiguous
 * with the same PTE flags are chunked together. This is to make it clear how
 * different areas of the kernel virtual memory are used.
 */
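/*
 * Worked example, with illustrative numbers only (the real values depend
 * on the platform and the page size Kconfig): with 4K pages, PAGE_SHIFT = 12,
 * so each PTE maps PAGE_SIZE = 4K; if PMD_SHIFT = 21, each PMD entry maps
 * PMD_SIZE = 1UL << 21 = 2M, and a page table holds
 * PTRS_PER_PTE = 1 << (PMD_SHIFT - PAGE_SHIFT) = 512 entries.
 */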
struct pg_state {
	struct seq_file *seq;
	const struct addr_marker *marker;
	unsigned long start_address;
	unsigned long start_pa;
	unsigned int level;
	u64 current_flags;
	bool check_wx;
	unsigned long wx_pages;
};

struct addr_marker {
	unsigned long start_address;
	const char *name;
};

static struct addr_marker address_markers[] = {
	{ 0,	"Start of kernel VM" },
#ifdef MODULES_VADDR
	{ 0,	"modules start" },
	{ 0,	"modules end" },
#endif
	{ 0,	"vmalloc() Area" },
	{ 0,	"vmalloc() End" },
#ifdef CONFIG_PPC64
	{ 0,	"isa I/O start" },
	{ 0,	"isa I/O end" },
	{ 0,	"phb I/O start" },
	{ 0,	"phb I/O end" },
	{ 0,	"I/O remap start" },
	{ 0,	"I/O remap end" },
	{ 0,	"vmemmap start" },
#else
	{ 0,	"Early I/O remap start" },
	{ 0,	"Early I/O remap end" },
#ifdef CONFIG_HIGHMEM
	{ 0,	"Highmem PTEs start" },
	{ 0,	"Highmem PTEs end" },
#endif
	{ 0,	"Fixmap start" },
	{ 0,	"Fixmap end" },
#endif
#ifdef CONFIG_KASAN
	{ 0,	"kasan shadow mem start" },
	{ 0,	"kasan shadow mem end" },
#endif
	{ -1,	NULL },
};

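/*
 * All printing goes through these wrappers rather than calling
 * seq_printf()/seq_putc() directly: ptdump_check_wx() below reuses the
 * walker with a NULL seq_file, in which case nothing is printed.
 */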
#define pt_dump_seq_printf(m, fmt, args...)	\
({						\
	if (m)					\
		seq_printf(m, fmt, ##args);	\
})

#define pt_dump_seq_putc(m, c)		\
({					\
	if (m)				\
		seq_putc(m, c);		\
})

void pt_dump_size(struct seq_file *m, unsigned long size)
{
	static const char units[] = "KMGTPE";
	const char *unit = units;

	/* Work out what appropriate unit to use */
	while (!(size & 1023) && unit[1]) {
		size >>= 10;
		unit++;
	}
	pt_dump_seq_printf(m, "%9lu%c ", size, *unit);
}

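/*
 * Illustrative only: dump_addr() below passes the size in KiB, so a 16M
 * range arrives here as 16384 and prints as "       16M ", while a 12K
 * range stays at 12 and prints as "       12K ".
 */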
static void dump_flag_info(struct pg_state *st, const struct flag_info
		*flag, u64 pte, int num)
{
	unsigned int i;

	for (i = 0; i < num; i++, flag++) {
		const char *s = NULL;
		u64 val;

		/* flag not defined so don't check it */
		if (flag->mask == 0)
			continue;
		/* Some 'flags' are actually values */
		if (flag->is_val) {
			val = pte & flag->val;
			if (flag->shift)
				val = val >> flag->shift;
			pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
		} else {
			if ((pte & flag->mask) == flag->val)
				s = flag->set;
			else
				s = flag->clear;
			if (s)
				pt_dump_seq_printf(st->seq, " %s", s);
		}
		st->current_flags &= ~flag->mask;
	}
	if (st->current_flags != 0)
		pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
}

static void dump_addr(struct pg_state *st, unsigned long addr)
{
#ifdef CONFIG_PPC64
#define REG	"0x%016lx"
#else
#define REG	"0x%08lx"
#endif

	pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
	pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
	pt_dump_size(st->seq, (addr - st->start_address) >> 10);
}

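/*
 * A dumped range therefore looks like this (illustrative output only):
 *
 * 0xc000000000000000-0xc00000000fffffff  0x0000000000000000      256M
 *
 * i.e. virtual start-end, starting physical address, then the size,
 * followed by the flag strings emitted by dump_flag_info().
 */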
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
	pte_t pte = __pte(st->current_flags);

	if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
		return;

	if (!pte_write(pte) || !pte_exec(pte))
		return;

	WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
		  (void *)st->start_address, (void *)st->start_address);

	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}

static void note_page_update_state(struct pg_state *st, unsigned long addr,
				   unsigned int level, u64 val, unsigned long page_size)
{
	u64 flag = val & pg_level[level].mask;
	u64 pa = val & PTE_RPN_MASK;

	st->level = level;
	st->current_flags = flag;
	st->start_address = addr;
	st->start_pa = pa;

	while (addr >= st->marker[1].start_address) {
		st->marker++;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	}
}

static void note_page(struct pg_state *st, unsigned long addr,
		      unsigned int level, u64 val, unsigned long page_size)
{
	u64 flag = val & pg_level[level].mask;

	/* At first no level is set */
	if (!st->level) {
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		note_page_update_state(st, addr, level, val, page_size);
	/*
	 * Dump the section of virtual memory when:
	 *   - the PTE flags from one entry to the next differ.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *     used for a different purpose, regardless of the flags.
	 */
	} else if (flag != st->current_flags || level != st->level ||
		   addr >= st->marker[1].start_address) {

		/* Check the PTE flags */
		if (st->current_flags) {
			note_prot_wx(st, addr);
			dump_addr(st, addr);

			/* Dump all the flags */
			if (pg_level[st->level].flag)
				dump_flag_info(st, pg_level[st->level].flag,
					       st->current_flags,
					       pg_level[st->level].num);

			pt_dump_seq_putc(st->seq, '\n');
		}

		/*
		 * Address indicates we have passed the end of the
		 * current section of virtual memory
		 */
		note_page_update_state(st, addr, level, val, page_size);
	}
}

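/*
 * For instance (hypothetical walk): a run of RW pages followed by an RX
 * page breaks flag continuity, so the RW range is flushed as one output
 * line and a new range starts at the RX page.
 */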
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
	pte_t *pte = pte_offset_kernel(pmd, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		addr = start + i * PAGE_SIZE;
		note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
	}
}

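/*
 * A huge page directory covering 1 << pdshift bytes of VA and made of
 * huge pages of 1 << shift bytes holds 1 << (pdshift - shift) entries.
 * When shift >= pdshift, one hugepd is shared by several slots of the
 * upper level, so the alignment check below dumps it only once, at its
 * first (aligned) occurrence.
 */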
static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
			int pdshift, int level)
{
#ifdef CONFIG_ARCH_HAS_HUGEPD
	unsigned int i;
	int shift = hugepd_shift(*phpd);
	int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;

	if (start & ((1 << shift) - 1))
		return;

	for (i = 0; i < ptrs_per_hpd; i++) {
		unsigned long addr = start + (i << shift);
		pte_t *pte = hugepte_offset(*phpd, addr, pdshift);

		note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
	}
#endif
}

static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
		addr = start + i * PMD_SIZE;
		if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
			/* pmd exists */
			walk_pte(st, pmd, addr);
		else
			note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
	}
}

static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
{
	pud_t *pud = pud_offset(p4d, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		addr = start + i * PUD_SIZE;
		if (!pud_none(*pud) && !pud_is_leaf(*pud))
			/* pud exists */
			walk_pmd(st, pud, addr);
		else
			note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
	}
}

static void walk_pagetables(struct pg_state *st)
{
	unsigned int i;
	unsigned long addr = st->start_address & PGDIR_MASK;
	pgd_t *pgd = pgd_offset_k(addr);

	/*
	 * Traverse the linux pagetable structure and dump each valid
	 * mapping it describes.
	 */
	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
		p4d_t *p4d = p4d_offset(pgd, 0);

		if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
			note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
		else if (is_hugepd(__hugepd(p4d_val(*p4d))))
			walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
		else
			/* p4d exists */
			walk_pud(st, p4d, addr);
	}
}

static void populate_markers(void)
{
	int i = 0;

#ifdef CONFIG_PPC64
	address_markers[i++].start_address = PAGE_OFFSET;
#else
	address_markers[i++].start_address = TASK_SIZE;
#endif
#ifdef MODULES_VADDR
	address_markers[i++].start_address = MODULES_VADDR;
	address_markers[i++].start_address = MODULES_END;
#endif
	address_markers[i++].start_address = VMALLOC_START;
	address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
	address_markers[i++].start_address = ISA_IO_BASE;
	address_markers[i++].start_address = ISA_IO_END;
	address_markers[i++].start_address = PHB_IO_BASE;
	address_markers[i++].start_address = PHB_IO_END;
	address_markers[i++].start_address = IOREMAP_BASE;
	address_markers[i++].start_address = IOREMAP_END;
	/* What is the ifdef about? */
#ifdef CONFIG_PPC_BOOK3S_64
	address_markers[i++].start_address = H_VMEMMAP_START;
#else
	address_markers[i++].start_address = VMEMMAP_BASE;
#endif
#else /* !CONFIG_PPC64 */
	address_markers[i++].start_address = ioremap_bot;
	address_markers[i++].start_address = IOREMAP_TOP;
#ifdef CONFIG_HIGHMEM
	address_markers[i++].start_address = PKMAP_BASE;
	address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
#endif
	address_markers[i++].start_address = FIXADDR_START;
	address_markers[i++].start_address = FIXADDR_TOP;
#ifdef CONFIG_KASAN
	address_markers[i++].start_address = KASAN_SHADOW_START;
	address_markers[i++].start_address = KASAN_SHADOW_END;
#endif
#endif /* CONFIG_PPC64 */
}

static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.seq = m,
		.marker = address_markers,
		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
	};

#ifdef CONFIG_PPC64
	if (!radix_enabled())
		st.start_address = KERN_VIRT_START;
#endif

	/* Traverse kernel page tables */
	walk_pagetables(&st);
	note_page(&st, 0, 0, 0, 0);
	return 0;
}

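/*
 * DEFINE_SHOW_ATTRIBUTE(ptdump) generates ptdump_fops by wrapping
 * ptdump_show() with single_open(). Once ptdump_init() has registered
 * the file, the tables can be read from userspace (illustrative):
 *
 *   # cat /sys/kernel/debug/kernel_page_tables
 */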
DEFINE_SHOW_ATTRIBUTE(ptdump);

static void build_pgtable_complete_mask(void)
{
	unsigned int i, j;

	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
		if (pg_level[i].flag)
			for (j = 0; j < pg_level[i].num; j++)
				pg_level[i].mask |= pg_level[i].flag[j].mask;
}

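/*
 * Boot-time W+X check: re-walk the kernel page tables without a seq_file
 * and warn about any mapping that is both writable and executable. This
 * is typically invoked from mark_rodata_ro() once the final protections
 * are in place (the exact call site depends on the kernel version).
 */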
#ifdef CONFIG_PPC_DEBUG_WX
void ptdump_check_wx(void)
{
	struct pg_state st = {
		.seq = NULL,
		.marker = address_markers,
		.check_wx = true,
		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
	};

#ifdef CONFIG_PPC64
	if (!radix_enabled())
		st.start_address = KERN_VIRT_START;
#endif

	walk_pagetables(&st);

	if (st.wx_pages)
		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
			st.wx_pages);
	else
		pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
#endif

static int ptdump_init(void)
{
	populate_markers();
	build_pgtable_complete_mask();
	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
			    &ptdump_fops);
	return 0;
}
device_initcall(ptdump_init);