Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
57c8a661 | 2 | #include <linux/memblock.h> |
6d80e53f AD |
3 | #include <linux/compiler.h> |
4 | #include <linux/fs.h> | |
5 | #include <linux/init.h> | |
9a840895 | 6 | #include <linux/ksm.h> |
6d80e53f AD |
7 | #include <linux/mm.h> |
8 | #include <linux/mmzone.h> | |
56873f43 | 9 | #include <linux/huge_mm.h> |
6d80e53f AD |
10 | #include <linux/proc_fs.h> |
11 | #include <linux/seq_file.h> | |
20a0307c | 12 | #include <linux/hugetlb.h> |
dc90f084 | 13 | #include <linux/memremap.h> |
80ae2fdc | 14 | #include <linux/memcontrol.h> |
33c3fc71 VD |
15 | #include <linux/mmu_notifier.h> |
16 | #include <linux/page_idle.h> | |
1a9b5b7f | 17 | #include <linux/kernel-page-flags.h> |
7c0f6ba6 | 18 | #include <linux/uaccess.h> |
6d80e53f AD |
19 | #include "internal.h" |
20 | ||
/*
 * Entry geometry shared by the /proc/kpage* read handlers in this file:
 *   KPMSIZE - size in bytes of one entry (a u64); a file offset divided by
 *             KPMSIZE is the PFN the entry describes.
 *   KPMMASK - low-bit mask the handlers use to reject offsets and lengths
 *             that are not a multiple of KPMSIZE.
 *   KPMBITS - number of bits in one entry (not referenced in the code
 *             shown here).
 */
21 | #define KPMSIZE sizeof(u64) | |
22 | #define KPMMASK (KPMSIZE - 1) | |
33c3fc71 | 23 | #define KPMBITS (KPMSIZE * BITS_PER_BYTE) |
ed7ce0f1 | 24 | |
abec749f DH |
25 | static inline unsigned long get_max_dump_pfn(void) |
26 | { | |
27 | #ifdef CONFIG_SPARSEMEM | |
28 | /* | |
29 | * The memmap of early sections is completely populated and marked | |
30 | * online even if max_pfn does not fall on a section boundary - | |
31 | * pfn_to_online_page() will succeed on all pages. Allow inspecting | |
32 | * these memmaps. | |
33 | */ | |
34 | return round_up(max_pfn, PAGES_PER_SECTION); | |
35 | #else | |
36 | return max_pfn; | |
37 | #endif | |
38 | } | |
39 | ||
6d80e53f AD |
40 | /* /proc/kpagecount - an array exposing page counts |
41 | * | |
42 | * Each entry is a u64 representing the corresponding | |
43 | * physical page count. | |
44 | */ | |
45 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | |
46 | size_t count, loff_t *ppos) | |
47 | { | |
abec749f | 48 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
6d80e53f AD |
49 | u64 __user *out = (u64 __user *)buf; |
50 | struct page *ppage; | |
51 | unsigned long src = *ppos; | |
52 | unsigned long pfn; | |
53 | ssize_t ret = 0; | |
54 | u64 pcount; | |
55 | ||
56 | pfn = src / KPMSIZE; | |
6d80e53f AD |
57 | if (src & KPMMASK || count & KPMMASK) |
58 | return -EINVAL; | |
abec749f DH |
59 | if (src >= max_dump_pfn * KPMSIZE) |
60 | return 0; | |
61 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
6d80e53f AD |
62 | |
63 | while (count > 0) { | |
aad5f69b DH |
64 | /* |
65 | * TODO: ZONE_DEVICE support requires to identify | |
66 | * memmaps that were actually initialized. | |
67 | */ | |
68 | ppage = pfn_to_online_page(pfn); | |
69 | ||
144552ff | 70 | if (!ppage || PageSlab(ppage) || page_has_type(ppage)) |
6d80e53f AD |
71 | pcount = 0; |
72 | else | |
73 | pcount = page_mapcount(ppage); | |
74 | ||
ed7ce0f1 | 75 | if (put_user(pcount, out)) { |
6d80e53f AD |
76 | ret = -EFAULT; |
77 | break; | |
78 | } | |
79 | ||
ed7ce0f1 WF |
80 | pfn++; |
81 | out++; | |
6d80e53f | 82 | count -= KPMSIZE; |
d3691d2c VD |
83 | |
84 | cond_resched(); | |
6d80e53f AD |
85 | } |
86 | ||
87 | *ppos += (char __user *)out - buf; | |
88 | if (!ret) | |
89 | ret = (char __user *)out - buf; | |
90 | return ret; | |
91 | } | |
92 | ||
97a32539 | 93 | static const struct proc_ops kpagecount_proc_ops = { |
ef1d6178 | 94 | .proc_flags = PROC_ENTRY_PERMANENT, |
97a32539 AD |
95 | .proc_lseek = mem_lseek, |
96 | .proc_read = kpagecount_read, | |
6d80e53f AD |
97 | }; |
98 | ||
99 | /* /proc/kpageflags - an array exposing page flags | |
100 | * | |
101 | * Each entry is a u64 representing the corresponding | |
102 | * physical page flags. | |
103 | */ | |
104 | ||
17797549 WF |
105 | static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) |
106 | { | |
107 | return ((kflags >> kbit) & 1) << ubit; | |
108 | } | |
109 | ||
1a9b5b7f | 110 | u64 stable_page_flags(struct page *page) |
17797549 WF |
111 | { |
112 | u64 k; | |
113 | u64 u; | |
114 | ||
115 | /* | |
116 | * pseudo flag: KPF_NOPAGE | |
117 | * it differentiates a memory hole from a page with no flags | |
118 | */ | |
119 | if (!page) | |
120 | return 1 << KPF_NOPAGE; | |
121 | ||
122 | k = page->flags; | |
123 | u = 0; | |
124 | ||
125 | /* | |
126 | * pseudo flags for the well known (anonymous) memory mapped pages | |
127 | * | |
d88e2a2b | 128 | * Note that page->_mapcount is overloaded in SLAB, so the |
832fc1de | 129 | * simple test in page_mapped() is not enough. |
17797549 | 130 | */ |
832fc1de | 131 | if (!PageSlab(page) && page_mapped(page)) |
17797549 WF |
132 | u |= 1 << KPF_MMAP; |
133 | if (PageAnon(page)) | |
134 | u |= 1 << KPF_ANON; | |
9a840895 HD |
135 | if (PageKsm(page)) |
136 | u |= 1 << KPF_KSM; | |
17797549 WF |
137 | |
138 | /* | |
139 | * compound pages: export both head/tail info | |
140 | * they together define a compound page's start/end pos and order | |
141 | */ | |
142 | if (PageHead(page)) | |
143 | u |= 1 << KPF_COMPOUND_HEAD; | |
144 | if (PageTail(page)) | |
145 | u |= 1 << KPF_COMPOUND_TAIL; | |
146 | if (PageHuge(page)) | |
147 | u |= 1 << KPF_HUGE; | |
7a71932d NH |
148 | /* |
149 | * PageTransCompound can be true for non-huge compound pages (slab | |
150 | * pages or pages allocated by drivers with __GFP_COMP) because it | |
e3bba3c3 NH |
151 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon |
152 | * to make sure a given page is a thp, not a non-huge compound page. | |
7a71932d | 153 | */ |
56873f43 WY |
154 | else if (PageTransCompound(page)) { |
155 | struct page *head = compound_head(page); | |
156 | ||
157 | if (PageLRU(head) || PageAnon(head)) | |
158 | u |= 1 << KPF_THP; | |
159 | else if (is_huge_zero_page(head)) { | |
160 | u |= 1 << KPF_ZERO_PAGE; | |
161 | u |= 1 << KPF_THP; | |
162 | } | |
163 | } else if (is_zero_pfn(page_to_pfn(page))) | |
164 | u |= 1 << KPF_ZERO_PAGE; | |
165 | ||
17797549 | 166 | |
17797549 | 167 | /* |
d88e2a2b VB |
168 | * Caveats on high order pages: PG_buddy and PG_slab will only be set |
169 | * on the head page. | |
17797549 | 170 | */ |
5f24ce5f AA |
171 | if (PageBuddy(page)) |
172 | u |= 1 << KPF_BUDDY; | |
832fc1de NH |
173 | else if (page_count(page) == 0 && is_free_buddy_page(page)) |
174 | u |= 1 << KPF_BUDDY; | |
5f24ce5f | 175 | |
ca215086 DH |
176 | if (PageOffline(page)) |
177 | u |= 1 << KPF_OFFLINE; | |
1d40a5ea MW |
178 | if (PageTable(page)) |
179 | u |= 1 << KPF_PGTABLE; | |
09316c09 | 180 | |
f074a8f4 VD |
181 | if (page_is_idle(page)) |
182 | u |= 1 << KPF_IDLE; | |
183 | ||
5f24ce5f AA |
184 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); |
185 | ||
17797549 | 186 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
d88e2a2b | 187 | if (PageTail(page) && PageSlab(page)) |
0a71649c | 188 | u |= 1 << KPF_SLAB; |
17797549 WF |
189 | |
190 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); | |
191 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); | |
192 | u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); | |
193 | u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); | |
194 | ||
195 | u |= kpf_copy_bit(k, KPF_LRU, PG_lru); | |
196 | u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); | |
197 | u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); | |
198 | u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); | |
199 | ||
b6789123 HD |
200 | if (PageSwapCache(page)) |
201 | u |= 1 << KPF_SWAPCACHE; | |
17797549 WF |
202 | u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); |
203 | ||
17797549 WF |
204 | u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); |
205 | u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); | |
17797549 | 206 | |
253fb02d WF |
207 | #ifdef CONFIG_MEMORY_FAILURE |
208 | u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); | |
209 | #endif | |
210 | ||
ed430fec | 211 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
17797549 WF |
212 | u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); |
213 | #endif | |
214 | ||
215 | u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); | |
216 | u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); | |
217 | u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); | |
218 | u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); | |
219 | u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); | |
220 | u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); | |
b0284cd2 | 221 | #ifdef CONFIG_ARCH_USES_PG_ARCH_X |
4beba948 | 222 | u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); |
ef6458b1 | 223 | u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3); |
4beba948 | 224 | #endif |
17797549 WF |
225 | |
226 | return u; | |
227 | }; | |
6d80e53f AD |
228 | |
229 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | |
230 | size_t count, loff_t *ppos) | |
231 | { | |
abec749f | 232 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
6d80e53f AD |
233 | u64 __user *out = (u64 __user *)buf; |
234 | struct page *ppage; | |
235 | unsigned long src = *ppos; | |
236 | unsigned long pfn; | |
237 | ssize_t ret = 0; | |
6d80e53f AD |
238 | |
239 | pfn = src / KPMSIZE; | |
6d80e53f AD |
240 | if (src & KPMMASK || count & KPMMASK) |
241 | return -EINVAL; | |
abec749f DH |
242 | if (src >= max_dump_pfn * KPMSIZE) |
243 | return 0; | |
244 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
6d80e53f AD |
245 | |
246 | while (count > 0) { | |
aad5f69b DH |
247 | /* |
248 | * TODO: ZONE_DEVICE support requires to identify | |
249 | * memmaps that were actually initialized. | |
250 | */ | |
251 | ppage = pfn_to_online_page(pfn); | |
17797549 | 252 | |
1a9b5b7f | 253 | if (put_user(stable_page_flags(ppage), out)) { |
6d80e53f AD |
254 | ret = -EFAULT; |
255 | break; | |
256 | } | |
257 | ||
ed7ce0f1 WF |
258 | pfn++; |
259 | out++; | |
6d80e53f | 260 | count -= KPMSIZE; |
d3691d2c VD |
261 | |
262 | cond_resched(); | |
6d80e53f AD |
263 | } |
264 | ||
265 | *ppos += (char __user *)out - buf; | |
266 | if (!ret) | |
267 | ret = (char __user *)out - buf; | |
268 | return ret; | |
269 | } | |
270 | ||
97a32539 | 271 | static const struct proc_ops kpageflags_proc_ops = { |
ef1d6178 | 272 | .proc_flags = PROC_ENTRY_PERMANENT, |
97a32539 AD |
273 | .proc_lseek = mem_lseek, |
274 | .proc_read = kpageflags_read, | |
6d80e53f AD |
275 | }; |
276 | ||
80ae2fdc VD |
277 | #ifdef CONFIG_MEMCG |
278 | static ssize_t kpagecgroup_read(struct file *file, char __user *buf, | |
279 | size_t count, loff_t *ppos) | |
280 | { | |
abec749f | 281 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
80ae2fdc VD |
282 | u64 __user *out = (u64 __user *)buf; |
283 | struct page *ppage; | |
284 | unsigned long src = *ppos; | |
285 | unsigned long pfn; | |
286 | ssize_t ret = 0; | |
287 | u64 ino; | |
288 | ||
289 | pfn = src / KPMSIZE; | |
80ae2fdc VD |
290 | if (src & KPMMASK || count & KPMMASK) |
291 | return -EINVAL; | |
abec749f DH |
292 | if (src >= max_dump_pfn * KPMSIZE) |
293 | return 0; | |
294 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
80ae2fdc VD |
295 | |
296 | while (count > 0) { | |
aad5f69b DH |
297 | /* |
298 | * TODO: ZONE_DEVICE support requires to identify | |
299 | * memmaps that were actually initialized. | |
300 | */ | |
301 | ppage = pfn_to_online_page(pfn); | |
80ae2fdc VD |
302 | |
303 | if (ppage) | |
304 | ino = page_cgroup_ino(ppage); | |
305 | else | |
306 | ino = 0; | |
307 | ||
308 | if (put_user(ino, out)) { | |
309 | ret = -EFAULT; | |
310 | break; | |
311 | } | |
312 | ||
313 | pfn++; | |
314 | out++; | |
315 | count -= KPMSIZE; | |
d3691d2c VD |
316 | |
317 | cond_resched(); | |
80ae2fdc VD |
318 | } |
319 | ||
320 | *ppos += (char __user *)out - buf; | |
321 | if (!ret) | |
322 | ret = (char __user *)out - buf; | |
323 | return ret; | |
324 | } | |
325 | ||
97a32539 | 326 | static const struct proc_ops kpagecgroup_proc_ops = { |
ef1d6178 | 327 | .proc_flags = PROC_ENTRY_PERMANENT, |
97a32539 AD |
328 | .proc_lseek = mem_lseek, |
329 | .proc_read = kpagecgroup_read, | |
80ae2fdc VD |
330 | }; |
331 | #endif /* CONFIG_MEMCG */ | |
332 | ||
6d80e53f AD |
333 | static int __init proc_page_init(void) |
334 | { | |
97a32539 AD |
335 | proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops); |
336 | proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops); | |
80ae2fdc | 337 | #ifdef CONFIG_MEMCG |
97a32539 | 338 | proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops); |
80ae2fdc | 339 | #endif |
6d80e53f AD |
340 | return 0; |
341 | } | |
abaf3787 | 342 | fs_initcall(proc_page_init); |