Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
57c8a661 | 2 | #include <linux/memblock.h> |
6d80e53f AD |
3 | #include <linux/compiler.h> |
4 | #include <linux/fs.h> | |
5 | #include <linux/init.h> | |
9a840895 | 6 | #include <linux/ksm.h> |
6d80e53f AD |
7 | #include <linux/mm.h> |
8 | #include <linux/mmzone.h> | |
56873f43 | 9 | #include <linux/huge_mm.h> |
6d80e53f AD |
10 | #include <linux/proc_fs.h> |
11 | #include <linux/seq_file.h> | |
20a0307c | 12 | #include <linux/hugetlb.h> |
80ae2fdc | 13 | #include <linux/memcontrol.h> |
33c3fc71 VD |
14 | #include <linux/mmu_notifier.h> |
15 | #include <linux/page_idle.h> | |
1a9b5b7f | 16 | #include <linux/kernel-page-flags.h> |
7c0f6ba6 | 17 | #include <linux/uaccess.h> |
6d80e53f AD |
18 | #include "internal.h" |
19 | ||
/* Size in bytes of one entry in the /proc/kpage* arrays (one u64 per pfn). */
#define KPMSIZE (sizeof(u64))
/* Low offset bits that must be zero for an access aligned to whole entries. */
#define KPMMASK (KPMSIZE - 1)
/* Width of one entry in bits. */
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
ed7ce0f1 | 23 | |
abec749f DH |
24 | static inline unsigned long get_max_dump_pfn(void) |
25 | { | |
26 | #ifdef CONFIG_SPARSEMEM | |
27 | /* | |
28 | * The memmap of early sections is completely populated and marked | |
29 | * online even if max_pfn does not fall on a section boundary - | |
30 | * pfn_to_online_page() will succeed on all pages. Allow inspecting | |
31 | * these memmaps. | |
32 | */ | |
33 | return round_up(max_pfn, PAGES_PER_SECTION); | |
34 | #else | |
35 | return max_pfn; | |
36 | #endif | |
37 | } | |
38 | ||
6d80e53f AD |
39 | /* /proc/kpagecount - an array exposing page counts |
40 | * | |
41 | * Each entry is a u64 representing the corresponding | |
42 | * physical page count. | |
43 | */ | |
44 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | |
45 | size_t count, loff_t *ppos) | |
46 | { | |
abec749f | 47 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
6d80e53f AD |
48 | u64 __user *out = (u64 __user *)buf; |
49 | struct page *ppage; | |
50 | unsigned long src = *ppos; | |
51 | unsigned long pfn; | |
52 | ssize_t ret = 0; | |
53 | u64 pcount; | |
54 | ||
55 | pfn = src / KPMSIZE; | |
6d80e53f AD |
56 | if (src & KPMMASK || count & KPMMASK) |
57 | return -EINVAL; | |
abec749f DH |
58 | if (src >= max_dump_pfn * KPMSIZE) |
59 | return 0; | |
60 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
6d80e53f AD |
61 | |
62 | while (count > 0) { | |
aad5f69b DH |
63 | /* |
64 | * TODO: ZONE_DEVICE support requires to identify | |
65 | * memmaps that were actually initialized. | |
66 | */ | |
67 | ppage = pfn_to_online_page(pfn); | |
68 | ||
144552ff | 69 | if (!ppage || PageSlab(ppage) || page_has_type(ppage)) |
6d80e53f AD |
70 | pcount = 0; |
71 | else | |
72 | pcount = page_mapcount(ppage); | |
73 | ||
ed7ce0f1 | 74 | if (put_user(pcount, out)) { |
6d80e53f AD |
75 | ret = -EFAULT; |
76 | break; | |
77 | } | |
78 | ||
ed7ce0f1 WF |
79 | pfn++; |
80 | out++; | |
6d80e53f | 81 | count -= KPMSIZE; |
d3691d2c VD |
82 | |
83 | cond_resched(); | |
6d80e53f AD |
84 | } |
85 | ||
86 | *ppos += (char __user *)out - buf; | |
87 | if (!ret) | |
88 | ret = (char __user *)out - buf; | |
89 | return ret; | |
90 | } | |
91 | ||
97a32539 AD |
92 | static const struct proc_ops kpagecount_proc_ops = { |
93 | .proc_lseek = mem_lseek, | |
94 | .proc_read = kpagecount_read, | |
6d80e53f AD |
95 | }; |
96 | ||
/*
 * /proc/kpageflags - an array exposing page flags
 *
 * Each entry is a u64 holding the flags of the corresponding
 * physical page.
 */
102 | ||
17797549 WF |
103 | static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) |
104 | { | |
105 | return ((kflags >> kbit) & 1) << ubit; | |
106 | } | |
107 | ||
1a9b5b7f | 108 | u64 stable_page_flags(struct page *page) |
17797549 WF |
109 | { |
110 | u64 k; | |
111 | u64 u; | |
112 | ||
113 | /* | |
114 | * pseudo flag: KPF_NOPAGE | |
115 | * it differentiates a memory hole from a page with no flags | |
116 | */ | |
117 | if (!page) | |
118 | return 1 << KPF_NOPAGE; | |
119 | ||
120 | k = page->flags; | |
121 | u = 0; | |
122 | ||
123 | /* | |
124 | * pseudo flags for the well known (anonymous) memory mapped pages | |
125 | * | |
126 | * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the | |
832fc1de | 127 | * simple test in page_mapped() is not enough. |
17797549 | 128 | */ |
832fc1de | 129 | if (!PageSlab(page) && page_mapped(page)) |
17797549 WF |
130 | u |= 1 << KPF_MMAP; |
131 | if (PageAnon(page)) | |
132 | u |= 1 << KPF_ANON; | |
9a840895 HD |
133 | if (PageKsm(page)) |
134 | u |= 1 << KPF_KSM; | |
17797549 WF |
135 | |
136 | /* | |
137 | * compound pages: export both head/tail info | |
138 | * they together define a compound page's start/end pos and order | |
139 | */ | |
140 | if (PageHead(page)) | |
141 | u |= 1 << KPF_COMPOUND_HEAD; | |
142 | if (PageTail(page)) | |
143 | u |= 1 << KPF_COMPOUND_TAIL; | |
144 | if (PageHuge(page)) | |
145 | u |= 1 << KPF_HUGE; | |
7a71932d NH |
146 | /* |
147 | * PageTransCompound can be true for non-huge compound pages (slab | |
148 | * pages or pages allocated by drivers with __GFP_COMP) because it | |
e3bba3c3 NH |
149 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon |
150 | * to make sure a given page is a thp, not a non-huge compound page. | |
7a71932d | 151 | */ |
56873f43 WY |
152 | else if (PageTransCompound(page)) { |
153 | struct page *head = compound_head(page); | |
154 | ||
155 | if (PageLRU(head) || PageAnon(head)) | |
156 | u |= 1 << KPF_THP; | |
157 | else if (is_huge_zero_page(head)) { | |
158 | u |= 1 << KPF_ZERO_PAGE; | |
159 | u |= 1 << KPF_THP; | |
160 | } | |
161 | } else if (is_zero_pfn(page_to_pfn(page))) | |
162 | u |= 1 << KPF_ZERO_PAGE; | |
163 | ||
17797549 | 164 | |
17797549 | 165 | /* |
0139aa7b | 166 | * Caveats on high order pages: page->_refcount will only be set |
5f24ce5f AA |
167 | * -1 on the head page; SLUB/SLQB do the same for PG_slab; |
168 | * SLOB won't set PG_slab at all on compound pages. | |
17797549 | 169 | */ |
5f24ce5f AA |
170 | if (PageBuddy(page)) |
171 | u |= 1 << KPF_BUDDY; | |
832fc1de NH |
172 | else if (page_count(page) == 0 && is_free_buddy_page(page)) |
173 | u |= 1 << KPF_BUDDY; | |
5f24ce5f | 174 | |
ca215086 DH |
175 | if (PageOffline(page)) |
176 | u |= 1 << KPF_OFFLINE; | |
1d40a5ea MW |
177 | if (PageTable(page)) |
178 | u |= 1 << KPF_PGTABLE; | |
09316c09 | 179 | |
f074a8f4 VD |
180 | if (page_is_idle(page)) |
181 | u |= 1 << KPF_IDLE; | |
182 | ||
5f24ce5f AA |
183 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); |
184 | ||
17797549 | 185 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
0a71649c NH |
186 | if (PageTail(page) && PageSlab(compound_head(page))) |
187 | u |= 1 << KPF_SLAB; | |
17797549 WF |
188 | |
189 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); | |
190 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); | |
191 | u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); | |
192 | u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); | |
193 | ||
194 | u |= kpf_copy_bit(k, KPF_LRU, PG_lru); | |
195 | u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); | |
196 | u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); | |
197 | u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); | |
198 | ||
b6789123 HD |
199 | if (PageSwapCache(page)) |
200 | u |= 1 << KPF_SWAPCACHE; | |
17797549 WF |
201 | u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); |
202 | ||
17797549 WF |
203 | u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); |
204 | u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); | |
17797549 | 205 | |
253fb02d WF |
206 | #ifdef CONFIG_MEMORY_FAILURE |
207 | u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); | |
208 | #endif | |
209 | ||
ed430fec | 210 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
17797549 WF |
211 | u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); |
212 | #endif | |
213 | ||
214 | u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); | |
215 | u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); | |
216 | u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); | |
217 | u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); | |
218 | u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); | |
219 | u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); | |
4beba948 SP |
220 | #ifdef CONFIG_64BIT |
221 | u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); | |
222 | #endif | |
17797549 WF |
223 | |
224 | return u; | |
225 | }; | |
6d80e53f AD |
226 | |
227 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | |
228 | size_t count, loff_t *ppos) | |
229 | { | |
abec749f | 230 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
6d80e53f AD |
231 | u64 __user *out = (u64 __user *)buf; |
232 | struct page *ppage; | |
233 | unsigned long src = *ppos; | |
234 | unsigned long pfn; | |
235 | ssize_t ret = 0; | |
6d80e53f AD |
236 | |
237 | pfn = src / KPMSIZE; | |
6d80e53f AD |
238 | if (src & KPMMASK || count & KPMMASK) |
239 | return -EINVAL; | |
abec749f DH |
240 | if (src >= max_dump_pfn * KPMSIZE) |
241 | return 0; | |
242 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
6d80e53f AD |
243 | |
244 | while (count > 0) { | |
aad5f69b DH |
245 | /* |
246 | * TODO: ZONE_DEVICE support requires to identify | |
247 | * memmaps that were actually initialized. | |
248 | */ | |
249 | ppage = pfn_to_online_page(pfn); | |
17797549 | 250 | |
1a9b5b7f | 251 | if (put_user(stable_page_flags(ppage), out)) { |
6d80e53f AD |
252 | ret = -EFAULT; |
253 | break; | |
254 | } | |
255 | ||
ed7ce0f1 WF |
256 | pfn++; |
257 | out++; | |
6d80e53f | 258 | count -= KPMSIZE; |
d3691d2c VD |
259 | |
260 | cond_resched(); | |
6d80e53f AD |
261 | } |
262 | ||
263 | *ppos += (char __user *)out - buf; | |
264 | if (!ret) | |
265 | ret = (char __user *)out - buf; | |
266 | return ret; | |
267 | } | |
268 | ||
97a32539 AD |
269 | static const struct proc_ops kpageflags_proc_ops = { |
270 | .proc_lseek = mem_lseek, | |
271 | .proc_read = kpageflags_read, | |
6d80e53f AD |
272 | }; |
273 | ||
80ae2fdc VD |
274 | #ifdef CONFIG_MEMCG |
275 | static ssize_t kpagecgroup_read(struct file *file, char __user *buf, | |
276 | size_t count, loff_t *ppos) | |
277 | { | |
abec749f | 278 | const unsigned long max_dump_pfn = get_max_dump_pfn(); |
80ae2fdc VD |
279 | u64 __user *out = (u64 __user *)buf; |
280 | struct page *ppage; | |
281 | unsigned long src = *ppos; | |
282 | unsigned long pfn; | |
283 | ssize_t ret = 0; | |
284 | u64 ino; | |
285 | ||
286 | pfn = src / KPMSIZE; | |
80ae2fdc VD |
287 | if (src & KPMMASK || count & KPMMASK) |
288 | return -EINVAL; | |
abec749f DH |
289 | if (src >= max_dump_pfn * KPMSIZE) |
290 | return 0; | |
291 | count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); | |
80ae2fdc VD |
292 | |
293 | while (count > 0) { | |
aad5f69b DH |
294 | /* |
295 | * TODO: ZONE_DEVICE support requires to identify | |
296 | * memmaps that were actually initialized. | |
297 | */ | |
298 | ppage = pfn_to_online_page(pfn); | |
80ae2fdc VD |
299 | |
300 | if (ppage) | |
301 | ino = page_cgroup_ino(ppage); | |
302 | else | |
303 | ino = 0; | |
304 | ||
305 | if (put_user(ino, out)) { | |
306 | ret = -EFAULT; | |
307 | break; | |
308 | } | |
309 | ||
310 | pfn++; | |
311 | out++; | |
312 | count -= KPMSIZE; | |
d3691d2c VD |
313 | |
314 | cond_resched(); | |
80ae2fdc VD |
315 | } |
316 | ||
317 | *ppos += (char __user *)out - buf; | |
318 | if (!ret) | |
319 | ret = (char __user *)out - buf; | |
320 | return ret; | |
321 | } | |
322 | ||
97a32539 AD |
323 | static const struct proc_ops kpagecgroup_proc_ops = { |
324 | .proc_lseek = mem_lseek, | |
325 | .proc_read = kpagecgroup_read, | |
80ae2fdc VD |
326 | }; |
327 | #endif /* CONFIG_MEMCG */ | |
328 | ||
6d80e53f AD |
329 | static int __init proc_page_init(void) |
330 | { | |
97a32539 AD |
331 | proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops); |
332 | proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops); | |
80ae2fdc | 333 | #ifdef CONFIG_MEMCG |
97a32539 | 334 | proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops); |
80ae2fdc | 335 | #endif |
6d80e53f AD |
336 | return 0; |
337 | } | |
abaf3787 | 338 | fs_initcall(proc_page_init); |