// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

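/*
 * Editor's note: one struct page_owner lives in the page_ext of every base
 * page. __set_page_owner_handle() below fills one copy per 0-order page of
 * a high-order allocation, so tail pages carry the same owner info.
 */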
struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
};

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

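/*
 * Sentinel depot handles: dummy_handle is returned when save_stack()
 * detects recursion, failure_handle stands in when the stack depot cannot
 * store a trace, and early_handle marks pages that were allocated before
 * page_owner was initialized.
 */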
static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

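/*
 * Usage sketch (boot-time setup, per the early_param() below): build with
 * CONFIG_PAGE_OWNER=y and boot with
 *
 *	page_owner=on
 *
 * kstrtobool() also accepts "1"/"y". Without the parameter the static key
 * stays disabled and the page_owner hooks remain cheap no-ops.
 */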
static int __init early_page_owner_param(char *buf)
{
	int ret = kstrtobool(buf, &page_owner_enabled);

	if (page_owner_enabled)
		stack_depot_request_early_init();

	return ret;
}
early_param("page_owner", early_page_owner_param);

static __init bool need_page_owner(void)
{
	return page_owner_enabled;
}

static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

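/*
 * The wrappers below are noinline while create_dummy_stack() is
 * __always_inline, so each sentinel handle gets a distinct call stack;
 * otherwise the depot would deduplicate all three to one entry (rationale
 * inferred from the inlining annotations, not stated in this file).
 */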
static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

static __init void init_page_owner(void)
{
	if (!page_owner_enabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	static_branch_enable(&page_owner_inited);
	init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
	.need_shared_flags = true,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return page_ext_data(page_ext, &page_owner_ops);
}

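/*
 * Reentrancy note for save_stack() below: current->in_page_owner is a
 * task_struct flag guarding against recursive metadata allocations. The
 * skip count of 2 passed to stack_trace_save() drops the innermost tracing
 * frames so the stored trace starts near the real allocation or free site
 * (the exact frame accounting is an assumption).
 */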
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	/*
	 * Avoid recursion.
	 *
	 * Sometimes page metadata allocation tracking requires more
	 * memory to be allocated:
	 * - when new stack trace is saved to stack depot
	 * - when backtrace itself is calculated (ia64)
	 */
	if (current->in_page_owner)
		return dummy_handle;
	current->in_page_owner = 1;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;

	current->in_page_owner = 0;
	return handle;
}

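/*
 * __reset_page_owner() runs on the page-free path, which must not sleep,
 * so the free stack is captured with GFP_NOWAIT | __GFP_NOWARN and
 * silently degrades to failure_handle when the depot cannot grow.
 */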
void __reset_page_owner(struct page *page, unsigned short order)
{
	int i;
	struct page_ext *page_ext;
	depot_stack_handle_t handle;
	struct page_owner *page_owner;
	u64 free_ts_nsec = local_clock();

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;

	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
	for (i = 0; i < (1 << order); i++) {
		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
		page_owner = get_page_owner(page_ext);
		page_owner->free_handle = handle;
		page_owner->free_ts_nsec = free_ts_nsec;
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
					depot_stack_handle_t handle,
					unsigned short order, gfp_t gfp_mask)
{
	struct page_owner *page_owner;
	int i;
	u64 ts_nsec = local_clock();

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = -1;
		page_owner->pid = current->pid;
		page_owner->tgid = current->tgid;
		page_owner->ts_nsec = ts_nsec;
		strscpy(page_owner->comm, current->comm,
			sizeof(page_owner->comm));
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);

		page_ext = page_ext_next(page_ext);
	}
}

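/*
 * In __set_page_owner() the stack is saved before page_ext_get():
 * stack_depot_save() may allocate (and, for a sleepable gfp_mask, sleep),
 * which would be unsafe once page_ext_get() has entered its RCU read-side
 * section (ordering rationale assumed, not stated in this file).
 */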
noinline void __set_page_owner(struct page *page, unsigned short order,
					gfp_t gfp_mask)
{
	struct page_ext *page_ext;
	depot_stack_handle_t handle;

	handle = save_stack(gfp_mask);

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;
	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
	page_ext_put(page_ext);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
	page_ext_put(page_ext);
}

void __split_page_owner(struct page *page, unsigned int nr)
{
	int i;
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	for (i = 0; i < nr; i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->order = 0;
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
	struct page_ext *old_ext;
	struct page_ext *new_ext;
	struct page_owner *old_page_owner, *new_page_owner;

	old_ext = page_ext_get(&old->page);
	if (unlikely(!old_ext))
		return;

	new_ext = page_ext_get(&newfolio->page);
	if (unlikely(!new_ext)) {
		page_ext_put(old_ext);
		return;
	}

	old_page_owner = get_page_owner(old_ext);
	new_page_owner = get_page_owner(new_ext);
	new_page_owner->order = old_page_owner->order;
	new_page_owner->gfp_mask = old_page_owner->gfp_mask;
	new_page_owner->last_migrate_reason =
		old_page_owner->last_migrate_reason;
	new_page_owner->handle = old_page_owner->handle;
	new_page_owner->pid = old_page_owner->pid;
	new_page_owner->tgid = old_page_owner->tgid;
	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
	new_page_owner->free_ts_nsec = old_page_owner->free_ts_nsec;
	strcpy(new_page_owner->comm, old_page_owner->comm);

	/*
	 * We don't clear the bit on the old folio as it's going to be freed
	 * after migration. Until then, the info can be useful in case of
	 * a bug, and the overall stats will be off a bit only temporarily.
	 * Also, migrate_misplaced_transhuge_page() can still fail the
	 * migration and then we want the old folio to retain the info. But
	 * in that case we also don't need to explicitly clear the info from
	 * the new page, which will be freed.
	 */
	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
	__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
	page_ext_put(new_ext);
	page_ext_put(old_ext);
}

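/*
 * Backs the /proc/pagetypeinfo "mixed" block counts: a pageblock is
 * counted against its own migratetype once it is found to contain a page
 * allocated with a different migratetype, a useful fragmentation
 * indicator.
 */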
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn, block_end_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		page = pfn_to_online_page(pfn);
		if (!page) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			/* The pageblock is online, no need to recheck. */
			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = buddy_order_unsafe(page);
				if (freepage_order <= MAX_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
				goto ext_put_continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfp_migratetype(page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				page_ext_put(page_ext);
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
ext_put_continue:
			page_ext_put(page_ext);
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

/*
 * Look up the memcg information and print it out
 */
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
					 struct page *page)
{
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
	struct mem_cgroup *memcg;
	bool online;
	char name[80];

	rcu_read_lock();
	memcg_data = READ_ONCE(page->memcg_data);
	if (!memcg_data)
		goto out_unlock;

	if (memcg_data & MEMCG_DATA_OBJCGS)
		ret += scnprintf(kbuf + ret, count - ret,
				"Slab cache page\n");

	memcg = page_memcg_check(page);
	if (!memcg)
		goto out_unlock;

	online = (memcg->css.flags & CSS_ONLINE);
	cgroup_name(memcg->css.cgroup, name, sizeof(name));
	ret += scnprintf(kbuf + ret, count - ret,
			"Charged %sto %smemcg %s\n",
			PageMemcgKmem(page) ? "(via objcg) " : "",
			online ? "" : "offline ",
			name);
out_unlock:
	rcu_read_unlock();
#endif /* CONFIG_MEMCG */

	return ret;
}

static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_owner *page_owner,
		depot_stack_handle_t handle)
{
	int ret, pageblock_mt, page_mt;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = scnprintf(kbuf, count,
			"Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns, free_ts %llu ns\n",
			page_owner->order, page_owner->gfp_mask,
			&page_owner->gfp_mask, page_owner->pid,
			page_owner->tgid, page_owner->comm,
			page_owner->ts_nsec, page_owner->free_ts_nsec);

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pageblock_migratetype(page);
	page_mt = gfp_migratetype(page_owner->gfp_mask);
	ret += scnprintf(kbuf + ret, count - ret,
			"PFN 0x%lx type %s Block %lu type %s Flags %pGp\n",
			pfn,
			migratetype_names[page_mt],
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			&page->flags);

	ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
	if (ret >= count)
		goto err;

	if (page_owner->last_migrate_reason != -1) {
		ret += scnprintf(kbuf + ret, count - ret,
			"Page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	}

	ret = print_page_owner_memcg(kbuf, count, ret, page);

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

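/*
 * __dump_page_owner() is the printk sibling of the debugfs path above: it
 * emits the same ownership data via pr_alert(), for dump_page() style
 * debugging when a problematic page is reported.
 */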
void __dump_page_owner(const struct page *page)
{
	struct page_ext *page_ext = page_ext_get((void *)page);
	struct page_owner *page_owner;
	depot_stack_handle_t handle;
	gfp_t gfp_mask;
	int mt;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_owner = get_page_owner(page_ext);
	gfp_mask = page_owner->gfp_mask;
	mt = gfp_migratetype(gfp_mask);

	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
		pr_alert("page_owner info is not present (never set?)\n");
		page_ext_put(page_ext);
		return;
	}

	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
		pr_alert("page_owner tracks the page as allocated\n");
	else
		pr_alert("page_owner tracks the page as freed\n");

	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n",
		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
		 page_owner->pid, page_owner->tgid, page_owner->comm,
		 page_owner->ts_nsec, page_owner->free_ts_nsec);

	handle = READ_ONCE(page_owner->handle);
	if (!handle)
		pr_alert("page_owner allocation stack trace missing\n");
	else
		stack_depot_print(handle);

	handle = READ_ONCE(page_owner->free_handle);
	if (!handle) {
		pr_alert("page_owner free stack trace missing\n");
	} else {
		pr_alert("page last free stack trace:\n");
		stack_depot_print(handle);
	}

	if (page_owner->last_migrate_reason != -1)
		pr_alert("page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	page_ext_put(page_ext);
}

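/*
 * Userspace sketch (assumes debugfs mounted at /sys/kernel/debug):
 *
 *	cat /sys/kernel/debug/page_owner > page_owner_full.txt
 *
 * Each read() returns the record of one currently allocated page and
 * leaves f_pos just past that page's PFN, so a plain cat walks every
 * tracked page in PFN order.
 */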
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	if (*ppos == 0)
		pfn = min_low_pfn;
	else
		pfn = *ppos;
	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * This temporary page_owner is required so
		 * that we can avoid the context switches while holding
		 * the rcu lock and copying the page owner information to
		 * user through copy_to_user() or GFP_KERNEL allocations.
		 */
		struct page_owner page_owner_tmp;

		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			if (freepage_order <= MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = page_ext_get(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			goto ext_put_continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
			goto ext_put_continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			goto ext_put_continue;

		/*
		 * Access to page_ext->handle isn't synchronous so we should
		 * be careful to access it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			goto ext_put_continue;

		/* Record the next PFN to read in the file offset */
		*ppos = pfn + 1;

		page_owner_tmp = *page_owner;
		page_ext_put(page_ext);
		return print_page_owner(buf, count, pfn, page,
				&page_owner_tmp, handle);
ext_put_continue:
		page_ext_put(page_ext);
	}

	return 0;
}

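/*
 * The file offset is interpreted as a PFN (read_page_owner() stores
 * pfn + 1 in *ppos), so lseek_page_owner() lets tools SEEK_SET straight to
 * the page frame they care about instead of reading from the start.
 */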
static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case SEEK_SET:
		file->f_pos = offset;
		break;
	case SEEK_CUR:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	return file->f_pos;
}

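/*
 * Pages handed out by memblock before page_owner came up have no owner
 * record; the init walk below tags every such page with early_handle so
 * it is at least accounted as allocated.
 */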
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			struct page_ext *page_ext;

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = buddy_order_unsafe(page);

				if (order > 0 && order <= MAX_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				goto ext_put_continue;

			/* Found early allocated page */
			__set_page_owner_handle(page_ext, early_handle,
						0, 0);
			count++;
ext_put_continue:
			page_ext_put(page_ext);
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		init_pages_in_zone(pgdat, zone);
	}
}

static void init_early_allocated_pages(void)
{
	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
	.read = read_page_owner,
	.llseek = lseek_page_owner,
};

static int __init pageowner_init(void)
{
	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL,
			    &proc_page_owner_operations);

	return 0;
}
late_initcall(pageowner_init)