mm/page_owner: record the timestamp of all pages during free
mm/page_owner.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

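/*
 * Per-page metadata kept in the page_ext area while page_owner is enabled.
 * 'handle' and 'free_handle' are stack depot handles for the last
 * allocation and last free stack traces; 'ts_nsec' and 'free_ts_nsec' hold
 * the corresponding local_clock() timestamps, and 'pid' is the pid of the
 * allocating task.
 */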
struct page_owner {
        unsigned short order;
        short last_migrate_reason;
        gfp_t gfp_mask;
        depot_stack_handle_t handle;
        depot_stack_handle_t free_handle;
        u64 ts_nsec;
        u64 free_ts_nsec;
        pid_t pid;
};

static bool page_owner_enabled = false;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

static int __init early_page_owner_param(char *buf)
{
        if (!buf)
                return -EINVAL;

        if (strcmp(buf, "on") == 0)
                page_owner_enabled = true;

        return 0;
}
early_param("page_owner", early_page_owner_param);

static bool need_page_owner(void)
{
        return page_owner_enabled;
}

static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
        unsigned long entries[4];
        unsigned int nr_entries;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
        dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
        failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
        early_handle = create_dummy_stack();
}

static void init_page_owner(void)
{
        if (!page_owner_enabled)
                return;

        register_dummy_stack();
        register_failure_stack();
        register_early_stack();
        static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
        .size = sizeof(struct page_owner),
        .need = need_page_owner,
        .init = init_page_owner,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
        return (void *)page_ext + page_owner_ops.offset;
}

static inline bool check_recursive_alloc(unsigned long *entries,
                                         unsigned int nr_entries,
                                         unsigned long ip)
{
        unsigned int i;

        for (i = 0; i < nr_entries; i++) {
                if (entries[i] == ip)
                        return true;
        }
        return false;
}

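/*
 * Capture the current stack trace and store it in the stack depot,
 * returning a compact handle. Falls back to 'dummy_handle' when the depot
 * request would recurse back into this path, and to 'failure_handle' when
 * the depot cannot store the trace.
 */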
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
        unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        depot_stack_handle_t handle;
        unsigned int nr_entries;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);

        /*
         * We need to check for recursion here because our request to
         * stackdepot could trigger a memory allocation to save a new
         * entry. That allocation would reach here and call
         * stack_depot_save() again if we don't catch it. Since there is
         * still not enough memory in stackdepot, it would try to
         * allocate memory again and loop forever.
         */
        if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
                return dummy_handle;

        handle = stack_depot_save(entries, nr_entries, flags);
        if (!handle)
                handle = failure_handle;

        return handle;
}

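/*
 * Called from the page free path: record the freeing stack trace and a
 * single local_clock() timestamp in the page_owner data of every base page
 * covered by 'order', and clear PAGE_EXT_OWNER_ALLOCATED so the pages are
 * reported as freed.
 */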
void __reset_page_owner(struct page *page, unsigned int order)
{
        int i;
        struct page_ext *page_ext;
        depot_stack_handle_t handle = 0;
        struct page_owner *page_owner;
        u64 free_ts_nsec = local_clock();

        handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);

        page_ext = lookup_page_ext(page);
        if (unlikely(!page_ext))
                return;
        for (i = 0; i < (1 << order); i++) {
                __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
                page_owner = get_page_owner(page_ext);
                page_owner->free_handle = handle;
                page_owner->free_ts_nsec = free_ts_nsec;
                page_ext = page_ext_next(page_ext);
        }
}

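/*
 * Record the allocation in the page_owner data of every base page of a
 * newly allocated (possibly high-order) page: the allocation stack handle,
 * order, gfp mask, allocating pid and a local_clock() timestamp, and mark
 * the pages as tracked and currently allocated.
 */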
static inline void __set_page_owner_handle(struct page *page,
        struct page_ext *page_ext, depot_stack_handle_t handle,
        unsigned int order, gfp_t gfp_mask)
{
        struct page_owner *page_owner;
        int i;

        for (i = 0; i < (1 << order); i++) {
                page_owner = get_page_owner(page_ext);
                page_owner->handle = handle;
                page_owner->order = order;
                page_owner->gfp_mask = gfp_mask;
                page_owner->last_migrate_reason = -1;
                page_owner->pid = current->pid;
                page_owner->ts_nsec = local_clock();
                __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
                __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);

                page_ext = page_ext_next(page_ext);
        }
}

noinline void __set_page_owner(struct page *page, unsigned int order,
                                        gfp_t gfp_mask)
{
        struct page_ext *page_ext = lookup_page_ext(page);
        depot_stack_handle_t handle;

        if (unlikely(!page_ext))
                return;

        handle = save_stack(gfp_mask);
        __set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
        struct page_ext *page_ext = lookup_page_ext(page);
        struct page_owner *page_owner;

        if (unlikely(!page_ext))
                return;

        page_owner = get_page_owner(page_ext);
        page_owner->last_migrate_reason = reason;
}

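/*
 * When a high-order page is split into 'nr' base pages, reset the recorded
 * order to 0 for each resulting page so later dumps and statistics reflect
 * the split.
 */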
void __split_page_owner(struct page *page, unsigned int nr)
{
        int i;
        struct page_ext *page_ext = lookup_page_ext(page);
        struct page_owner *page_owner;

        if (unlikely(!page_ext))
                return;

        for (i = 0; i < nr; i++) {
                page_owner = get_page_owner(page_ext);
                page_owner->order = 0;
                page_ext = page_ext_next(page_ext);
        }
}

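/*
 * Copy the page_owner information from 'oldpage' to 'newpage' during page
 * migration, so the new page keeps the original allocation context (stack
 * handle, gfp mask, pid and allocation timestamp).
 */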
void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
        struct page_ext *old_ext = lookup_page_ext(oldpage);
        struct page_ext *new_ext = lookup_page_ext(newpage);
        struct page_owner *old_page_owner, *new_page_owner;

        if (unlikely(!old_ext || !new_ext))
                return;

        old_page_owner = get_page_owner(old_ext);
        new_page_owner = get_page_owner(new_ext);
        new_page_owner->order = old_page_owner->order;
        new_page_owner->gfp_mask = old_page_owner->gfp_mask;
        new_page_owner->last_migrate_reason =
                old_page_owner->last_migrate_reason;
        new_page_owner->handle = old_page_owner->handle;
        new_page_owner->pid = old_page_owner->pid;
        new_page_owner->ts_nsec = old_page_owner->ts_nsec;
        new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;

        /*
         * We don't clear the bit on the oldpage as it's going to be freed
         * after migration. Until then, the info can be useful in case of
         * a bug, and the overall stats will be off a bit only temporarily.
         * Also, migrate_misplaced_transhuge_page() can still fail the
         * migration and then we want the oldpage to retain the info. But
         * in that case we also don't need to explicitly clear the info from
         * the new page, which will be freed.
         */
        __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
        __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}

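/*
 * Walk a zone pageblock by pageblock and count, per pageblock migratetype,
 * the blocks that contain at least one page whose allocation migratetype
 * differs from the block's ("mixed" blocks). The per-migratetype counts
 * are printed to the pagetypeinfo seq_file.
 */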
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                                       pg_data_t *pgdat, struct zone *zone)
{
        struct page *page;
        struct page_ext *page_ext;
        struct page_owner *page_owner;
        unsigned long pfn, block_end_pfn;
        unsigned long end_pfn = zone_end_pfn(zone);
        unsigned long count[MIGRATE_TYPES] = { 0, };
        int pageblock_mt, page_mt;
        int i;

        /* Scan block by block. First and last block may be incomplete */
        pfn = zone->zone_start_pfn;

        /*
         * Walk the zone in pageblock_nr_pages steps. If a page block spans
         * a zone boundary, it will be double counted between zones. This does
         * not matter as the mixed block count will still be correct
         */
        for (; pfn < end_pfn; ) {
                page = pfn_to_online_page(pfn);
                if (!page) {
                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
                        continue;
                }

                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                block_end_pfn = min(block_end_pfn, end_pfn);

                pageblock_mt = get_pageblock_migratetype(page);

                for (; pfn < block_end_pfn; pfn++) {
                        if (!pfn_valid_within(pfn))
                                continue;

                        /* The pageblock is online, no need to recheck. */
                        page = pfn_to_page(pfn);

                        if (page_zone(page) != zone)
                                continue;

                        if (PageBuddy(page)) {
                                unsigned long freepage_order;

                                freepage_order = buddy_order_unsafe(page);
                                if (freepage_order < MAX_ORDER)
                                        pfn += (1UL << freepage_order) - 1;
                                continue;
                        }

                        if (PageReserved(page))
                                continue;

                        page_ext = lookup_page_ext(page);
                        if (unlikely(!page_ext))
                                continue;

                        if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                                continue;

                        page_owner = get_page_owner(page_ext);
                        page_mt = gfp_migratetype(page_owner->gfp_mask);
                        if (pageblock_mt != page_mt) {
                                if (is_migrate_cma(pageblock_mt))
                                        count[MIGRATE_MOVABLE]++;
                                else
                                        count[pageblock_mt]++;

                                pfn = block_end_pfn;
                                break;
                        }
                        pfn += (1UL << page_owner->order) - 1;
                }
        }

        /* Print counts */
        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
        for (i = 0; i < MIGRATE_TYPES; i++)
                seq_printf(m, "%12lu ", count[i]);
        seq_putc(m, '\n');
}

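/*
 * Format the page_owner record of a single page into a kernel buffer (the
 * allocation header with order, gfp mask, pid and timestamps, mobility
 * grouping info, the saved allocation stack and any migrate reason) and
 * copy it to the user buffer supplied by read_page_owner().
 */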
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
                struct page *page, struct page_owner *page_owner,
                depot_stack_handle_t handle)
{
        int ret, pageblock_mt, page_mt;
        unsigned long *entries;
        unsigned int nr_entries;
        char *kbuf;

        count = min_t(size_t, count, PAGE_SIZE);
        kbuf = kmalloc(count, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;

        ret = snprintf(kbuf, count,
                        "Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
                        page_owner->order, page_owner->gfp_mask,
                        &page_owner->gfp_mask, page_owner->pid,
                        page_owner->ts_nsec, page_owner->free_ts_nsec);

        if (ret >= count)
                goto err;

        /* Print information relevant to grouping pages by mobility */
        pageblock_mt = get_pageblock_migratetype(page);
        page_mt = gfp_migratetype(page_owner->gfp_mask);
        ret += snprintf(kbuf + ret, count - ret,
                        "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
                        pfn,
                        migratetype_names[page_mt],
                        pfn >> pageblock_order,
                        migratetype_names[pageblock_mt],
                        page->flags, &page->flags);

        if (ret >= count)
                goto err;

        nr_entries = stack_depot_fetch(handle, &entries);
        ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0);
        if (ret >= count)
                goto err;

        if (page_owner->last_migrate_reason != -1) {
                ret += snprintf(kbuf + ret, count - ret,
                        "Page has been migrated, last migrate reason: %s\n",
                        migrate_reason_names[page_owner->last_migrate_reason]);
                if (ret >= count)
                        goto err;
        }

        ret += snprintf(kbuf + ret, count - ret, "\n");
        if (ret >= count)
                goto err;

        if (copy_to_user(buf, kbuf, ret))
                ret = -EFAULT;

        kfree(kbuf);
        return ret;

err:
        kfree(kbuf);
        return -ENOMEM;
}

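/*
 * Dump the page_owner state of a page to the kernel log, typically when
 * dump_page() is used while debugging: whether the page is tracked as
 * allocated or freed, the allocation parameters and timestamps, and the
 * saved allocation and free stack traces if present.
 */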
void __dump_page_owner(struct page *page)
{
        struct page_ext *page_ext = lookup_page_ext(page);
        struct page_owner *page_owner;
        depot_stack_handle_t handle;
        unsigned long *entries;
        unsigned int nr_entries;
        gfp_t gfp_mask;
        int mt;

        if (unlikely(!page_ext)) {
                pr_alert("There is no page extension available.\n");
                return;
        }

        page_owner = get_page_owner(page_ext);
        gfp_mask = page_owner->gfp_mask;
        mt = gfp_migratetype(gfp_mask);

        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
                pr_alert("page_owner info is not present (never set?)\n");
                return;
        }

        if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                pr_alert("page_owner tracks the page as allocated\n");
        else
                pr_alert("page_owner tracks the page as freed\n");

        pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
                 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
                 page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);

        handle = READ_ONCE(page_owner->handle);
        if (!handle) {
                pr_alert("page_owner allocation stack trace missing\n");
        } else {
                nr_entries = stack_depot_fetch(handle, &entries);
                stack_trace_print(entries, nr_entries, 0);
        }

        handle = READ_ONCE(page_owner->free_handle);
        if (!handle) {
                pr_alert("page_owner free stack trace missing\n");
        } else {
                nr_entries = stack_depot_fetch(handle, &entries);
                pr_alert("page last free stack trace:\n");
                stack_trace_print(entries, nr_entries, 0);
        }

        if (page_owner->last_migrate_reason != -1)
                pr_alert("page has been migrated, last migrate reason: %s\n",
                        migrate_reason_names[page_owner->last_migrate_reason]);
}

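/*
 * read() handler for the debugfs file (normally
 * /sys/kernel/debug/page_owner). Scans PFNs starting from the file offset,
 * skips free pages, pages never tracked or already freed, and tail pages of
 * high-order allocations, and prints one record per currently allocated
 * page via print_page_owner().
 */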
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
        unsigned long pfn;
        struct page *page;
        struct page_ext *page_ext;
        struct page_owner *page_owner;
        depot_stack_handle_t handle;

        if (!static_branch_unlikely(&page_owner_inited))
                return -EINVAL;

        page = NULL;
        pfn = min_low_pfn + *ppos;

        /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
        while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
                pfn++;

        drain_all_pages(NULL);

        /* Find an allocated page */
        for (; pfn < max_pfn; pfn++) {
                /*
                 * If the new page is in a new MAX_ORDER_NR_PAGES area,
                 * validate the area as existing, skip it if not
                 */
                if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
                        pfn += MAX_ORDER_NR_PAGES - 1;
                        continue;
                }

                /* Check for holes within a MAX_ORDER area */
                if (!pfn_valid_within(pfn))
                        continue;

                page = pfn_to_page(pfn);
                if (PageBuddy(page)) {
                        unsigned long freepage_order = buddy_order_unsafe(page);

                        if (freepage_order < MAX_ORDER)
                                pfn += (1UL << freepage_order) - 1;
                        continue;
                }

                page_ext = lookup_page_ext(page);
                if (unlikely(!page_ext))
                        continue;

                /*
                 * Some pages could be missed by concurrent allocation or free,
                 * because we don't hold the zone lock.
                 */
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;

                /*
                 * Although we do have the info about past allocation of free
                 * pages, it's not relevant for current memory usage.
                 */
                if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                        continue;

                page_owner = get_page_owner(page_ext);

                /*
                 * Don't print "tail" pages of high-order allocations as that
                 * would inflate the stats.
                 */
                if (!IS_ALIGNED(pfn, 1 << page_owner->order))
                        continue;

                /*
                 * Access to page_owner->handle isn't synchronized, so be
                 * careful when accessing it.
                 */
                handle = READ_ONCE(page_owner->handle);
                if (!handle)
                        continue;

                /* Record the next PFN to read in the file offset */
                *ppos = (pfn - min_low_pfn) + 1;

                return print_page_owner(buf, count, pfn, page,
                                page_owner, handle);
        }

        return 0;
}

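/*
 * Pages allocated before page_owner was fully initialized carry no owner
 * information. Walk each populated zone at init time and mark such
 * already-allocated pages with the special 'early_handle' stack so they at
 * least show up as tracked.
 */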
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
        unsigned long pfn = zone->zone_start_pfn;
        unsigned long end_pfn = zone_end_pfn(zone);
        unsigned long count = 0;

        /*
         * Walk the zone in pageblock_nr_pages steps. If a page block spans
         * a zone boundary, it will be double counted between zones. This does
         * not matter as the mixed block count will still be correct
         */
        for (; pfn < end_pfn; ) {
                unsigned long block_end_pfn;

                if (!pfn_valid(pfn)) {
                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
                        continue;
                }

                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                block_end_pfn = min(block_end_pfn, end_pfn);

                for (; pfn < block_end_pfn; pfn++) {
                        struct page *page;
                        struct page_ext *page_ext;

                        if (!pfn_valid_within(pfn))
                                continue;

                        page = pfn_to_page(pfn);

                        if (page_zone(page) != zone)
                                continue;

                        /*
                         * To avoid having to grab zone->lock, be a little
                         * careful when reading buddy page order. The only
                         * danger is that we skip too much and potentially miss
                         * some early allocated pages, which is better than
                         * heavy lock contention.
                         */
                        if (PageBuddy(page)) {
                                unsigned long order = buddy_order_unsafe(page);

                                if (order > 0 && order < MAX_ORDER)
                                        pfn += (1UL << order) - 1;
                                continue;
                        }

                        if (PageReserved(page))
                                continue;

                        page_ext = lookup_page_ext(page);
                        if (unlikely(!page_ext))
                                continue;

                        /* Maybe overlapping zone */
                        if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                                continue;

                        /* Found early allocated page */
                        __set_page_owner_handle(page, page_ext, early_handle,
                                                0, 0);
                        count++;
                }
                cond_resched();
        }

        pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
                pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                init_pages_in_zone(pgdat, zone);
        }
}

static void init_early_allocated_pages(void)
{
        pg_data_t *pgdat;

        for_each_online_pgdat(pgdat)
                init_zones_in_node(pgdat);
}

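/*
 * Expose the records through a read-only debugfs file ("page_owner",
 * mode 0400) once page_owner has been enabled on the kernel command line
 * (page_owner=on) and initialized.
 */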
static const struct file_operations proc_page_owner_operations = {
        .read = read_page_owner,
};

static int __init pageowner_init(void)
{
        if (!static_branch_unlikely(&page_owner_inited)) {
                pr_info("page_owner is disabled\n");
                return 0;
        }

        debugfs_create_file("page_owner", 0400, NULL, NULL,
                            &proc_page_owner_operations);

        return 0;
}
late_initcall(pageowner_init)