457c8996 1// SPDX-License-Identifier: GPL-2.0-only
f6ac2354
CL
2/*
3 * linux/mm/vmstat.c
4 *
5 * Manages VM statistics
6 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
2244b95a
CL
7 *
8 * zoned VM statistics
9 * Copyright (C) 2006 Silicon Graphics, Inc.,
786d5cc2 10 * Christoph Lameter <cl@gentwo.org>
7cc36bbd 11 * Copyright (C) 2008-2014 Christoph Lameter
f6ac2354 12 */
8f32f7e5 13#include <linux/fs.h>
f6ac2354 14#include <linux/mm.h>
4e950f6f 15#include <linux/err.h>
2244b95a 16#include <linux/module.h>
5a0e3ad6 17#include <linux/slab.h>
df9ecaba 18#include <linux/cpu.h>
7cc36bbd 19#include <linux/cpumask.h>
c748e134 20#include <linux/vmstat.h>
3c486871
AM
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/debugfs.h>
e8edc6e0 24#include <linux/sched.h>
f1a5ab12 25#include <linux/math64.h>
79da826a 26#include <linux/writeback.h>
36deb0be 27#include <linux/compaction.h>
6e543d57 28#include <linux/mm_inline.h>
48c96a36 29#include <linux/page_owner.h>
be5e015d 30#include <linux/sched/isolation.h>
6e543d57
LD
31
32#include "internal.h"
f6ac2354 33
b8974b89 34#ifdef CONFIG_PROC_FS
4518085e 35#ifdef CONFIG_NUMA
b8974b89
KY
36#define ENABLE_NUMA_STAT 1
37static int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
4518085e
KW
38
39/* zero numa counters within a zone */
40static void zero_zone_numa_counters(struct zone *zone)
41{
42 int item, cpu;
43
f19298b9
MG
44 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
45 atomic_long_set(&zone->vm_numa_event[item], 0);
46 for_each_online_cpu(cpu) {
47 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
4518085e 48 = 0;
f19298b9 49 }
4518085e
KW
50 }
51}
52
53/* zero numa counters of all the populated zones */
54static void zero_zones_numa_counters(void)
55{
56 struct zone *zone;
57
58 for_each_populated_zone(zone)
59 zero_zone_numa_counters(zone);
60}
61
62/* zero global numa counters */
63static void zero_global_numa_counters(void)
64{
65 int item;
66
f19298b9
MG
67 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
68 atomic_long_set(&vm_numa_event[item], 0);
4518085e
KW
69}
70
71static void invalid_numa_statistics(void)
72{
73 zero_zones_numa_counters();
74 zero_global_numa_counters();
75}
76
77static DEFINE_MUTEX(vm_numa_stat_lock);
78
b8974b89 79static int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
32927393 80 void *buffer, size_t *length, loff_t *ppos)
4518085e
KW
81{
82 int ret, oldval;
83
84 mutex_lock(&vm_numa_stat_lock);
85 if (write)
86 oldval = sysctl_vm_numa_stat;
87 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
88 if (ret || !write)
89 goto out;
90
91 if (oldval == sysctl_vm_numa_stat)
92 goto out;
93 else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
94 static_branch_enable(&vm_numa_stat_key);
95 pr_info("enable numa statistics\n");
96 } else {
97 static_branch_disable(&vm_numa_stat_key);
98 invalid_numa_statistics();
99 pr_info("disable numa statistics, and clear numa counters\n");
100 }
101
102out:
103 mutex_unlock(&vm_numa_stat_lock);
104 return ret;
105}
106#endif
b8974b89 107#endif /* CONFIG_PROC_FS */
4518085e 108
f8891e5e
CL
109#ifdef CONFIG_VM_EVENT_COUNTERS
110DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
111EXPORT_PER_CPU_SYMBOL(vm_event_states);
112
31f961a8 113static void sum_vm_events(unsigned long *ret)
f8891e5e 114{
9eccf2a8 115 int cpu;
f8891e5e
CL
116 int i;
117
118 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
119
31f961a8 120 for_each_online_cpu(cpu) {
f8891e5e
CL
121 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
122
f8891e5e
CL
123 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
124 ret[i] += this->event[i];
125 }
126}
127
128/*
129 * Accumulate the vm event counters across all CPUs.
130 * The result is unavoidably approximate - it can change
131 * during and after execution of this function.
132*/
133void all_vm_events(unsigned long *ret)
134{
7625eccd 135 cpus_read_lock();
31f961a8 136 sum_vm_events(ret);
7625eccd 137 cpus_read_unlock();
f8891e5e 138}
32dd66fc 139EXPORT_SYMBOL_GPL(all_vm_events);
f8891e5e 140
f8891e5e
CL
141/*
142 * Fold the foreign cpu events into our own.
143 *
144 * This is adding to the events on one processor
145 * but keeps the global counts constant.
146 */
147void vm_events_fold_cpu(int cpu)
148{
149 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
150 int i;
151
152 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
153 count_vm_events(i, fold_state->event[i]);
154 fold_state->event[i] = 0;
155 }
156}
f8891e5e
CL
157
158#endif /* CONFIG_VM_EVENT_COUNTERS */
159
2244b95a
CL
160/*
161 * Manage combined zone based / global counters
162 *
163 * vm_stat contains the global counters
164 */
75ef7184
MG
165atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
166atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
f19298b9 167atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
75ef7184
MG
168EXPORT_SYMBOL(vm_zone_stat);
169EXPORT_SYMBOL(vm_node_stat);
2244b95a 170
ebeac3ea
GU
171#ifdef CONFIG_NUMA
172static void fold_vm_zone_numa_events(struct zone *zone)
173{
174 unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
175 int cpu;
176 enum numa_stat_item item;
177
178 for_each_online_cpu(cpu) {
179 struct per_cpu_zonestat *pzstats;
180
181 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
182 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
183 zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
184 }
185
186 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
187 zone_numa_event_add(zone_numa_events[item], zone, item);
188}
189
190void fold_vm_numa_events(void)
191{
192 struct zone *zone;
193
194 for_each_populated_zone(zone)
195 fold_vm_zone_numa_events(zone);
196}
197#endif
198
2244b95a
CL
199#ifdef CONFIG_SMP
200
b44129b3 201int calculate_pressure_threshold(struct zone *zone)
88f5acf8
MG
202{
203 int threshold;
204 int watermark_distance;
205
206 /*
207 * As vmstats are not up to date, there is drift between the estimated
208 * and real values. For high thresholds and a high number of CPUs, it
209 * is possible for the min watermark to be breached while the estimated
210 * value looks fine. The pressure threshold is a reduced value such
211 * that even the maximum amount of drift will not accidentally breach
212 * the min watermark
213 */
214 watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
215 threshold = max(1, (int)(watermark_distance / num_online_cpus()));
216
217 /*
218 * Maximum threshold is 125
219 */
220 threshold = min(125, threshold);
221
222 return threshold;
223}
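/*
 * A worked example with assumed numbers (not taken from this file): if
 * low_wmark - min_wmark is 1024 pages and there are 16 online CPUs, the
 * pressure threshold is max(1, 1024 / 16) = 64, still under the 125 cap.
 * Worst case, every CPU holds an undrained diff of 64, so the total
 * drift of 16 * 64 = 1024 pages can never silently push the zone below
 * the min watermark while the estimate still looks >= low.
 */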
224
b44129b3 225int calculate_normal_threshold(struct zone *zone)
df9ecaba
CL
226{
227 int threshold;
228 int mem; /* memory in 128 MB units */
229
230 /*
231 * The threshold scales with the number of processors and the amount
232 * of memory per zone. More memory means that we can defer updates for
233 * longer, more processors could lead to more contention.
234 * fls() is used to have a cheap way of logarithmic scaling.
235 *
236 * Some sample thresholds:
237 *
ea15ba17 238 * Threshold    Processors    (fls)    Zonesize    fls(mem)+1
df9ecaba
CL
239 * ------------------------------------------------------------------
240 *       8             1         1    0.9-1 GB         4
241 *      16             2         2    0.9-1 GB         4
242 *      20             2         2    1-2 GB           5
243 *      24             2         2    2-4 GB           6
244 *      28             2         2    4-8 GB           7
245 *      32             2         2    8-16 GB          8
246 *       4             2         2    <128M            1
247 *      30             4         3    2-4 GB           5
248 *      48             4         3    8-16 GB          8
249 *      32             8         4    1-2 GB           4
250 *      32             8         4    0.9-1GB          4
251 *      10            16         5    <128M            1
252 *      40            16         5    900M             4
253 *      70            64         7    2-4 GB           5
254 *      84            64         7    4-8 GB           6
255 *     108           512         9    4-8 GB           6
256 *     125          1024        10    8-16 GB          8
257 *     125          1024        10    16-32 GB         9
258 */
259
9705bea5 260 mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
df9ecaba
CL
261
262 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
263
264 /*
265 * Maximum threshold is 125
266 */
267 threshold = min(125, threshold);
268
269 return threshold;
270}
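/*
 * A worked example with assumed numbers: for 8 online CPUs and a zone
 * managing about 4GB, mem = 4GB / 128MB = 32, so fls(mem) = 6 and
 * fls(num_online_cpus()) = 4, giving threshold = 2 * 4 * (1 + 6) = 56,
 * below the 125 cap and consistent with the sample table above.
 */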
2244b95a
CL
271
272/*
df9ecaba 273 * Refresh the thresholds for each zone.
2244b95a 274 */
a6cccdc3 275void refresh_zone_stat_thresholds(void)
2244b95a 276{
75ef7184 277 struct pglist_data *pgdat;
df9ecaba
CL
278 struct zone *zone;
279 int cpu;
280 int threshold;
281
75ef7184
MG
282 /* Zero current pgdat thresholds */
283 for_each_online_pgdat(pgdat) {
284 for_each_online_cpu(cpu) {
285 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
286 }
287 }
288
ee99c71c 289 for_each_populated_zone(zone) {
75ef7184 290 struct pglist_data *pgdat = zone->zone_pgdat;
aa454840
CL
291 unsigned long max_drift, tolerate_drift;
292
b44129b3 293 threshold = calculate_normal_threshold(zone);
df9ecaba 294
75ef7184
MG
295 for_each_online_cpu(cpu) {
296 int pgdat_threshold;
297
28f836b6 298 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
99dcc3e5 299 = threshold;
1d90ca89 300
75ef7184
MG
301 /* Base nodestat threshold on the largest populated zone. */
302 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
303 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
304 = max(threshold, pgdat_threshold);
305 }
306
aa454840
CL
307 /*
308 * Only set percpu_drift_mark if there is a danger that
309 * NR_FREE_PAGES reports the low watermark is ok when in fact
310 * the min watermark could be breached by an allocation
311 */
312 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
313 max_drift = num_online_cpus() * threshold;
314 if (max_drift > tolerate_drift)
315 zone->percpu_drift_mark = high_wmark_pages(zone) +
316 max_drift;
df9ecaba 317 }
2244b95a
CL
318}
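/*
 * Example of the drift check above, with assumed numbers: 16 online
 * CPUs and a threshold of 56 give max_drift = 896 pages. If the gap
 * between the low and min watermarks is smaller than that, the zone's
 * percpu_drift_mark is set to high_wmark + 896, so that watermark
 * checks near that mark (e.g. zone_watermark_ok_safe()) fall back to
 * the precise zone_page_state_snapshot() instead of the cached value.
 */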
319
b44129b3
MG
320void set_pgdat_percpu_threshold(pg_data_t *pgdat,
321 int (*calculate_pressure)(struct zone *))
88f5acf8
MG
322{
323 struct zone *zone;
324 int cpu;
325 int threshold;
326 int i;
327
88f5acf8
MG
328 for (i = 0; i < pgdat->nr_zones; i++) {
329 zone = &pgdat->node_zones[i];
330 if (!zone->percpu_drift_mark)
331 continue;
332
b44129b3 333 threshold = (*calculate_pressure)(zone);
1d90ca89 334 for_each_online_cpu(cpu)
28f836b6 335 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
88f5acf8
MG
336 = threshold;
337 }
88f5acf8
MG
338}
339
2244b95a 340/*
bea04b07
JZ
341 * For use when we know that interrupts are disabled,
342 * or when we know that preemption is disabled and that
343 * particular counter cannot be updated from interrupt context.
2244b95a
CL
344 */
345void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 346 long delta)
2244b95a 347{
28f836b6 348 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92 349 s8 __percpu *p = pcp->vm_stat_diff + item;
2244b95a 350 long x;
12938a92
CL
351 long t;
352
c68ed794
IM
353 /*
354 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
355 * atomicity is provided by IRQs being disabled -- either explicitly
356 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
357 * CPU migrations and preemption potentially corrupts a counter so
358 * disable preemption.
359 */
7a025e91 360 preempt_disable_nested();
c68ed794 361
12938a92 362 x = delta + __this_cpu_read(*p);
2244b95a 363
12938a92 364 t = __this_cpu_read(pcp->stat_threshold);
2244b95a 365
40610076 366 if (unlikely(abs(x) > t)) {
2244b95a
CL
367 zone_page_state_add(x, zone, item);
368 x = 0;
369 }
12938a92 370 __this_cpu_write(*p, x);
c68ed794 371
7a025e91 372 preempt_enable_nested();
2244b95a
CL
373}
374EXPORT_SYMBOL(__mod_zone_page_state);
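/*
 * Illustrative sequence, assuming a stat_threshold of 32: five calls of
 * __mod_zone_page_state(zone, NR_FREE_PAGES, 7) leave the per-cpu diff
 * at 7, 14, 21 and 28, and only the fifth call (x = 35, above the
 * threshold) folds 35 into the global atomic via zone_page_state_add()
 * and resets the diff to 0. The first four updates never touched a
 * shared cacheline.
 */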
375
75ef7184
MG
376void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
377 long delta)
378{
379 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
380 s8 __percpu *p = pcp->vm_node_stat_diff + item;
381 long x;
382 long t;
383
ea426c2a 384 if (vmstat_item_in_bytes(item)) {
629484ae
JW
385 /*
386 * Only cgroups use subpage accounting right now; at
387 * the global level, these items still change in
388 * multiples of whole pages. Store them as pages
389 * internally to keep the per-cpu counters compact.
390 */
ea426c2a
RG
391 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
392 delta >>= PAGE_SHIFT;
393 }
394
c68ed794 395 /* See __mod_zone_page_state */
7a025e91 396 preempt_disable_nested();
c68ed794 397
75ef7184
MG
398 x = delta + __this_cpu_read(*p);
399
400 t = __this_cpu_read(pcp->stat_threshold);
401
40610076 402 if (unlikely(abs(x) > t)) {
75ef7184
MG
403 node_page_state_add(x, pgdat, item);
404 x = 0;
405 }
406 __this_cpu_write(*p, x);
c68ed794 407
7a025e91 408 preempt_enable_nested();
75ef7184
MG
409}
410EXPORT_SYMBOL(__mod_node_page_state);
411
2244b95a
CL
412/*
413 * Optimized increment and decrement functions.
414 *
415 * These are only for a single page and therefore can take a struct page *
416 * argument instead of struct zone *. This allows the inclusion of the code
417 * generated for page_zone(page) into the optimized functions.
418 *
419 * No overflow check is necessary and therefore the differential can be
420 * incremented or decremented in place which may allow the compilers to
421 * generate better code.
2244b95a
CL
422 * The increment or decrement is known and therefore one boundary check can
423 * be omitted.
424 *
df9ecaba
CL
425 * NOTE: These functions are very performance sensitive. Change only
426 * with care.
427 *
2244b95a
CL
428 * Some processors have inc/dec instructions that are atomic vs an interrupt.
429 * However, the code must first determine the differential location in a zone
430 * based on the processor number and then inc/dec the counter. There is no
431 * guarantee without disabling preemption that the processor will not change
432 * in between and therefore the atomicity vs. interrupt cannot be exploited
433 * in a useful way here.
434 */
c8785385 435void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
2244b95a 436{
28f836b6 437 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92
CL
438 s8 __percpu *p = pcp->vm_stat_diff + item;
439 s8 v, t;
2244b95a 440
c68ed794 441 /* See __mod_zone_page_state */
7a025e91 442 preempt_disable_nested();
c68ed794 443
908ee0f1 444 v = __this_cpu_inc_return(*p);
12938a92
CL
445 t = __this_cpu_read(pcp->stat_threshold);
446 if (unlikely(v > t)) {
447 s8 overstep = t >> 1;
df9ecaba 448
12938a92
CL
449 zone_page_state_add(v + overstep, zone, item);
450 __this_cpu_write(*p, -overstep);
2244b95a 451 }
c68ed794 452
7a025e91 453 preempt_enable_nested();
2244b95a 454}
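/*
 * Example of the overstep above, assuming a threshold of 32: once the
 * per-cpu diff increments past 32, v + 16 (half a threshold extra) is
 * folded into the zone counter and the diff is left at -16. This CPU
 * can then absorb another 48 increments locally before folding again,
 * rather than overflowing almost immediately after each fold.
 */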
ca889e6c 455
75ef7184
MG
456void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
457{
458 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
459 s8 __percpu *p = pcp->vm_node_stat_diff + item;
460 s8 v, t;
461
ea426c2a
RG
462 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
463
c68ed794 464 /* See __mod_zone_page_state */
7a025e91 465 preempt_disable_nested();
c68ed794 466
75ef7184
MG
467 v = __this_cpu_inc_return(*p);
468 t = __this_cpu_read(pcp->stat_threshold);
469 if (unlikely(v > t)) {
470 s8 overstep = t >> 1;
471
472 node_page_state_add(v + overstep, pgdat, item);
473 __this_cpu_write(*p, -overstep);
474 }
c68ed794 475
7a025e91 476 preempt_enable_nested();
75ef7184
MG
477}
478
ca889e6c
CL
479void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
480{
481 __inc_zone_state(page_zone(page), item);
482}
2244b95a
CL
483EXPORT_SYMBOL(__inc_zone_page_state);
484
75ef7184
MG
485void __inc_node_page_state(struct page *page, enum node_stat_item item)
486{
487 __inc_node_state(page_pgdat(page), item);
488}
489EXPORT_SYMBOL(__inc_node_page_state);
490
c8785385 491void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
2244b95a 492{
28f836b6 493 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92
CL
494 s8 __percpu *p = pcp->vm_stat_diff + item;
495 s8 v, t;
2244b95a 496
c68ed794 497 /* See __mod_zone_page_state */
7a025e91 498 preempt_disable_nested();
c68ed794 499
908ee0f1 500 v = __this_cpu_dec_return(*p);
12938a92
CL
501 t = __this_cpu_read(pcp->stat_threshold);
502 if (unlikely(v < - t)) {
503 s8 overstep = t >> 1;
2244b95a 504
12938a92
CL
505 zone_page_state_add(v - overstep, zone, item);
506 __this_cpu_write(*p, overstep);
2244b95a 507 }
c68ed794 508
7a025e91 509 preempt_enable_nested();
2244b95a 510}
c8785385 511
75ef7184
MG
512void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
513{
514 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
515 s8 __percpu *p = pcp->vm_node_stat_diff + item;
516 s8 v, t;
517
ea426c2a
RG
518 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
519
c68ed794 520 /* See __mod_zone_page_state */
7a025e91 521 preempt_disable_nested();
c68ed794 522
75ef7184
MG
523 v = __this_cpu_dec_return(*p);
524 t = __this_cpu_read(pcp->stat_threshold);
525 if (unlikely(v < - t)) {
526 s8 overstep = t >> 1;
527
528 node_page_state_add(v - overstep, pgdat, item);
529 __this_cpu_write(*p, overstep);
530 }
c68ed794 531
7a025e91 532 preempt_enable_nested();
75ef7184
MG
533}
534
c8785385
CL
535void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
536{
537 __dec_zone_state(page_zone(page), item);
538}
2244b95a
CL
539EXPORT_SYMBOL(__dec_zone_page_state);
540
75ef7184
MG
541void __dec_node_page_state(struct page *page, enum node_stat_item item)
542{
543 __dec_node_state(page_pgdat(page), item);
544}
545EXPORT_SYMBOL(__dec_node_page_state);
546
4156153c 547#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
7c839120
CL
548/*
549 * If we have cmpxchg_local support then we do not need to incur the overhead
550 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
551 *
552 * mod_state() modifies the zone counter state through atomic per cpu
553 * operations.
554 *
555 * Overstep mode specifies how overstep should be handled:
556 * 0 No overstepping
557 * 1 Overstepping half of threshold
558 * -1 Overstepping minus half of threshold
559*/
75ef7184
MG
560static inline void mod_zone_state(struct zone *zone,
561 enum zone_stat_item item, long delta, int overstep_mode)
7c839120 562{
28f836b6 563 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
7c839120 564 s8 __percpu *p = pcp->vm_stat_diff + item;
77cd8148
UB
565 long n, t, z;
566 s8 o;
7c839120 567
77cd8148 568 o = this_cpu_read(*p);
7c839120
CL
569 do {
570 z = 0; /* overflow to zone counters */
571
572 /*
573 * The fetching of the stat_threshold is racy. We may apply
574 * a counter threshold to the wrong cpu if we get
d3bc2367
CL
575 * rescheduled while executing here. However, the next
576 * counter update will apply the threshold again and
577 * therefore bring the counter under the threshold again.
578 *
579 * Most of the time the thresholds are the same anyways
580 * for all cpus in a zone.
7c839120
CL
581 */
582 t = this_cpu_read(pcp->stat_threshold);
583
77cd8148 584 n = delta + (long)o;
7c839120 585
40610076 586 if (abs(n) > t) {
7c839120
CL
587 int os = overstep_mode * (t >> 1) ;
588
589 /* Overflow must be added to zone counters */
590 z = n + os;
591 n = -os;
592 }
77cd8148 593 } while (!this_cpu_try_cmpxchg(*p, &o, n));
7c839120
CL
594
595 if (z)
596 zone_page_state_add(z, zone, item);
597}
598
599void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 600 long delta)
7c839120 601{
75ef7184 602 mod_zone_state(zone, item, delta, 0);
7c839120
CL
603}
604EXPORT_SYMBOL(mod_zone_page_state);
605
7c839120
CL
606void inc_zone_page_state(struct page *page, enum zone_stat_item item)
607{
75ef7184 608 mod_zone_state(page_zone(page), item, 1, 1);
7c839120
CL
609}
610EXPORT_SYMBOL(inc_zone_page_state);
611
612void dec_zone_page_state(struct page *page, enum zone_stat_item item)
613{
75ef7184 614 mod_zone_state(page_zone(page), item, -1, -1);
7c839120
CL
615}
616EXPORT_SYMBOL(dec_zone_page_state);
75ef7184
MG
617
618static inline void mod_node_state(struct pglist_data *pgdat,
619 enum node_stat_item item, int delta, int overstep_mode)
620{
621 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
622 s8 __percpu *p = pcp->vm_node_stat_diff + item;
77cd8148
UB
623 long n, t, z;
624 s8 o;
75ef7184 625
ea426c2a 626 if (vmstat_item_in_bytes(item)) {
629484ae
JW
627 /*
628 * Only cgroups use subpage accounting right now; at
629 * the global level, these items still change in
630 * multiples of whole pages. Store them as pages
631 * internally to keep the per-cpu counters compact.
632 */
ea426c2a
RG
633 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
634 delta >>= PAGE_SHIFT;
635 }
636
77cd8148 637 o = this_cpu_read(*p);
75ef7184
MG
638 do {
639 z = 0; /* overflow to node counters */
640
641 /*
642 * The fetching of the stat_threshold is racy. We may apply
643 * a counter threshold to the wrong cpu if we get
644 * rescheduled while executing here. However, the next
645 * counter update will apply the threshold again and
646 * therefore bring the counter under the threshold again.
647 *
648 * Most of the time the thresholds are the same anyways
649 * for all cpus in a node.
650 */
651 t = this_cpu_read(pcp->stat_threshold);
652
77cd8148 653 n = delta + (long)o;
75ef7184 654
40610076 655 if (abs(n) > t) {
75ef7184
MG
656 int os = overstep_mode * (t >> 1) ;
657
658 /* Overflow must be added to node counters */
659 z = n + os;
660 n = -os;
661 }
77cd8148 662 } while (!this_cpu_try_cmpxchg(*p, &o, n));
75ef7184
MG
663
664 if (z)
665 node_page_state_add(z, pgdat, item);
666}
667
668void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
669 long delta)
670{
671 mod_node_state(pgdat, item, delta, 0);
672}
673EXPORT_SYMBOL(mod_node_page_state);
674
675void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
676{
677 mod_node_state(pgdat, item, 1, 1);
678}
679
680void inc_node_page_state(struct page *page, enum node_stat_item item)
681{
682 mod_node_state(page_pgdat(page), item, 1, 1);
683}
684EXPORT_SYMBOL(inc_node_page_state);
685
686void dec_node_page_state(struct page *page, enum node_stat_item item)
687{
688 mod_node_state(page_pgdat(page), item, -1, -1);
689}
690EXPORT_SYMBOL(dec_node_page_state);
7c839120
CL
691#else
692/*
693 * Use interrupt disable to serialize counter updates
694 */
695void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 696 long delta)
7c839120
CL
697{
698 unsigned long flags;
699
700 local_irq_save(flags);
701 __mod_zone_page_state(zone, item, delta);
702 local_irq_restore(flags);
703}
704EXPORT_SYMBOL(mod_zone_page_state);
705
2244b95a
CL
706void inc_zone_page_state(struct page *page, enum zone_stat_item item)
707{
708 unsigned long flags;
709 struct zone *zone;
2244b95a
CL
710
711 zone = page_zone(page);
712 local_irq_save(flags);
ca889e6c 713 __inc_zone_state(zone, item);
2244b95a
CL
714 local_irq_restore(flags);
715}
716EXPORT_SYMBOL(inc_zone_page_state);
717
718void dec_zone_page_state(struct page *page, enum zone_stat_item item)
719{
720 unsigned long flags;
2244b95a 721
2244b95a 722 local_irq_save(flags);
a302eb4e 723 __dec_zone_page_state(page, item);
2244b95a
CL
724 local_irq_restore(flags);
725}
726EXPORT_SYMBOL(dec_zone_page_state);
727
75ef7184
MG
728void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
729{
730 unsigned long flags;
731
732 local_irq_save(flags);
733 __inc_node_state(pgdat, item);
734 local_irq_restore(flags);
735}
736EXPORT_SYMBOL(inc_node_state);
737
738void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
739 long delta)
740{
741 unsigned long flags;
742
743 local_irq_save(flags);
744 __mod_node_page_state(pgdat, item, delta);
745 local_irq_restore(flags);
746}
747EXPORT_SYMBOL(mod_node_page_state);
748
749void inc_node_page_state(struct page *page, enum node_stat_item item)
750{
751 unsigned long flags;
752 struct pglist_data *pgdat;
753
754 pgdat = page_pgdat(page);
755 local_irq_save(flags);
756 __inc_node_state(pgdat, item);
757 local_irq_restore(flags);
758}
759EXPORT_SYMBOL(inc_node_page_state);
760
761void dec_node_page_state(struct page *page, enum node_stat_item item)
762{
763 unsigned long flags;
764
765 local_irq_save(flags);
766 __dec_node_page_state(page, item);
767 local_irq_restore(flags);
768}
769EXPORT_SYMBOL(dec_node_page_state);
770#endif
7cc36bbd
CL
771
772/*
773 * Fold a differential into the global counters.
774 * Returns the number of counters updated.
775 */
f19298b9 776static int fold_diff(int *zone_diff, int *node_diff)
3a321d2a
KW
777{
778 int i;
779 int changes = 0;
780
781 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
782 if (zone_diff[i]) {
783 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
784 changes++;
785 }
786
3a321d2a
KW
787 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
788 if (node_diff[i]) {
789 atomic_long_add(node_diff[i], &vm_node_stat[i]);
790 changes++;
791 }
792 return changes;
793}
f19298b9 794
2244b95a 795/*
2bb921e5 796 * Update the zone counters for the current cpu.
a7f75e25 797 *
4037d452
CL
798 * Note that refresh_cpu_vm_stats strives to only access
799 * node local memory. The per cpu pagesets on remote zones are placed
800 * in the memory local to the processor using that pageset. So the
801 * loop over all zones will access a series of cachelines local to
802 * the processor.
803 *
804 * The call to zone_page_state_add updates the cachelines with the
805 * statistics in the remote zone struct as well as the global cachelines
806 * with the global counters. These could cause remote node cache line
807 * bouncing and should only be done when necessary.
7cc36bbd
CL
808 *
809 * The function returns the number of global counters updated.
2244b95a 810 */
0eb77e98 811static int refresh_cpu_vm_stats(bool do_pagesets)
2244b95a 812{
75ef7184 813 struct pglist_data *pgdat;
2244b95a
CL
814 struct zone *zone;
815 int i;
75ef7184
MG
816 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
817 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
7cc36bbd 818 int changes = 0;
2244b95a 819
ee99c71c 820 for_each_populated_zone(zone) {
28f836b6 821 struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
28f836b6 822 struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
2244b95a 823
fbc2edb0
CL
824 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
825 int v;
2244b95a 826
28f836b6 827 v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
fbc2edb0 828 if (v) {
a7f75e25 829
a7f75e25 830 atomic_long_add(v, &zone->vm_stat[i]);
75ef7184 831 global_zone_diff[i] += v;
4037d452
CL
832#ifdef CONFIG_NUMA
833 /* 3 seconds idle till flush */
28f836b6 834 __this_cpu_write(pcp->expire, 3);
4037d452 835#endif
2244b95a 836 }
fbc2edb0 837 }
3a321d2a 838
0eb77e98
CL
839 if (do_pagesets) {
840 cond_resched();
51a755c5
HY
841
842 changes += decay_pcp_high(zone, this_cpu_ptr(pcp));
843#ifdef CONFIG_NUMA
0eb77e98
CL
844 /*
845 * Deal with draining the remote pageset of this
846 * processor
847 *
848 * Check if there are pages remaining in this pageset
849 * if not then there is nothing to expire.
850 */
28f836b6
MG
851 if (!__this_cpu_read(pcp->expire) ||
852 !__this_cpu_read(pcp->count))
0eb77e98 853 continue;
4037d452 854
0eb77e98
CL
855 /*
856 * We never drain zones local to this processor.
857 */
858 if (zone_to_nid(zone) == numa_node_id()) {
28f836b6 859 __this_cpu_write(pcp->expire, 0);
0eb77e98
CL
860 continue;
861 }
4037d452 862
fa8c4f9a
HY
863 if (__this_cpu_dec_return(pcp->expire)) {
864 changes++;
0eb77e98 865 continue;
fa8c4f9a 866 }
4037d452 867
28f836b6
MG
868 if (__this_cpu_read(pcp->count)) {
869 drain_zone_pages(zone, this_cpu_ptr(pcp));
0eb77e98
CL
870 changes++;
871 }
4037d452 872#endif
51a755c5 873 }
2244b95a 874 }
75ef7184
MG
875
876 for_each_online_pgdat(pgdat) {
877 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
878
879 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
880 int v;
881
882 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
883 if (v) {
884 atomic_long_add(v, &pgdat->vm_stat[i]);
885 global_node_diff[i] += v;
886 }
887 }
888 }
889
890 changes += fold_diff(global_zone_diff, global_node_diff);
7cc36bbd 891 return changes;
2244b95a
CL
892}
893
2bb921e5
CL
894/*
895 * Fold the data for an offline cpu into the global array.
896 * There cannot be any access by the offline cpu and therefore
897 * synchronization is simplified.
898 */
899void cpu_vm_stats_fold(int cpu)
900{
75ef7184 901 struct pglist_data *pgdat;
2bb921e5
CL
902 struct zone *zone;
903 int i;
75ef7184
MG
904 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
905 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
2bb921e5
CL
906
907 for_each_populated_zone(zone) {
28f836b6 908 struct per_cpu_zonestat *pzstats;
2bb921e5 909
28f836b6 910 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
2bb921e5 911
f19298b9 912 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
28f836b6 913 if (pzstats->vm_stat_diff[i]) {
2bb921e5
CL
914 int v;
915
28f836b6
MG
916 v = pzstats->vm_stat_diff[i];
917 pzstats->vm_stat_diff[i] = 0;
2bb921e5 918 atomic_long_add(v, &zone->vm_stat[i]);
75ef7184 919 global_zone_diff[i] += v;
2bb921e5 920 }
f19298b9 921 }
3a321d2a 922#ifdef CONFIG_NUMA
f19298b9
MG
923 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
924 if (pzstats->vm_numa_event[i]) {
925 unsigned long v;
3a321d2a 926
f19298b9
MG
927 v = pzstats->vm_numa_event[i];
928 pzstats->vm_numa_event[i] = 0;
929 zone_numa_event_add(v, zone, i);
3a321d2a 930 }
f19298b9 931 }
3a321d2a 932#endif
2bb921e5
CL
933 }
934
75ef7184
MG
935 for_each_online_pgdat(pgdat) {
936 struct per_cpu_nodestat *p;
937
938 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
939
940 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
941 if (p->vm_node_stat_diff[i]) {
942 int v;
943
944 v = p->vm_node_stat_diff[i];
945 p->vm_node_stat_diff[i] = 0;
946 atomic_long_add(v, &pgdat->vm_stat[i]);
947 global_node_diff[i] += v;
948 }
949 }
950
951 fold_diff(global_zone_diff, global_node_diff);
2bb921e5
CL
952}
953
40f4b1ea
CS
954/*
955 * this is only called if !populated_zone(zone), which implies no other users of
f0953a1b 956 * pzstats->vm_stat_diff[] exist.
40f4b1ea 957 */
28f836b6 958void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
5a883813 959{
f19298b9 960 unsigned long v;
5a883813
MK
961 int i;
962
f19298b9 963 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
28f836b6 964 if (pzstats->vm_stat_diff[i]) {
f19298b9 965 v = pzstats->vm_stat_diff[i];
28f836b6 966 pzstats->vm_stat_diff[i] = 0;
f19298b9 967 zone_page_state_add(v, zone, i);
5a883813 968 }
f19298b9 969 }
3a321d2a
KW
970
971#ifdef CONFIG_NUMA
f19298b9
MG
972 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
973 if (pzstats->vm_numa_event[i]) {
974 v = pzstats->vm_numa_event[i];
975 pzstats->vm_numa_event[i] = 0;
976 zone_numa_event_add(v, zone, i);
3a321d2a 977 }
f19298b9 978 }
3a321d2a 979#endif
5a883813 980}
2244b95a
CL
981#endif
982
ca889e6c 983#ifdef CONFIG_NUMA
c2d42c16 984/*
75ef7184
MG
985 * Determine the per node value of a stat item. This function
986 * is called frequently in a NUMA machine, so try to be as
987 * frugal as possible.
c2d42c16 988 */
75ef7184
MG
989unsigned long sum_zone_node_page_state(int node,
990 enum zone_stat_item item)
c2d42c16
AM
991{
992 struct zone *zones = NODE_DATA(node)->node_zones;
e87d59f7
JK
993 int i;
994 unsigned long count = 0;
c2d42c16 995
e87d59f7
JK
996 for (i = 0; i < MAX_NR_ZONES; i++)
997 count += zone_page_state(zones + i, item);
998
999 return count;
c2d42c16
AM
1000}
1001
f19298b9
MG
1002/* Determine the per node value of a numa stat item. */
1003unsigned long sum_zone_numa_event_state(int node,
3a321d2a
KW
1004 enum numa_stat_item item)
1005{
1006 struct zone *zones = NODE_DATA(node)->node_zones;
3a321d2a 1007 unsigned long count = 0;
f19298b9 1008 int i;
3a321d2a
KW
1009
1010 for (i = 0; i < MAX_NR_ZONES; i++)
f19298b9 1011 count += zone_numa_event_state(zones + i, item);
3a321d2a
KW
1012
1013 return count;
1014}
1015
75ef7184
MG
1016/*
1017 * Determine the per node value of a stat item.
1018 */
ea426c2a
RG
1019unsigned long node_page_state_pages(struct pglist_data *pgdat,
1020 enum node_stat_item item)
75ef7184
MG
1021{
1022 long x = atomic_long_read(&pgdat->vm_stat[item]);
1023#ifdef CONFIG_SMP
1024 if (x < 0)
1025 x = 0;
1026#endif
1027 return x;
1028}
ea426c2a
RG
1029
1030unsigned long node_page_state(struct pglist_data *pgdat,
1031 enum node_stat_item item)
1032{
1033 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1034
1035 return node_page_state_pages(pgdat, item);
1036}
ca889e6c
CL
1037#endif
1038
9d857311
PT
1039/*
1040 * Count number of pages "struct page" and "struct page_ext" consume.
1041 * nr_memmap_boot_pages: # of pages allocated by boot allocator
1042 * nr_memmap_pages: # of pages that were allocated by buddy allocator
1043 */
1044static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);
1045static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);
1046
1047void memmap_boot_pages_add(long delta)
1048{
1049 atomic_long_add(delta, &nr_memmap_boot_pages);
1050}
1051
1052void memmap_pages_add(long delta)
1053{
1054 atomic_long_add(delta, &nr_memmap_pages);
1055}
1056
d7a5752c 1057#ifdef CONFIG_COMPACTION
36deb0be 1058
d7a5752c
MG
1059struct contig_page_info {
1060 unsigned long free_pages;
1061 unsigned long free_blocks_total;
1062 unsigned long free_blocks_suitable;
1063};
1064
1065/*
1066 * Calculate the number of free pages in a zone, how many contiguous
1067 * pages are free and how many are large enough to satisfy an allocation of
1068 * the target size. Note that this function makes no attempt to estimate
1069 * how many suitable free blocks there *might* be if MOVABLE pages were
1070 * migrated. Calculating that is possible, but expensive and can be
1071 * figured out from userspace
1072 */
1073static void fill_contig_page_info(struct zone *zone,
1074 unsigned int suitable_order,
1075 struct contig_page_info *info)
1076{
1077 unsigned int order;
1078
1079 info->free_pages = 0;
1080 info->free_blocks_total = 0;
1081 info->free_blocks_suitable = 0;
1082
fd377218 1083 for (order = 0; order < NR_PAGE_ORDERS; order++) {
d7a5752c
MG
1084 unsigned long blocks;
1085
af1c31ac
LS
1086 /*
1087 * Count number of free blocks.
1088 *
1089 * Access to nr_free is lockless as nr_free is used only for
1090 * diagnostic purposes. Use data_race to avoid KCSAN warning.
1091 */
1092 blocks = data_race(zone->free_area[order].nr_free);
d7a5752c
MG
1093 info->free_blocks_total += blocks;
1094
1095 /* Count free base pages */
1096 info->free_pages += blocks << order;
1097
1098 /* Count the suitable free blocks */
1099 if (order >= suitable_order)
1100 info->free_blocks_suitable += blocks <<
1101 (order - suitable_order);
1102 }
1103}
f1a5ab12
MG
1104
1105/*
1106 * A fragmentation index only makes sense if an allocation of a requested
1107 * size would fail. If that is true, the fragmentation index indicates
1108 * whether external fragmentation or a lack of memory was the problem.
1109 * The value can be used to determine if page reclaim or compaction
1110 * should be used
1111 */
56de7263 1112static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
f1a5ab12
MG
1113{
1114 unsigned long requested = 1UL << order;
1115
5e0a760b 1116 if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
88d6ac40
WY
1117 return 0;
1118
f1a5ab12
MG
1119 if (!info->free_blocks_total)
1120 return 0;
1121
1122 /* Fragmentation index only makes sense when a request would fail */
1123 if (info->free_blocks_suitable)
1124 return -1000;
1125
1126 /*
1127 * Index is between 0 and 1 so return within 3 decimal places
1128 *
1129 * 0 => allocation would fail due to lack of memory
1130 * 1 => allocation would fail due to fragmentation
1131 */
1132 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1133}
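/*
 * Worked example with made-up numbers: for order = 3 (requested = 8)
 * and 800 free pages spread over 800 isolated order-0 blocks, no block
 * is suitable and the index is
 *	1000 - (1000 + 800 * 1000 / 8) / 800 = 1000 - 126 = 874,
 * i.e. about 0.87. A value near 1 means a failing request is caused by
 * external fragmentation rather than lack of memory, so compaction is
 * more useful than reclaim.
 */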
56de7263 1134
facdaa91
NG
1135/*
1136 * Calculates external fragmentation within a zone wrt the given order.
1137 * It is defined as the percentage of pages found in blocks of size
1138 * less than 1 << order. It returns values in range [0, 100].
1139 */
d34c0a75 1140unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
facdaa91
NG
1141{
1142 struct contig_page_info info;
1143
1144 fill_contig_page_info(zone, order, &info);
1145 if (info.free_pages == 0)
1146 return 0;
1147
1148 return div_u64((info.free_pages -
1149 (info.free_blocks_suitable << order)) * 100,
1150 info.free_pages);
1151}
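/*
 * Example with assumed values: for order = 2, 1000 free pages and the
 * equivalent of 100 free order-2 (or larger) blocks, the suitable
 * blocks hold 100 << 2 = 400 pages, so
 *	extfrag = (1000 - 400) * 100 / 1000 = 60,
 * i.e. 60% of the free memory sits in blocks too small for the request.
 */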
1152
56de7263
MG
1153/* Same as __fragmentation index but allocs contig_page_info on stack */
1154int fragmentation_index(struct zone *zone, unsigned int order)
1155{
1156 struct contig_page_info info;
1157
1158 fill_contig_page_info(zone, order, &info);
1159 return __fragmentation_index(order, &info);
1160}
d7a5752c
MG
1161#endif
1162
ebc5d83d
KK
1163#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1164 defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
fa25c503
KM
1165#ifdef CONFIG_ZONE_DMA
1166#define TEXT_FOR_DMA(xx) xx "_dma",
1167#else
1168#define TEXT_FOR_DMA(xx)
1169#endif
1170
1171#ifdef CONFIG_ZONE_DMA32
1172#define TEXT_FOR_DMA32(xx) xx "_dma32",
1173#else
1174#define TEXT_FOR_DMA32(xx)
1175#endif
1176
1177#ifdef CONFIG_HIGHMEM
1178#define TEXT_FOR_HIGHMEM(xx) xx "_high",
1179#else
1180#define TEXT_FOR_HIGHMEM(xx)
1181#endif
1182
a39c5d3c
HL
1183#ifdef CONFIG_ZONE_DEVICE
1184#define TEXT_FOR_DEVICE(xx) xx "_device",
1185#else
1186#define TEXT_FOR_DEVICE(xx)
1187#endif
1188
fa25c503 1189#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
a39c5d3c
HL
1190 TEXT_FOR_HIGHMEM(xx) xx "_movable", \
1191 TEXT_FOR_DEVICE(xx)
fa25c503
KM
1192
1193const char * const vmstat_text[] = {
8d92890b 1194 /* enum zone_stat_item counters */
fa25c503 1195 "nr_free_pages",
a211c655 1196 "nr_free_pages_blocks",
71c799f4
MK
1197 "nr_zone_inactive_anon",
1198 "nr_zone_active_anon",
1199 "nr_zone_inactive_file",
1200 "nr_zone_active_file",
1201 "nr_zone_unevictable",
5a1c84b4 1202 "nr_zone_write_pending",
fa25c503 1203 "nr_mlock",
91537fee
MK
1204#if IS_ENABLED(CONFIG_ZSMALLOC)
1205 "nr_zspages",
1206#endif
3a321d2a 1207 "nr_free_cma",
dcdfdd40
KS
1208#ifdef CONFIG_UNACCEPTED_MEMORY
1209 "nr_unaccepted",
1210#endif
3a321d2a
KW
1211
1212 /* enum numa_stat_item counters */
fa25c503
KM
1213#ifdef CONFIG_NUMA
1214 "numa_hit",
1215 "numa_miss",
1216 "numa_foreign",
1217 "numa_interleave",
1218 "numa_local",
1219 "numa_other",
1220#endif
09316c09 1221
9d7ea9a2 1222 /* enum node_stat_item counters */
599d0c95
MG
1223 "nr_inactive_anon",
1224 "nr_active_anon",
1225 "nr_inactive_file",
1226 "nr_active_file",
1227 "nr_unevictable",
385386cf
JW
1228 "nr_slab_reclaimable",
1229 "nr_slab_unreclaimable",
599d0c95
MG
1230 "nr_isolated_anon",
1231 "nr_isolated_file",
68d48e6a 1232 "workingset_nodes",
170b04b7
JK
1233 "workingset_refault_anon",
1234 "workingset_refault_file",
1235 "workingset_activate_anon",
1236 "workingset_activate_file",
1237 "workingset_restore_anon",
1238 "workingset_restore_file",
1e6b1085 1239 "workingset_nodereclaim",
50658e2e
MG
1240 "nr_anon_pages",
1241 "nr_mapped",
11fb9989
MG
1242 "nr_file_pages",
1243 "nr_dirty",
1244 "nr_writeback",
1245 "nr_writeback_temp",
1246 "nr_shmem",
1247 "nr_shmem_hugepages",
1248 "nr_shmem_pmdmapped",
60fbf0ab
SL
1249 "nr_file_hugepages",
1250 "nr_file_pmdmapped",
11fb9989 1251 "nr_anon_transparent_hugepages",
c4a25635
MG
1252 "nr_vmscan_write",
1253 "nr_vmscan_immediate_reclaim",
1254 "nr_dirtied",
1255 "nr_written",
8cd7c588 1256 "nr_throttled_written",
b29940c1 1257 "nr_kernel_misc_reclaimable",
1970dc6f
JH
1258 "nr_foll_pin_acquired",
1259 "nr_foll_pin_released",
991e7673
SB
1260 "nr_kernel_stack",
1261#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1262 "nr_shadow_call_stack",
1263#endif
f0c0c115 1264 "nr_page_table_pages",
ebc97a52 1265 "nr_sec_page_table_pages",
bd3520a9
PT
1266#ifdef CONFIG_IOMMU_SUPPORT
1267 "nr_iommu_pages",
1268#endif
b6038942
SB
1269#ifdef CONFIG_SWAP
1270 "nr_swapcached",
1271#endif
e39bb6be
HY
1272#ifdef CONFIG_NUMA_BALANCING
1273 "pgpromote_success",
c6833e10 1274 "pgpromote_candidate",
b805ab3c 1275#endif
23e9f013
LZ
1276 "pgdemote_kswapd",
1277 "pgdemote_direct",
1278 "pgdemote_khugepaged",
e452872b 1279 "pgdemote_proactive",
05d4532b
JH
1280#ifdef CONFIG_HUGETLB_PAGE
1281 "nr_hugetlb",
1282#endif
835de376 1283 "nr_balloon_pages",
f4cb78af 1284 /* system-wide enum vm_stat_item counters */
fa25c503
KM
1285 "nr_dirty_threshold",
1286 "nr_dirty_background_threshold",
9d857311
PT
1287 "nr_memmap_pages",
1288 "nr_memmap_boot_pages",
fa25c503 1289
ebc5d83d 1290#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
09316c09 1291 /* enum vm_event_item counters */
fa25c503
KM
1292 "pgpgin",
1293 "pgpgout",
1294 "pswpin",
1295 "pswpout",
1296
1297 TEXTS_FOR_ZONES("pgalloc")
7cc30fcf
MG
1298 TEXTS_FOR_ZONES("allocstall")
1299 TEXTS_FOR_ZONES("pgskip")
fa25c503
KM
1300
1301 "pgfree",
1302 "pgactivate",
1303 "pgdeactivate",
f7ad2a6c 1304 "pglazyfree",
fa25c503
KM
1305
1306 "pgfault",
1307 "pgmajfault",
854e9ed0 1308 "pglazyfreed",
fa25c503 1309
599d0c95 1310 "pgrefill",
798a6b87 1311 "pgreuse",
599d0c95
MG
1312 "pgsteal_kswapd",
1313 "pgsteal_direct",
57e9cc50 1314 "pgsteal_khugepaged",
e452872b 1315 "pgsteal_proactive",
599d0c95
MG
1316 "pgscan_kswapd",
1317 "pgscan_direct",
57e9cc50 1318 "pgscan_khugepaged",
e452872b 1319 "pgscan_proactive",
68243e76 1320 "pgscan_direct_throttle",
497a6c1b
JW
1321 "pgscan_anon",
1322 "pgscan_file",
1323 "pgsteal_anon",
1324 "pgsteal_file",
fa25c503
KM
1325
1326#ifdef CONFIG_NUMA
5fe690a5 1327 "zone_reclaim_success",
fa25c503
KM
1328 "zone_reclaim_failed",
1329#endif
1330 "pginodesteal",
1331 "slabs_scanned",
fa25c503
KM
1332 "kswapd_inodesteal",
1333 "kswapd_low_wmark_hit_quickly",
1334 "kswapd_high_wmark_hit_quickly",
fa25c503 1335 "pageoutrun",
fa25c503
KM
1336
1337 "pgrotated",
1338
5509a5d2
DH
1339 "drop_pagecache",
1340 "drop_slab",
8e675f7a 1341 "oom_kill",
5509a5d2 1342
03c5a6e1
MG
1343#ifdef CONFIG_NUMA_BALANCING
1344 "numa_pte_updates",
72403b4a 1345 "numa_huge_pte_updates",
03c5a6e1
MG
1346 "numa_hint_faults",
1347 "numa_hint_faults_local",
1348 "numa_pages_migrated",
ad6b26b6
CY
1349 "numa_task_migrated",
1350 "numa_task_swapped",
03c5a6e1 1351#endif
5647bc29
MG
1352#ifdef CONFIG_MIGRATION
1353 "pgmigrate_success",
1354 "pgmigrate_fail",
1a5bae25
AK
1355 "thp_migration_success",
1356 "thp_migration_fail",
1357 "thp_migration_split",
5647bc29 1358#endif
fa25c503 1359#ifdef CONFIG_COMPACTION
397487db
MG
1360 "compact_migrate_scanned",
1361 "compact_free_scanned",
1362 "compact_isolated",
fa25c503
KM
1363 "compact_stall",
1364 "compact_fail",
1365 "compact_success",
698b1b30 1366 "compact_daemon_wake",
7f354a54
DR
1367 "compact_daemon_migrate_scanned",
1368 "compact_daemon_free_scanned",
fa25c503
KM
1369#endif
1370
1371#ifdef CONFIG_HUGETLB_PAGE
1372 "htlb_buddy_alloc_success",
1373 "htlb_buddy_alloc_fail",
bbb26920
MK
1374#endif
1375#ifdef CONFIG_CMA
1376 "cma_alloc_success",
1377 "cma_alloc_fail",
fa25c503
KM
1378#endif
1379 "unevictable_pgs_culled",
1380 "unevictable_pgs_scanned",
1381 "unevictable_pgs_rescued",
1382 "unevictable_pgs_mlocked",
1383 "unevictable_pgs_munlocked",
1384 "unevictable_pgs_cleared",
1385 "unevictable_pgs_stranded",
fa25c503
KM
1386
1387#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1388 "thp_fault_alloc",
1389 "thp_fault_fallback",
85b9f46e 1390 "thp_fault_fallback_charge",
fa25c503
KM
1391 "thp_collapse_alloc",
1392 "thp_collapse_alloc_failed",
95ecedcd 1393 "thp_file_alloc",
dcdf11ee 1394 "thp_file_fallback",
85b9f46e 1395 "thp_file_fallback_charge",
95ecedcd 1396 "thp_file_mapped",
122afea9
KS
1397 "thp_split_page",
1398 "thp_split_page_failed",
f9719a03 1399 "thp_deferred_split_page",
dafff3f4 1400 "thp_underused_split_page",
122afea9 1401 "thp_split_pmd",
e9ea874a
YY
1402 "thp_scan_exceed_none_pte",
1403 "thp_scan_exceed_swap_pte",
1404 "thp_scan_exceed_share_pte",
ce9311cf
YX
1405#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1406 "thp_split_pud",
1407#endif
d8a8e1f0
KS
1408 "thp_zero_page_alloc",
1409 "thp_zero_page_alloc_failed",
225311a4 1410 "thp_swpout",
fe490cc0 1411 "thp_swpout_fallback",
fa25c503 1412#endif
09316c09
KK
1413#ifdef CONFIG_MEMORY_BALLOON
1414 "balloon_inflate",
1415 "balloon_deflate",
1416#ifdef CONFIG_BALLOON_COMPACTION
1417 "balloon_migrate",
1418#endif
1419#endif /* CONFIG_MEMORY_BALLOON */
ec659934 1420#ifdef CONFIG_DEBUG_TLBFLUSH
9824cf97
DH
1421 "nr_tlb_remote_flush",
1422 "nr_tlb_remote_flush_received",
1423 "nr_tlb_local_flush_all",
1424 "nr_tlb_local_flush_one",
ec659934 1425#endif /* CONFIG_DEBUG_TLBFLUSH */
fa25c503 1426
cbc65df2
HY
1427#ifdef CONFIG_SWAP
1428 "swap_ra",
1429 "swap_ra_hit",
e7ac4dae
BS
1430 "swpin_zero",
1431 "swpout_zero",
4d45c3af
YY
1432#ifdef CONFIG_KSM
1433 "ksm_swpin_copy",
1434#endif
cbc65df2 1435#endif
94bfe85b
YY
1436#ifdef CONFIG_KSM
1437 "cow_ksm",
1438#endif
f6498b77
JW
1439#ifdef CONFIG_ZSWAP
1440 "zswpin",
1441 "zswpout",
7108cc3f 1442 "zswpwb",
f6498b77 1443#endif
575299ea
S
1444#ifdef CONFIG_X86
1445 "direct_map_level2_splits",
1446 "direct_map_level3_splits",
41d88484
KS
1447 "direct_map_level2_collapses",
1448 "direct_map_level3_collapses",
575299ea 1449#endif
52f23865
SB
1450#ifdef CONFIG_PER_VMA_LOCK_STATS
1451 "vma_lock_success",
1452 "vma_lock_abort",
1453 "vma_lock_retry",
1454 "vma_lock_miss",
1455#endif
c4a6fce8
PT
1456#ifdef CONFIG_DEBUG_STACK_USAGE
1457 "kstack_1k",
1458#if THREAD_SIZE > 1024
1459 "kstack_2k",
1460#endif
1461#if THREAD_SIZE > 2048
1462 "kstack_4k",
1463#endif
1464#if THREAD_SIZE > 4096
1465 "kstack_8k",
1466#endif
1467#if THREAD_SIZE > 8192
1468 "kstack_16k",
1469#endif
1470#if THREAD_SIZE > 16384
1471 "kstack_32k",
1472#endif
1473#if THREAD_SIZE > 32768
1474 "kstack_64k",
1475#endif
1476#if THREAD_SIZE > 65536
1477 "kstack_rest",
1478#endif
1479#endif
ebc5d83d 1480#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
fa25c503 1481};
ebc5d83d 1482#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
fa25c503 1483
3c486871
AM
1484#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1485 defined(CONFIG_PROC_FS)
1486static void *frag_start(struct seq_file *m, loff_t *pos)
1487{
1488 pg_data_t *pgdat;
1489 loff_t node = *pos;
1490
1491 for (pgdat = first_online_pgdat();
1492 pgdat && node;
1493 pgdat = next_online_pgdat(pgdat))
1494 --node;
1495
1496 return pgdat;
1497}
1498
1499static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1500{
1501 pg_data_t *pgdat = (pg_data_t *)arg;
1502
1503 (*pos)++;
1504 return next_online_pgdat(pgdat);
1505}
1506
1507static void frag_stop(struct seq_file *m, void *arg)
1508{
1509}
1510
b2bd8598
DR
1511/*
1512 * Walk zones in a node and print using a callback.
1513 * If @assert_populated is true, only use callback for zones that are populated.
1514 */
3c486871 1515static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
727c080f 1516 bool assert_populated, bool nolock,
3c486871
AM
1517 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1518{
1519 struct zone *zone;
1520 struct zone *node_zones = pgdat->node_zones;
1521 unsigned long flags;
1522
1523 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
b2bd8598 1524 if (assert_populated && !populated_zone(zone))
3c486871
AM
1525 continue;
1526
727c080f
VM
1527 if (!nolock)
1528 spin_lock_irqsave(&zone->lock, flags);
3c486871 1529 print(m, pgdat, zone);
727c080f
VM
1530 if (!nolock)
1531 spin_unlock_irqrestore(&zone->lock, flags);
3c486871
AM
1532 }
1533}
1534#endif
1535
d7a5752c 1536#ifdef CONFIG_PROC_FS
467c996c
MG
1537static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1538 struct zone *zone)
1539{
1540 int order;
1541
1542 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
fd377218 1543 for (order = 0; order < NR_PAGE_ORDERS; ++order)
af1c31ac
LS
1544 /*
1545 * Access to nr_free is lockless as nr_free is used only for
1546 * printing purposes. Use data_race to avoid KCSAN warning.
1547 */
1548 seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
467c996c
MG
1549 seq_putc(m, '\n');
1550}
1551
1552/*
1553 * This walks the free areas for each zone.
1554 */
1555static int frag_show(struct seq_file *m, void *arg)
1556{
1557 pg_data_t *pgdat = (pg_data_t *)arg;
727c080f 1558 walk_zones_in_node(m, pgdat, true, false, frag_show_print);
467c996c
MG
1559 return 0;
1560}
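/*
 * The resulting /proc/buddyinfo line looks like (counts invented here):
 *
 *	Node 0, zone   Normal      4      3      5      2      1      0      1      0      1      1      2
 *
 * one free-block count per order, from order 0 up to MAX_PAGE_ORDER.
 */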
1561
1562static void pagetypeinfo_showfree_print(struct seq_file *m,
1563 pg_data_t *pgdat, struct zone *zone)
1564{
1565 int order, mtype;
1566
1567 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1568 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1569 pgdat->node_id,
1570 zone->name,
1571 migratetype_names[mtype]);
fd377218 1572 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
467c996c
MG
1573 unsigned long freecount = 0;
1574 struct free_area *area;
1575 struct list_head *curr;
93b3a674 1576 bool overflow = false;
467c996c
MG
1577
1578 area = &(zone->free_area[order]);
1579
93b3a674
MH
1580 list_for_each(curr, &area->free_list[mtype]) {
1581 /*
1582 * Cap the free_list iteration because it might
1583 * be really large and we are under a spinlock
1584 * so a long time spent here could trigger a
1585 * hard lockup detector. Anyway this is a
1586 * debugging tool so knowing there is a handful
1587 * of pages of this order should be more than
1588 * sufficient.
1589 */
1590 if (++freecount >= 100000) {
1591 overflow = true;
1592 break;
1593 }
1594 }
1595 seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
1596 spin_unlock_irq(&zone->lock);
1597 cond_resched();
1598 spin_lock_irq(&zone->lock);
467c996c 1599 }
f6ac2354
CL
1600 seq_putc(m, '\n');
1601 }
467c996c
MG
1602}
1603
1604/* Print out the free pages at each order for each migratetype */
33090af9 1605static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
467c996c
MG
1606{
1607 int order;
1608 pg_data_t *pgdat = (pg_data_t *)arg;
1609
1610 /* Print header */
1611 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
fd377218 1612 for (order = 0; order < NR_PAGE_ORDERS; ++order)
467c996c
MG
1613 seq_printf(m, "%6d ", order);
1614 seq_putc(m, '\n');
1615
727c080f 1616 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
467c996c
MG
1617}
1618
1619static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1620 pg_data_t *pgdat, struct zone *zone)
1621{
1622 int mtype;
1623 unsigned long pfn;
1624 unsigned long start_pfn = zone->zone_start_pfn;
108bcc96 1625 unsigned long end_pfn = zone_end_pfn(zone);
467c996c
MG
1626 unsigned long count[MIGRATE_TYPES] = { 0, };
1627
1628 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1629 struct page *page;
1630
d336e94e
MH
1631 page = pfn_to_online_page(pfn);
1632 if (!page)
467c996c
MG
1633 continue;
1634
a91c43c7
JK
1635 if (page_zone(page) != zone)
1636 continue;
1637
467c996c
MG
1638 mtype = get_pageblock_migratetype(page);
1639
e80d6a24
MG
1640 if (mtype < MIGRATE_TYPES)
1641 count[mtype]++;
467c996c
MG
1642 }
1643
1644 /* Print counts */
1645 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1646 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1647 seq_printf(m, "%12lu ", count[mtype]);
1648 seq_putc(m, '\n');
1649}
1650
f113e641 1651/* Print out the number of pageblocks for each migratetype */
33090af9 1652static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
467c996c
MG
1653{
1654 int mtype;
1655 pg_data_t *pgdat = (pg_data_t *)arg;
1656
1657 seq_printf(m, "\n%-23s", "Number of blocks type ");
1658 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1659 seq_printf(m, "%12s ", migratetype_names[mtype]);
1660 seq_putc(m, '\n');
727c080f
VM
1661 walk_zones_in_node(m, pgdat, true, false,
1662 pagetypeinfo_showblockcount_print);
467c996c
MG
1663}
1664
48c96a36
JK
1665/*
1666 * Print out the number of pageblocks for each migratetype that contain pages
1667 * of other types. This gives an indication of how well fallbacks are being
1668 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
1669 * to determine what is going on
1670 */
1671static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1672{
1673#ifdef CONFIG_PAGE_OWNER
1674 int mtype;
1675
7dd80b8a 1676 if (!static_branch_unlikely(&page_owner_inited))
48c96a36
JK
1677 return;
1678
1679 drain_all_pages(NULL);
1680
1681 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1682 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1683 seq_printf(m, "%12s ", migratetype_names[mtype]);
1684 seq_putc(m, '\n');
1685
727c080f
VM
1686 walk_zones_in_node(m, pgdat, true, true,
1687 pagetypeinfo_showmixedcount_print);
48c96a36
JK
1688#endif /* CONFIG_PAGE_OWNER */
1689}
1690
467c996c
MG
1691/*
1692 * This prints out statistics in relation to grouping pages by mobility.
1693 * It is expensive to collect so do not constantly read the file.
1694 */
1695static int pagetypeinfo_show(struct seq_file *m, void *arg)
1696{
1697 pg_data_t *pgdat = (pg_data_t *)arg;
1698
41b25a37 1699 /* check memoryless node */
a47b53c5 1700 if (!node_state(pgdat->node_id, N_MEMORY))
41b25a37
KM
1701 return 0;
1702
467c996c
MG
1703 seq_printf(m, "Page block order: %d\n", pageblock_order);
1704 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1705 seq_putc(m, '\n');
1706 pagetypeinfo_showfree(m, pgdat);
1707 pagetypeinfo_showblockcount(m, pgdat);
48c96a36 1708 pagetypeinfo_showmixedcount(m, pgdat);
467c996c 1709
f6ac2354
CL
1710 return 0;
1711}
1712
8f32f7e5 1713static const struct seq_operations fragmentation_op = {
f6ac2354
CL
1714 .start = frag_start,
1715 .next = frag_next,
1716 .stop = frag_stop,
1717 .show = frag_show,
1718};
1719
74e2e8e8 1720static const struct seq_operations pagetypeinfo_op = {
467c996c
MG
1721 .start = frag_start,
1722 .next = frag_next,
1723 .stop = frag_stop,
1724 .show = pagetypeinfo_show,
1725};
1726
e2ecc8a7
MG
1727static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1728{
1729 int zid;
1730
1731 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1732 struct zone *compare = &pgdat->node_zones[zid];
1733
1734 if (populated_zone(compare))
1735 return zone == compare;
1736 }
1737
e2ecc8a7
MG
1738 return false;
1739}
1740
467c996c
MG
1741static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1742 struct zone *zone)
f6ac2354 1743{
467c996c
MG
1744 int i;
1745 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
e2ecc8a7
MG
1746 if (is_zone_first_populated(pgdat, zone)) {
1747 seq_printf(m, "\n per-node stats");
1748 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
69473e5d
MS
1749 unsigned long pages = node_page_state_pages(pgdat, i);
1750
1751 if (vmstat_item_print_in_thp(i))
1752 pages /= HPAGE_PMD_NR;
9d7ea9a2 1753 seq_printf(m, "\n %-12s %lu", node_stat_name(i),
69473e5d 1754 pages);
e2ecc8a7
MG
1755 }
1756 }
467c996c
MG
1757 seq_printf(m,
1758 "\n pages free %lu"
a6ea8b5b 1759 "\n boost %lu"
467c996c
MG
1760 "\n min %lu"
1761 "\n low %lu"
1762 "\n high %lu"
528afe6b 1763 "\n promo %lu"
467c996c 1764 "\n spanned %lu"
9feedc9d 1765 "\n present %lu"
3c381db1
DH
1766 "\n managed %lu"
1767 "\n cma %lu",
88f5acf8 1768 zone_page_state(zone, NR_FREE_PAGES),
a6ea8b5b 1769 zone->watermark_boost,
41858966
MG
1770 min_wmark_pages(zone),
1771 low_wmark_pages(zone),
1772 high_wmark_pages(zone),
528afe6b 1773 promo_wmark_pages(zone),
467c996c 1774 zone->spanned_pages,
9feedc9d 1775 zone->present_pages,
3c381db1
DH
1776 zone_managed_pages(zone),
1777 zone_cma_pages(zone));
467c996c 1778
467c996c 1779 seq_printf(m,
3484b2de 1780 "\n protection: (%ld",
467c996c
MG
1781 zone->lowmem_reserve[0]);
1782 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
3484b2de 1783 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
7dfb8bf3
DR
1784 seq_putc(m, ')');
1785
a8a4b7ae
BH
1786 /* If unpopulated, no other information is useful */
1787 if (!populated_zone(zone)) {
1788 seq_putc(m, '\n');
1789 return;
1790 }
1791
7dfb8bf3 1792 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
9d7ea9a2
KK
1793 seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
1794 zone_page_state(zone, i));
7dfb8bf3 1795
3a321d2a 1796#ifdef CONFIG_NUMA
2ea80b03 1797 fold_vm_zone_numa_events(zone);
f19298b9 1798 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
9d7ea9a2 1799 seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
f19298b9 1800 zone_numa_event_state(zone, i));
3a321d2a
KW
1801#endif
1802
7dfb8bf3 1803 seq_printf(m, "\n pagesets");
467c996c 1804 for_each_online_cpu(i) {
28f836b6
MG
1805 struct per_cpu_pages *pcp;
1806 struct per_cpu_zonestat __maybe_unused *pzstats;
467c996c 1807
28f836b6 1808 pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
3dfa5721
CL
1809 seq_printf(m,
1810 "\n cpu: %i"
f8780515
MS
1811 "\n count: %i"
1812 "\n high: %i"
1813 "\n batch: %i"
1814 "\n high_min: %i"
1815 "\n high_max: %i",
3dfa5721 1816 i,
28f836b6
MG
1817 pcp->count,
1818 pcp->high,
f8780515
MS
1819 pcp->batch,
1820 pcp->high_min,
1821 pcp->high_max);
df9ecaba 1822#ifdef CONFIG_SMP
28f836b6 1823 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
467c996c 1824 seq_printf(m, "\n vm stats threshold: %d",
28f836b6 1825 pzstats->stat_threshold);
df9ecaba 1826#endif
f6ac2354 1827 }
467c996c 1828 seq_printf(m,
599d0c95 1829 "\n node_unreclaimable: %u"
3a50d14d 1830 "\n start_pfn: %lu",
c73322d0 1831 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
3a50d14d 1832 zone->zone_start_pfn);
467c996c
MG
1833 seq_putc(m, '\n');
1834}
1835
1836/*
b2bd8598
DR
1837 * Output information about zones in @pgdat. All zones are printed regardless
1838 * of whether they are populated or not: lowmem_reserve_ratio operates on the
1839 * set of all zones and userspace would not be aware of such zones if they are
1840 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
467c996c
MG
1841 */
1842static int zoneinfo_show(struct seq_file *m, void *arg)
1843{
1844 pg_data_t *pgdat = (pg_data_t *)arg;
727c080f 1845 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
f6ac2354
CL
1846 return 0;
1847}
1848
5c9fe628 1849static const struct seq_operations zoneinfo_op = {
f6ac2354
CL
1850 .start = frag_start, /* iterate over all zones. The same as in
1851 * fragmentation. */
1852 .next = frag_next,
1853 .stop = frag_stop,
1854 .show = zoneinfo_show,
1855};
1856
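/*
 * Number of lines in /proc/vmstat: zone counters, NUMA event counters,
 * node counters, the remaining vm stats (dirty thresholds and memmap
 * pages) and, when CONFIG_VM_EVENT_COUNTERS is enabled, the vm event
 * counters - in that order (see vmstat_start() below).
 */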
9d7ea9a2 1857#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
f19298b9 1858 NR_VM_NUMA_EVENT_ITEMS + \
9d7ea9a2 1859 NR_VM_NODE_STAT_ITEMS + \
f4cb78af 1860 NR_VM_STAT_ITEMS + \
9d7ea9a2
KK
1861 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
1862 NR_VM_EVENT_ITEMS : 0))
79da826a 1863
f6ac2354
CL
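/*
 * seq_file ->start for /proc/vmstat: snapshot every counter into a single
 * kmalloc'ed array so that ->next and ->show only index into it. The array
 * is freed in vmstat_stop().
 */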
1864static void *vmstat_start(struct seq_file *m, loff_t *pos)
1865{
2244b95a 1866 unsigned long *v;
9d7ea9a2 1867 int i;
f6ac2354 1868
9d7ea9a2 1869 if (*pos >= NR_VMSTAT_ITEMS)
f6ac2354 1870 return NULL;
79da826a 1871
9d7ea9a2 1872 BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
f19298b9 1873 fold_vm_numa_events();
9d7ea9a2 1874 v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
2244b95a
CL
1875 m->private = v;
1876 if (!v)
f6ac2354 1877 return ERR_PTR(-ENOMEM);
2244b95a 1878 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
c41f012a 1879 v[i] = global_zone_page_state(i);
79da826a
MR
1880 v += NR_VM_ZONE_STAT_ITEMS;
1881
3a321d2a 1882#ifdef CONFIG_NUMA
f19298b9
MG
1883 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1884 v[i] = global_numa_event_state(i);
1885 v += NR_VM_NUMA_EVENT_ITEMS;
3a321d2a
KW
1886#endif
1887
69473e5d 1888 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
ea426c2a 1889 v[i] = global_node_page_state_pages(i);
69473e5d
MS
1890 if (vmstat_item_print_in_thp(i))
1891 v[i] /= HPAGE_PMD_NR;
1892 }
75ef7184
MG
1893 v += NR_VM_NODE_STAT_ITEMS;
1894
79da826a
MR
1895 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1896 v + NR_DIRTY_THRESHOLD);
9d857311
PT
1897 v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);
1898 v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);
f4cb78af 1899 v += NR_VM_STAT_ITEMS;
79da826a 1900
f8891e5e 1901#ifdef CONFIG_VM_EVENT_COUNTERS
79da826a
MR
1902 all_vm_events(v);
1903 v[PGPGIN] /= 2; /* sectors -> kbytes */
1904 v[PGPGOUT] /= 2;
f8891e5e 1905#endif
ff8b16d7 1906 return (unsigned long *)m->private + *pos;
f6ac2354
CL
1907}
1908
1909static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1910{
1911 (*pos)++;
9d7ea9a2 1912 if (*pos >= NR_VMSTAT_ITEMS)
f6ac2354
CL
1913 return NULL;
1914 return (unsigned long *)m->private + *pos;
1915}
1916
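/*
 * Print one counter per line as "<name> <value>"; after the last item a
 * deprecated "nr_unstable 0" line is appended for userspace that still
 * expects it.
 */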
1917static int vmstat_show(struct seq_file *m, void *arg)
1918{
1919 unsigned long *l = arg;
1920 unsigned long off = l - (unsigned long *)m->private;
68ba0326
AD
1921
1922 seq_puts(m, vmstat_text[off]);
75ba1d07 1923 seq_put_decimal_ull(m, " ", *l);
68ba0326 1924 seq_putc(m, '\n');
8d92890b
N
1925
1926 if (off == NR_VMSTAT_ITEMS - 1) {
1927 /*
1928 * We've come to the end - add any deprecated counters to avoid
1929 * breaking userspace which might depend on them being present.
1930 */
1931 seq_puts(m, "nr_unstable 0\n");
1932 }
f6ac2354
CL
1933 return 0;
1934}
1935
1936static void vmstat_stop(struct seq_file *m, void *arg)
1937{
1938 kfree(m->private);
1939 m->private = NULL;
1940}
1941
b6aa44ab 1942static const struct seq_operations vmstat_op = {
f6ac2354
CL
1943 .start = vmstat_start,
1944 .next = vmstat_next,
1945 .stop = vmstat_stop,
1946 .show = vmstat_show,
1947};
f6ac2354
CL
1948#endif /* CONFIG_PROC_FS */
1949
df9ecaba 1950#ifdef CONFIG_SMP
d1187ed2 1951static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
b8974b89 1952static int sysctl_stat_interval __read_mostly = HZ;
f69c2e4d 1953static int vmstat_late_init_done;
d1187ed2 1954
52b6f46b
HD
1955#ifdef CONFIG_PROC_FS
1956static void refresh_vm_stats(struct work_struct *work)
1957{
1958 refresh_cpu_vm_stats(true);
1959}
1960
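/*
 * Typical use from a test script, as described in the comment below:
 *     echo 1 > /proc/sys/vm/stat_refresh
 * (any write, or a read, by root triggers the flush) immediately before
 * sampling /proc/vmstat or /proc/zoneinfo.
 */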
b8974b89 1961static int vmstat_refresh(const struct ctl_table *table, int write,
32927393 1962 void *buffer, size_t *lenp, loff_t *ppos)
52b6f46b
HD
1963{
1964 long val;
1965 int err;
1966 int i;
1967
1968 /*
1969 * The regular update, every sysctl_stat_interval, may come later
1970 * than expected, leaving a significant amount in the per_cpu buckets.
1971 * This is particularly misleading when checking a quantity of HUGE
1972 * pages immediately after running a test. /proc/sys/vm/stat_refresh,
1973 * which can equally be echo'ed to or cat'ted from (by root),
1974 * can be used to update the stats just before reading them.
1975 *
c41f012a 1976 * Oh, and since global_zone_page_state() etc. are so careful to hide
52b6f46b
HD
1977 * transiently negative values, report an error here if any of
1978 * the stats is negative, so we know to go looking for imbalance.
1979 */
1980 err = schedule_on_each_cpu(refresh_vm_stats);
1981 if (err)
1982 return err;
1983 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
75083aae
HD
1984 /*
1985 * Skip checking stats known to go negative occasionally.
1986 */
1987 switch (i) {
1988 case NR_ZONE_WRITE_PENDING:
1989 case NR_FREE_CMA_PAGES:
1990 continue;
1991 }
75ef7184 1992 val = atomic_long_read(&vm_zone_stat[i]);
52b6f46b 1993 if (val < 0) {
c822f622 1994 pr_warn("%s: %s %ld\n",
9d7ea9a2 1995 __func__, zone_stat_name(i), val);
52b6f46b
HD
1996 }
1997 }
76d8cc3c 1998 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
75083aae
HD
1999 /*
2000 * Skip checking stats known to go negative occasionally.
2001 */
2002 switch (i) {
2003 case NR_WRITEBACK:
2004 continue;
2005 }
76d8cc3c
HD
2006 val = atomic_long_read(&vm_node_stat[i]);
2007 if (val < 0) {
2008 pr_warn("%s: %s %ld\n",
2009 __func__, node_stat_name(i), val);
76d8cc3c
HD
2010 }
2011 }
52b6f46b
HD
2012 if (write)
2013 *ppos += *lenp;
2014 else
2015 *lenp = 0;
2016 return 0;
2017}
2018#endif /* CONFIG_PROC_FS */
2019
d1187ed2
CL
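/*
 * Deferrable per-CPU work: fold this CPU's pending differentials into the
 * global counters and re-arm itself only if something was actually folded;
 * otherwise the shepherd restarts the work once updates appear again.
 */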
2020static void vmstat_update(struct work_struct *w)
2021{
0eb77e98 2022 if (refresh_cpu_vm_stats(true)) {
7cc36bbd
CL
2023 /*
2024 * Counters were updated so we expect more updates
2025 * to occur in the future. Keep on running the
2026 * update worker thread.
2027 */
ce612879 2028 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
f01f17d3
MH
2029 this_cpu_ptr(&vmstat_work),
2030 round_jiffies_relative(sysctl_stat_interval));
7cc36bbd
CL
2031 }
2032}
2033
2034/*
2035 * Check if the diffs for a certain cpu indicate that
2036 * an update is needed.
2037 */
2038static bool need_update(int cpu)
2039{
2bbd00ae 2040 pg_data_t *last_pgdat = NULL;
7cc36bbd
CL
2041 struct zone *zone;
2042
2043 for_each_populated_zone(zone) {
28f836b6 2044 struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
2bbd00ae 2045 struct per_cpu_nodestat *n;
28f836b6 2046
7cc36bbd
CL
2047 /*
2048 * The fast way of checking if there are any vmstat diffs.
7cc36bbd 2049 */
64632fd3 2050 if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
7cc36bbd 2051 return true;
f19298b9 2052
2bbd00ae
JW
2053 if (last_pgdat == zone->zone_pgdat)
2054 continue;
2055 last_pgdat = zone->zone_pgdat;
2056 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
64632fd3
ML
2057 if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
2058 return true;
7cc36bbd
CL
2059 }
2060 return false;
2061}
2062
7b8da4c7
CL
2063/*
2064 * Fold this CPU's outstanding vmstat differentials into the global counters
2065 * without waiting for the next periodic update. The function is used by NOHZ
2066 * and can only be invoked when tick processing is not active.
2067 */
f01f17d3
MH
2068void quiet_vmstat(void)
2069{
2070 if (system_state != SYSTEM_RUNNING)
2071 return;
2072
7b8da4c7 2073 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
f01f17d3
MH
2074 return;
2075
2076 if (!need_update(smp_processor_id()))
2077 return;
2078
2079 /*
2080 * Just refresh counters and do not care about the pending delayed
2081 * vmstat_update. It doesn't fire that often to matter and canceling
2082 * it would be too expensive from this path.
2083 * vmstat_shepherd will take care about that for us.
2084 */
2085 refresh_cpu_vm_stats(false);
2086}
2087
7cc36bbd
CL
2088/*
2089 * Shepherd work item that checks the
2090 * differentials of CPUs whose vmstat update
2091 * work has gone idle because of inactivity,
2092 * and restarts that work when updates are pending.
2093 */
2094static void vmstat_shepherd(struct work_struct *w);
2095
0eb77e98 2096static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
7cc36bbd
CL
2097
2098static void vmstat_shepherd(struct work_struct *w)
2099{
2100 int cpu;
2101
7625eccd 2102 cpus_read_lock();
7cc36bbd 2103 /* Check processors whose vmstat worker threads have been disabled */
7b8da4c7 2104 for_each_online_cpu(cpu) {
f01f17d3 2105 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
7cc36bbd 2106
be5e015d
MT
2107 /*
2108 * In-kernel users of vmstat counters either require the precise value, in
2109 * which case they use the zone_page_state_snapshot interface, or they can
2110 * live with an imprecision, as the regular flushing can happen at arbitrary
2111 * times and the cumulative error can grow (see calculate_normal_threshold).
2112 *
2113 * From that POV the regular flushing can be postponed for CPUs that have
2114 * been isolated from kernel interference without critical infrastructure
2115 * ever noticing. Skip regular flushing from vmstat_shepherd for all
2116 * isolated CPUs to avoid interference with the isolated workload.
2117 */
2118 if (cpu_is_isolated(cpu))
2119 continue;
2120
7b8da4c7 2121 if (!delayed_work_pending(dw) && need_update(cpu))
ce612879 2122 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
fbcc8183
JB
2123
2124 cond_resched();
f01f17d3 2125 }
7625eccd 2126 cpus_read_unlock();
7cc36bbd
CL
2127
2128 schedule_delayed_work(&shepherd,
98f4ebb2 2129 round_jiffies_relative(sysctl_stat_interval));
d1187ed2
CL
2130}
2131
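/*
 * Boot-time setup: initialize the per-CPU deferrable vmstat work for every
 * possible CPU (leaving it disabled on CPUs that are not yet online) and
 * arm the shepherd for the first time.
 */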
7cc36bbd 2132static void __init start_shepherd_timer(void)
d1187ed2 2133{
7cc36bbd
CL
2134 int cpu;
2135
9fd8fcf1 2136 for_each_possible_cpu(cpu) {
ccde8bd4 2137 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
7cc36bbd
CL
2138 vmstat_update);
2139
9fd8fcf1
KD
2140 /*
2141 * For secondary CPUs during CPU hotplug scenarios,
2142 * vmstat_cpu_online() will enable the work.
2143 * mm/vmstat:online enables and disables vmstat_work
2144 * symmetrically during CPU hotplug events.
2145 */
2146 if (!cpu_online(cpu))
2147 disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
2148 }
2149
7cc36bbd
CL
2150 schedule_delayed_work(&shepherd,
2151 round_jiffies_relative(sysctl_stat_interval));
d1187ed2
CL
2152}
2153
03e86dba
TC
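/* Mark every online node that currently has CPUs with N_CPU. */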
2154static void __init init_cpu_node_state(void)
2155{
4c501327 2156 int node;
03e86dba 2157
4c501327 2158 for_each_online_node(node) {
b55032f1 2159 if (!cpumask_empty(cpumask_of_node(node)))
4c501327
SAS
2160 node_set_state(node, N_CPU);
2161 }
03e86dba
TC
2162}
2163
5438da97
SAS
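/*
 * CPU hotplug callbacks: keep the N_CPU node state, the per-zone stat
 * thresholds and the per-CPU vmstat work in sync as CPUs come and go.
 */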
2164static int vmstat_cpu_online(unsigned int cpu)
2165{
f69c2e4d
SS
2166 if (vmstat_late_init_done)
2167 refresh_zone_stat_thresholds();
734c1570
OS
2168
2169 if (!node_state(cpu_to_node(cpu), N_CPU)) {
2170 node_set_state(cpu_to_node(cpu), N_CPU);
734c1570 2171 }
9fd8fcf1 2172 enable_delayed_work(&per_cpu(vmstat_work, cpu));
734c1570 2173
5438da97
SAS
2174 return 0;
2175}
2176
2177static int vmstat_cpu_down_prep(unsigned int cpu)
2178{
9fd8fcf1 2179 disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
5438da97
SAS
2180 return 0;
2181}
2182
2183static int vmstat_cpu_dead(unsigned int cpu)
807a1bd2 2184{
4c501327 2185 const struct cpumask *node_cpus;
5438da97 2186 int node;
807a1bd2 2187
5438da97
SAS
2188 node = cpu_to_node(cpu);
2189
2190 refresh_zone_stat_thresholds();
4c501327 2191 node_cpus = cpumask_of_node(node);
b55032f1 2192 if (!cpumask_empty(node_cpus))
5438da97 2193 return 0;
807a1bd2
TK
2194
2195 node_clear_state(node, N_CPU);
734c1570 2196
5438da97 2197 return 0;
807a1bd2
TK
2198}
2199
f69c2e4d
SS
2200static int __init vmstat_late_init(void)
2201{
2202 refresh_zone_stat_thresholds();
2203 vmstat_late_init_done = 1;
2204
2205 return 0;
2206}
2207late_initcall(vmstat_late_init);
8f32f7e5 2208#endif
df9ecaba 2209
b8974b89
KY
2210#ifdef CONFIG_PROC_FS
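/*
 * sysctls under /proc/sys/vm: stat_interval and stat_refresh (SMP only)
 * plus numa_stat (NUMA only).
 */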
2211static const struct ctl_table vmstat_table[] = {
2212#ifdef CONFIG_SMP
2213 {
2214 .procname = "stat_interval",
2215 .data = &sysctl_stat_interval,
2216 .maxlen = sizeof(sysctl_stat_interval),
2217 .mode = 0644,
2218 .proc_handler = proc_dointvec_jiffies,
2219 },
2220 {
2221 .procname = "stat_refresh",
2222 .data = NULL,
2223 .maxlen = 0,
2224 .mode = 0600,
2225 .proc_handler = vmstat_refresh,
2226 },
2227#endif
2228#ifdef CONFIG_NUMA
2229 {
2230 .procname = "numa_stat",
2231 .data = &sysctl_vm_numa_stat,
2232 .maxlen = sizeof(int),
2233 .mode = 0644,
2234 .proc_handler = sysctl_vm_numa_stat_handler,
2235 .extra1 = SYSCTL_ZERO,
2236 .extra2 = SYSCTL_ONE,
2237 },
2238#endif
2239};
2240#endif
2241
ce612879
MH
2242struct workqueue_struct *mm_percpu_wq;
2243
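/*
 * Early-boot setup: allocate mm_percpu_wq, register the vmstat CPU hotplug
 * callbacks, start the shepherd timer and create the /proc and sysctl
 * interfaces.
 */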
597b7305 2244void __init init_mm_internals(void)
df9ecaba 2245{
ce612879 2246 int ret __maybe_unused;
5438da97 2247
80d136e1 2248 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
ce612879
MH
2249
2250#ifdef CONFIG_SMP
5438da97
SAS
2251 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
2252 NULL, vmstat_cpu_dead);
2253 if (ret < 0)
2254 pr_err("vmstat: failed to register 'dead' hotplug state\n");
2255
2256 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
2257 vmstat_cpu_online,
2258 vmstat_cpu_down_prep);
2259 if (ret < 0)
2260 pr_err("vmstat: failed to register 'online' hotplug state\n");
2261
7625eccd 2262 cpus_read_lock();
03e86dba 2263 init_cpu_node_state();
7625eccd 2264 cpus_read_unlock();
d1187ed2 2265
7cc36bbd 2266 start_shepherd_timer();
8f32f7e5
AD
2267#endif
2268#ifdef CONFIG_PROC_FS
fddda2b7 2269 proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
abaed011 2270 proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
fddda2b7
CH
2271 proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2272 proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
b8974b89 2273 register_sysctl_init("vm", vmstat_table);
8f32f7e5 2274#endif
df9ecaba 2275}
d7a5752c
MG
2276
2277#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
d7a5752c
MG
2278
2279/*
2280 * Return an index indicating how much of the available free memory is
2281 * unusable for an allocation of the requested size.
2282 */
2283static int unusable_free_index(unsigned int order,
2284 struct contig_page_info *info)
2285{
2286 /* No free memory is interpreted as all free memory is unusable */
2287 if (info->free_pages == 0)
2288 return 1000;
2289
2290 /*
2291 * The index is conceptually a value between 0 and 1; it is returned
2292 * scaled by 1000, i.e. to 3 decimal places.
2293 *
2294 * 0 => no fragmentation
2295 * 1 => high fragmentation
2296 */
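	/*
	 * Example: 1000 free pages of which 10 free blocks are large enough
	 * for an order-4 request leaves 10 << 4 = 160 usable pages, giving
	 * (1000 - 160) * 1000 / 1000 = 840, printed as 0.840.
	 */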
2297 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2298
2299}
2300
2301static void unusable_show_print(struct seq_file *m,
2302 pg_data_t *pgdat, struct zone *zone)
2303{
2304 unsigned int order;
2305 int index;
2306 struct contig_page_info info;
2307
2308 seq_printf(m, "Node %d, zone %8s ",
2309 pgdat->node_id,
2310 zone->name);
fd377218 2311 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
d7a5752c
MG
2312 fill_contig_page_info(zone, order, &info);
2313 index = unusable_free_index(order, &info);
2314 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2315 }
2316
2317 seq_putc(m, '\n');
2318}
2319
2320/*
2321 * Display unusable free space index
2322 *
2323 * The unusable free space index measures how much of the available free
2324 * memory cannot be used to satisfy an allocation of a given size and is a
2325 * value between 0 and 1. The higher the value, the more of the free memory
2326 * is unusable and, by implication, the worse the external fragmentation is. This
2327 * can be expressed as a percentage by multiplying by 100.
2328 */
2329static int unusable_show(struct seq_file *m, void *arg)
2330{
2331 pg_data_t *pgdat = (pg_data_t *)arg;
2332
2333 /* check memoryless node */
a47b53c5 2334 if (!node_state(pgdat->node_id, N_MEMORY))
d7a5752c
MG
2335 return 0;
2336
727c080f 2337 walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
d7a5752c
MG
2338
2339 return 0;
2340}
2341
01a99560 2342static const struct seq_operations unusable_sops = {
d7a5752c
MG
2343 .start = frag_start,
2344 .next = frag_next,
2345 .stop = frag_stop,
2346 .show = unusable_show,
2347};
2348
01a99560 2349DEFINE_SEQ_ATTRIBUTE(unusable);
d7a5752c 2350
f1a5ab12
MG
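/*
 * Print the fragmentation index, to three decimal places, for every page
 * order of one zone.
 */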
2351static void extfrag_show_print(struct seq_file *m,
2352 pg_data_t *pgdat, struct zone *zone)
2353{
2354 unsigned int order;
2355 int index;
2356
2357 /* Alloc on stack as interrupts are disabled for zone walk */
2358 struct contig_page_info info;
2359
2360 seq_printf(m, "Node %d, zone %8s ",
2361 pgdat->node_id,
2362 zone->name);
fd377218 2363 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
f1a5ab12 2364 fill_contig_page_info(zone, order, &info);
56de7263 2365 index = __fragmentation_index(order, &info);
a9970586 2366 seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
f1a5ab12
MG
2367 }
2368
2369 seq_putc(m, '\n');
2370}
2371
2372/*
2373 * Display fragmentation index for orders that allocations would fail for
2374 */
2375static int extfrag_show(struct seq_file *m, void *arg)
2376{
2377 pg_data_t *pgdat = (pg_data_t *)arg;
2378
727c080f 2379 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
f1a5ab12
MG
2380
2381 return 0;
2382}
2383
01a99560 2384static const struct seq_operations extfrag_sops = {
f1a5ab12
MG
2385 .start = frag_start,
2386 .next = frag_next,
2387 .stop = frag_stop,
2388 .show = extfrag_show,
2389};
2390
01a99560 2391DEFINE_SEQ_ATTRIBUTE(extfrag);
f1a5ab12 2392
d7a5752c
MG
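/*
 * Create the unusable_index and extfrag_index files in the "extfrag"
 * debugfs directory (normally /sys/kernel/debug/extfrag/).
 */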
2393static int __init extfrag_debug_init(void)
2394{
bde8bd8a
S
2395 struct dentry *extfrag_debug_root;
2396
d7a5752c 2397 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
d7a5752c 2398
d9f7979c 2399 debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
01a99560 2400 &unusable_fops);
d7a5752c 2401
d9f7979c 2402 debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
01a99560 2403 &extfrag_fops);
f1a5ab12 2404
d7a5752c
MG
2405 return 0;
2406}
2407
2408module_init(extfrag_debug_init);
15995a35 2409
d7a5752c 2410#endif