mm/vmstat.c
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
f6ac2354
CL
2/*
3 * linux/mm/vmstat.c
4 *
5 * Manages VM statistics
6 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
2244b95a
CL
7 *
8 * zoned VM statistics
9 * Copyright (C) 2006 Silicon Graphics, Inc.,
786d5cc2 10 * Christoph Lameter <cl@gentwo.org>
7cc36bbd 11 * Copyright (C) 2008-2014 Christoph Lameter
f6ac2354 12 */
8f32f7e5 13#include <linux/fs.h>
f6ac2354 14#include <linux/mm.h>
4e950f6f 15#include <linux/err.h>
2244b95a 16#include <linux/module.h>
5a0e3ad6 17#include <linux/slab.h>
df9ecaba 18#include <linux/cpu.h>
7cc36bbd 19#include <linux/cpumask.h>
c748e134 20#include <linux/vmstat.h>
3c486871
AM
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/debugfs.h>
e8edc6e0 24#include <linux/sched.h>
f1a5ab12 25#include <linux/math64.h>
79da826a 26#include <linux/writeback.h>
36deb0be 27#include <linux/compaction.h>
6e543d57 28#include <linux/mm_inline.h>
48c96a36 29#include <linux/page_owner.h>
be5e015d 30#include <linux/sched/isolation.h>
6e543d57
LD
31
32#include "internal.h"
f6ac2354 33
b8974b89 34#ifdef CONFIG_PROC_FS
4518085e 35#ifdef CONFIG_NUMA
b8974b89
KY
36#define ENABLE_NUMA_STAT 1
37static int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
4518085e
KW
38
39/* zero numa counters within a zone */
40static void zero_zone_numa_counters(struct zone *zone)
41{
42 int item, cpu;
43
f19298b9
MG
44 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
45 atomic_long_set(&zone->vm_numa_event[item], 0);
46 for_each_online_cpu(cpu) {
47 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
4518085e 48 = 0;
f19298b9 49 }
4518085e
KW
50 }
51}
52
53/* zero numa counters of all the populated zones */
54static void zero_zones_numa_counters(void)
55{
56 struct zone *zone;
57
58 for_each_populated_zone(zone)
59 zero_zone_numa_counters(zone);
60}
61
62/* zero global numa counters */
63static void zero_global_numa_counters(void)
64{
65 int item;
66
f19298b9
MG
67 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
68 atomic_long_set(&vm_numa_event[item], 0);
4518085e
KW
69}
70
71static void invalid_numa_statistics(void)
72{
73 zero_zones_numa_counters();
74 zero_global_numa_counters();
75}
76
77static DEFINE_MUTEX(vm_numa_stat_lock);
78
b8974b89 79static int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
32927393 80 void *buffer, size_t *length, loff_t *ppos)
4518085e
KW
81{
82 int ret, oldval;
83
84 mutex_lock(&vm_numa_stat_lock);
85 if (write)
86 oldval = sysctl_vm_numa_stat;
87 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
88 if (ret || !write)
89 goto out;
90
91 if (oldval == sysctl_vm_numa_stat)
92 goto out;
93 else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
94 static_branch_enable(&vm_numa_stat_key);
95 pr_info("enable numa statistics\n");
96 } else {
97 static_branch_disable(&vm_numa_stat_key);
98 invalid_numa_statistics();
99 pr_info("disable numa statistics, and clear numa counters\n");
100 }
101
102out:
103 mutex_unlock(&vm_numa_stat_lock);
104 return ret;
105}
106#endif
b8974b89 107#endif /* CONFIG_PROC_FS */
4518085e 108
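For orientation, a handler like sysctl_vm_numa_stat_handler() above reaches userspace through a ctl_table entry. The registration itself is not part of this excerpt, so the following is only a hypothetical wiring sketch; the table name, the 0..1 bounds and the initcall level are assumptions, not copied from the kernel.

static struct ctl_table numa_stat_sysctl_sketch[] = {
	{
		.procname	= "numa_stat",
		.data		= &sysctl_vm_numa_stat,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= sysctl_vm_numa_stat_handler,
		.extra1		= SYSCTL_ZERO,		/* assumed bounds: 0 or 1 */
		.extra2		= SYSCTL_ONE,
	},
};

static int __init numa_stat_sysctl_sketch_init(void)
{
	/* would surface the knob as /proc/sys/vm/numa_stat */
	register_sysctl("vm", numa_stat_sysctl_sketch);
	return 0;
}
late_initcall(numa_stat_sysctl_sketch_init);

Writing 1 or 0 to such a knob would then flip the vm_numa_stat_key static branch exactly as the handler above shows.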
f8891e5e
CL
109#ifdef CONFIG_VM_EVENT_COUNTERS
110DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
111EXPORT_PER_CPU_SYMBOL(vm_event_states);
112
31f961a8 113static void sum_vm_events(unsigned long *ret)
f8891e5e 114{
9eccf2a8 115 int cpu;
f8891e5e
CL
116 int i;
117
118 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
119
31f961a8 120 for_each_online_cpu(cpu) {
f8891e5e
CL
121 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
122
f8891e5e
CL
123 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
124 ret[i] += this->event[i];
125 }
126}
127
128/*
129 * Accumulate the vm event counters across all CPUs.
130 * The result is unavoidably approximate - it can change
131 * during and after execution of this function.
 132 */
133void all_vm_events(unsigned long *ret)
134{
7625eccd 135 cpus_read_lock();
31f961a8 136 sum_vm_events(ret);
7625eccd 137 cpus_read_unlock();
f8891e5e 138}
32dd66fc 139EXPORT_SYMBOL_GPL(all_vm_events);
f8891e5e 140
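As a usage illustration, a hypothetical caller (not part of this file) snapshots every event counter at once and then indexes the array with an enum vm_event_item value:

static unsigned long example_read_pgfault_count(void)
{
	unsigned long events[NR_VM_EVENT_ITEMS];

	all_vm_events(events);	/* approximate snapshot, as noted above */
	return events[PGFAULT];
}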
f8891e5e
CL
141/*
142 * Fold the foreign cpu events into our own.
143 *
144 * This is adding to the events on one processor
145 * but keeps the global counts constant.
146 */
147void vm_events_fold_cpu(int cpu)
148{
149 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
150 int i;
151
152 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
153 count_vm_events(i, fold_state->event[i]);
154 fold_state->event[i] = 0;
155 }
156}
f8891e5e
CL
157
158#endif /* CONFIG_VM_EVENT_COUNTERS */
159
2244b95a
CL
160/*
161 * Manage combined zone based / global counters
162 *
163 * vm_stat contains the global counters
164 */
75ef7184
MG
165atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
166atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
f19298b9 167atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
75ef7184
MG
168EXPORT_SYMBOL(vm_zone_stat);
169EXPORT_SYMBOL(vm_node_stat);
2244b95a 170
ebeac3ea
GU
171#ifdef CONFIG_NUMA
172static void fold_vm_zone_numa_events(struct zone *zone)
173{
174 unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
175 int cpu;
176 enum numa_stat_item item;
177
178 for_each_online_cpu(cpu) {
179 struct per_cpu_zonestat *pzstats;
180
181 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
182 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
183 zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
184 }
185
186 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
187 zone_numa_event_add(zone_numa_events[item], zone, item);
188}
189
190void fold_vm_numa_events(void)
191{
192 struct zone *zone;
193
194 for_each_populated_zone(zone)
195 fold_vm_zone_numa_events(zone);
196}
197#endif
198
2244b95a
CL
199#ifdef CONFIG_SMP
200
b44129b3 201int calculate_pressure_threshold(struct zone *zone)
88f5acf8
MG
202{
203 int threshold;
204 int watermark_distance;
205
206 /*
207 * As vmstats are not up to date, there is drift between the estimated
208 * and real values. For high thresholds and a high number of CPUs, it
209 * is possible for the min watermark to be breached while the estimated
210 * value looks fine. The pressure threshold is a reduced value such
211 * that even the maximum amount of drift will not accidentally breach
212 * the min watermark
213 */
214 watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
215 threshold = max(1, (int)(watermark_distance / num_online_cpus()));
216
217 /*
218 * Maximum threshold is 125
219 */
220 threshold = min(125, threshold);
221
222 return threshold;
223}
224
b44129b3 225int calculate_normal_threshold(struct zone *zone)
df9ecaba
CL
226{
227 int threshold;
228 int mem; /* memory in 128 MB units */
229
230 /*
231 * The threshold scales with the number of processors and the amount
232 * of memory per zone. More memory means that we can defer updates for
233 * longer, more processors could lead to more contention.
234 * fls() is used to have a cheap way of logarithmic scaling.
235 *
236 * Some sample thresholds:
237 *
ea15ba17 238 * Threshold Processors (fls) Zonesize fls(mem)+1
df9ecaba
CL
239 * ------------------------------------------------------------------
240 * 8 1 1 0.9-1 GB 4
241 * 16 2 2 0.9-1 GB 4
242 * 20 2 2 1-2 GB 5
243 * 24 2 2 2-4 GB 6
244 * 28 2 2 4-8 GB 7
245 * 32 2 2 8-16 GB 8
246 * 4 2 2 <128M 1
247 * 30 4 3 2-4 GB 5
248 * 48 4 3 8-16 GB 8
249 * 32 8 4 1-2 GB 4
250 * 32 8 4 0.9-1GB 4
251 * 10 16 5 <128M 1
252 * 40 16 5 900M 4
253 * 70 64 7 2-4 GB 5
254 * 84 64 7 4-8 GB 6
255 * 108 512 9 4-8 GB 6
256 * 125 1024 10 8-16 GB 8
257 * 125 1024 10 16-32 GB 9
258 */
259
9705bea5 260 mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
df9ecaba
CL
261
262 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
263
264 /*
265 * Maximum threshold is 125
266 */
267 threshold = min(125, threshold);
268
269 return threshold;
270}
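To make the formula concrete, here is a worked example with assumed numbers (16 online CPUs, a zone managing 1 GB), written as a stand-alone user-space program because fls() and the cap are kernel-side:

#include <stdio.h>

static int fls_demo(unsigned int x)	/* gives the same result as the kernel's fls() */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	int cpus = 16;			/* assumed machine */
	int mem = 1024 / 128;		/* 1 GB zone in 128 MB units -> 8 */
	int threshold = 2 * fls_demo(cpus) * (1 + fls_demo(mem));

	if (threshold > 125)		/* the cap applied above */
		threshold = 125;
	printf("stat_threshold = %d\n", threshold);	/* 2 * 5 * (1 + 4) = 50 */
	return 0;
}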
2244b95a
CL
271
272/*
df9ecaba 273 * Refresh the thresholds for each zone.
2244b95a 274 */
a6cccdc3 275void refresh_zone_stat_thresholds(void)
2244b95a 276{
75ef7184 277 struct pglist_data *pgdat;
df9ecaba
CL
278 struct zone *zone;
279 int cpu;
280 int threshold;
281
75ef7184
MG
282 /* Zero current pgdat thresholds */
283 for_each_online_pgdat(pgdat) {
284 for_each_online_cpu(cpu) {
285 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
286 }
287 }
288
ee99c71c 289 for_each_populated_zone(zone) {
75ef7184 290 struct pglist_data *pgdat = zone->zone_pgdat;
aa454840
CL
291 unsigned long max_drift, tolerate_drift;
292
b44129b3 293 threshold = calculate_normal_threshold(zone);
df9ecaba 294
75ef7184
MG
295 for_each_online_cpu(cpu) {
296 int pgdat_threshold;
297
28f836b6 298 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
99dcc3e5 299 = threshold;
1d90ca89 300
75ef7184
MG
301 /* Base nodestat threshold on the largest populated zone. */
302 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
303 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
304 = max(threshold, pgdat_threshold);
305 }
306
aa454840
CL
307 /*
308 * Only set percpu_drift_mark if there is a danger that
309 * NR_FREE_PAGES reports the low watermark is ok when in fact
310 * the min watermark could be breached by an allocation
311 */
312 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
313 max_drift = num_online_cpus() * threshold;
314 if (max_drift > tolerate_drift)
315 zone->percpu_drift_mark = high_wmark_pages(zone) +
316 max_drift;
df9ecaba 317 }
2244b95a
CL
318}
319
b44129b3
MG
320void set_pgdat_percpu_threshold(pg_data_t *pgdat,
321 int (*calculate_pressure)(struct zone *))
88f5acf8
MG
322{
323 struct zone *zone;
324 int cpu;
325 int threshold;
326 int i;
327
88f5acf8
MG
328 for (i = 0; i < pgdat->nr_zones; i++) {
329 zone = &pgdat->node_zones[i];
330 if (!zone->percpu_drift_mark)
331 continue;
332
b44129b3 333 threshold = (*calculate_pressure)(zone);
1d90ca89 334 for_each_online_cpu(cpu)
28f836b6 335 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
88f5acf8
MG
336 = threshold;
337 }
88f5acf8
MG
338}
339
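set_pgdat_percpu_threshold() exists for kswapd: the callers in mm/vmscan.c (outside this excerpt) switch to the tighter calculate_pressure_threshold() values while kswapd is awake and restore the normal thresholds before it sleeps, so the NR_FREE_PAGES estimate cannot silently drift past the min watermark while reclaim decisions are being made.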
2244b95a 340/*
bea04b07
JZ
341 * For use when we know that interrupts are disabled,
342 * or when we know that preemption is disabled and that
343 * particular counter cannot be updated from interrupt context.
2244b95a
CL
344 */
345void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 346 long delta)
2244b95a 347{
28f836b6 348 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92 349 s8 __percpu *p = pcp->vm_stat_diff + item;
2244b95a 350 long x;
12938a92
CL
351 long t;
352
c68ed794
IM
353 /*
354 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
355 * atomicity is provided by IRQs being disabled -- either explicitly
356 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
357 * CPU migrations and preemption potentially corrupts a counter so
358 * disable preemption.
359 */
7a025e91 360 preempt_disable_nested();
c68ed794 361
12938a92 362 x = delta + __this_cpu_read(*p);
2244b95a 363
12938a92 364 t = __this_cpu_read(pcp->stat_threshold);
2244b95a 365
40610076 366 if (unlikely(abs(x) > t)) {
2244b95a
CL
367 zone_page_state_add(x, zone, item);
368 x = 0;
369 }
12938a92 370 __this_cpu_write(*p, x);
c68ed794 371
7a025e91 372 preempt_enable_nested();
2244b95a
CL
373}
374EXPORT_SYMBOL(__mod_zone_page_state);
375
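A hypothetical caller sketch, not taken from this file: the double-underscore variant is only valid where interrupts are already off or the counter cannot be touched from interrupt context, for example under zone->lock in a page-freeing path:

static void example_account_freed_pages(struct zone *zone, unsigned int order)
{
	lockdep_assert_held(&zone->lock);	/* taken with IRQs disabled */
	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
}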
75ef7184
MG
376void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
377 long delta)
378{
379 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
380 s8 __percpu *p = pcp->vm_node_stat_diff + item;
381 long x;
382 long t;
383
ea426c2a 384 if (vmstat_item_in_bytes(item)) {
629484ae
JW
385 /*
386 * Only cgroups use subpage accounting right now; at
387 * the global level, these items still change in
388 * multiples of whole pages. Store them as pages
389 * internally to keep the per-cpu counters compact.
390 */
ea426c2a
RG
391 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
392 delta >>= PAGE_SHIFT;
393 }
394
c68ed794 395 /* See __mod_zone_page_state */
7a025e91 396 preempt_disable_nested();
c68ed794 397
75ef7184
MG
398 x = delta + __this_cpu_read(*p);
399
400 t = __this_cpu_read(pcp->stat_threshold);
401
40610076 402 if (unlikely(abs(x) > t)) {
75ef7184
MG
403 node_page_state_add(x, pgdat, item);
404 x = 0;
405 }
406 __this_cpu_write(*p, x);
c68ed794 407
7a025e91 408 preempt_enable_nested();
75ef7184
MG
409}
410EXPORT_SYMBOL(__mod_node_page_state);
411
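Illustrative only (the caller below is invented): byte-granular items such as NR_SLAB_RECLAIMABLE_B are passed in bytes, and per the comment above the delta still has to be a whole number of pages, which the function shifts back down to pages before accumulating:

static void example_account_slab_pages(struct pglist_data *pgdat, long nr_pages)
{
	__mod_node_page_state(pgdat, NR_SLAB_RECLAIMABLE_B,
			      nr_pages << PAGE_SHIFT);
}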
2244b95a
CL
412/*
413 * Optimized increment and decrement functions.
414 *
415 * These are only for a single page and therefore can take a struct page *
416 * argument instead of struct zone *. This allows the inclusion of the code
417 * generated for page_zone(page) into the optimized functions.
418 *
419 * No overflow check is necessary and therefore the differential can be
420 * incremented or decremented in place which may allow the compilers to
421 * generate better code.
2244b95a
CL
422 * The increment or decrement is known and therefore one boundary check can
423 * be omitted.
424 *
df9ecaba
CL
425 * NOTE: These functions are very performance sensitive. Change only
426 * with care.
427 *
2244b95a
CL
428 * Some processors have inc/dec instructions that are atomic vs an interrupt.
429 * However, the code must first determine the differential location in a zone
430 * based on the processor number and then inc/dec the counter. There is no
431 * guarantee without disabling preemption that the processor will not change
432 * in between and therefore the atomicity vs. interrupt cannot be exploited
433 * in a useful way here.
434 */
c8785385 435void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
2244b95a 436{
28f836b6 437 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92
CL
438 s8 __percpu *p = pcp->vm_stat_diff + item;
439 s8 v, t;
2244b95a 440
c68ed794 441 /* See __mod_node_page_state */
7a025e91 442 preempt_disable_nested();
c68ed794 443
908ee0f1 444 v = __this_cpu_inc_return(*p);
12938a92
CL
445 t = __this_cpu_read(pcp->stat_threshold);
446 if (unlikely(v > t)) {
447 s8 overstep = t >> 1;
df9ecaba 448
12938a92
CL
449 zone_page_state_add(v + overstep, zone, item);
450 __this_cpu_write(*p, -overstep);
2244b95a 451 }
c68ed794 452
7a025e91 453 preempt_enable_nested();
2244b95a 454}
ca889e6c 455
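A quick worked example of the overstep logic, with assumed numbers: if stat_threshold is 125, the first increment that pushes the per-CPU diff to 126 folds 126 + 62 = 188 into the zone counter and leaves the diff at -62, so roughly one and a half thresholds' worth of further increments can be absorbed locally before the next fold.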
75ef7184
MG
456void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
457{
458 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
459 s8 __percpu *p = pcp->vm_node_stat_diff + item;
460 s8 v, t;
461
ea426c2a
RG
462 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
463
c68ed794 464 /* See __mod_node_page_state */
7a025e91 465 preempt_disable_nested();
c68ed794 466
75ef7184
MG
467 v = __this_cpu_inc_return(*p);
468 t = __this_cpu_read(pcp->stat_threshold);
469 if (unlikely(v > t)) {
470 s8 overstep = t >> 1;
471
472 node_page_state_add(v + overstep, pgdat, item);
473 __this_cpu_write(*p, -overstep);
474 }
c68ed794 475
7a025e91 476 preempt_enable_nested();
75ef7184
MG
477}
478
ca889e6c
CL
479void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
480{
481 __inc_zone_state(page_zone(page), item);
482}
2244b95a
CL
483EXPORT_SYMBOL(__inc_zone_page_state);
484
75ef7184
MG
485void __inc_node_page_state(struct page *page, enum node_stat_item item)
486{
487 __inc_node_state(page_pgdat(page), item);
488}
489EXPORT_SYMBOL(__inc_node_page_state);
490
c8785385 491void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
2244b95a 492{
28f836b6 493 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
12938a92
CL
494 s8 __percpu *p = pcp->vm_stat_diff + item;
495 s8 v, t;
2244b95a 496
c68ed794 497 /* See __mod_node_page_state */
7a025e91 498 preempt_disable_nested();
c68ed794 499
908ee0f1 500 v = __this_cpu_dec_return(*p);
12938a92
CL
501 t = __this_cpu_read(pcp->stat_threshold);
502 if (unlikely(v < - t)) {
503 s8 overstep = t >> 1;
2244b95a 504
12938a92
CL
505 zone_page_state_add(v - overstep, zone, item);
506 __this_cpu_write(*p, overstep);
2244b95a 507 }
c68ed794 508
7a025e91 509 preempt_enable_nested();
2244b95a 510}
c8785385 511
75ef7184
MG
512void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
513{
514 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
515 s8 __percpu *p = pcp->vm_node_stat_diff + item;
516 s8 v, t;
517
ea426c2a
RG
518 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
519
c68ed794 520 /* See __mod_node_page_state */
7a025e91 521 preempt_disable_nested();
c68ed794 522
75ef7184
MG
523 v = __this_cpu_dec_return(*p);
524 t = __this_cpu_read(pcp->stat_threshold);
525 if (unlikely(v < - t)) {
526 s8 overstep = t >> 1;
527
528 node_page_state_add(v - overstep, pgdat, item);
529 __this_cpu_write(*p, overstep);
530 }
c68ed794 531
7a025e91 532 preempt_enable_nested();
75ef7184
MG
533}
534
c8785385
CL
535void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
536{
537 __dec_zone_state(page_zone(page), item);
538}
2244b95a
CL
539EXPORT_SYMBOL(__dec_zone_page_state);
540
75ef7184
MG
541void __dec_node_page_state(struct page *page, enum node_stat_item item)
542{
543 __dec_node_state(page_pgdat(page), item);
544}
545EXPORT_SYMBOL(__dec_node_page_state);
546
4156153c 547#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
7c839120
CL
548/*
549 * If we have cmpxchg_local support then we do not need to incur the overhead
550 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
551 *
552 * mod_state() modifies the zone counter state through atomic per cpu
553 * operations.
554 *
 555 * Overstep mode specifies how overstep should be handled:
556 * 0 No overstepping
557 * 1 Overstepping half of threshold
558 * -1 Overstepping minus half of threshold
 559 */
75ef7184
MG
560static inline void mod_zone_state(struct zone *zone,
561 enum zone_stat_item item, long delta, int overstep_mode)
7c839120 562{
28f836b6 563 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
7c839120 564 s8 __percpu *p = pcp->vm_stat_diff + item;
77cd8148
UB
565 long n, t, z;
566 s8 o;
7c839120 567
77cd8148 568 o = this_cpu_read(*p);
7c839120
CL
569 do {
570 z = 0; /* overflow to zone counters */
571
572 /*
573 * The fetching of the stat_threshold is racy. We may apply
 574 * a counter threshold to the wrong cpu if we get
d3bc2367
CL
575 * rescheduled while executing here. However, the next
576 * counter update will apply the threshold again and
577 * therefore bring the counter under the threshold again.
578 *
579 * Most of the time the thresholds are the same anyways
580 * for all cpus in a zone.
7c839120
CL
581 */
582 t = this_cpu_read(pcp->stat_threshold);
583
77cd8148 584 n = delta + (long)o;
7c839120 585
40610076 586 if (abs(n) > t) {
7c839120
CL
587 int os = overstep_mode * (t >> 1) ;
588
589 /* Overflow must be added to zone counters */
590 z = n + os;
591 n = -os;
592 }
77cd8148 593 } while (!this_cpu_try_cmpxchg(*p, &o, n));
7c839120
CL
594
595 if (z)
596 zone_page_state_add(z, zone, item);
597}
598
599void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 600 long delta)
7c839120 601{
75ef7184 602 mod_zone_state(zone, item, delta, 0);
7c839120
CL
603}
604EXPORT_SYMBOL(mod_zone_page_state);
605
7c839120
CL
606void inc_zone_page_state(struct page *page, enum zone_stat_item item)
607{
75ef7184 608 mod_zone_state(page_zone(page), item, 1, 1);
7c839120
CL
609}
610EXPORT_SYMBOL(inc_zone_page_state);
611
612void dec_zone_page_state(struct page *page, enum zone_stat_item item)
613{
75ef7184 614 mod_zone_state(page_zone(page), item, -1, -1);
7c839120
CL
615}
616EXPORT_SYMBOL(dec_zone_page_state);
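Usage sketch with an invented caller: unlike the __-prefixed helpers earlier, these variants are safe from any context, because this branch serializes through this_cpu_try_cmpxchg() and the #else branch further down falls back to disabling interrupts:

static void example_count_mlocked(struct page *page, long nr_pages)
{
	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
}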
75ef7184
MG
617
618static inline void mod_node_state(struct pglist_data *pgdat,
619 enum node_stat_item item, int delta, int overstep_mode)
620{
621 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
622 s8 __percpu *p = pcp->vm_node_stat_diff + item;
77cd8148
UB
623 long n, t, z;
624 s8 o;
75ef7184 625
ea426c2a 626 if (vmstat_item_in_bytes(item)) {
629484ae
JW
627 /*
628 * Only cgroups use subpage accounting right now; at
629 * the global level, these items still change in
630 * multiples of whole pages. Store them as pages
631 * internally to keep the per-cpu counters compact.
632 */
ea426c2a
RG
633 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
634 delta >>= PAGE_SHIFT;
635 }
636
77cd8148 637 o = this_cpu_read(*p);
75ef7184
MG
638 do {
639 z = 0; /* overflow to node counters */
640
641 /*
642 * The fetching of the stat_threshold is racy. We may apply
 643 * a counter threshold to the wrong cpu if we get
644 * rescheduled while executing here. However, the next
645 * counter update will apply the threshold again and
646 * therefore bring the counter under the threshold again.
647 *
648 * Most of the time the thresholds are the same anyways
649 * for all cpus in a node.
650 */
651 t = this_cpu_read(pcp->stat_threshold);
652
77cd8148 653 n = delta + (long)o;
75ef7184 654
40610076 655 if (abs(n) > t) {
75ef7184
MG
656 int os = overstep_mode * (t >> 1) ;
657
658 /* Overflow must be added to node counters */
659 z = n + os;
660 n = -os;
661 }
77cd8148 662 } while (!this_cpu_try_cmpxchg(*p, &o, n));
75ef7184
MG
663
664 if (z)
665 node_page_state_add(z, pgdat, item);
666}
667
668void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
669 long delta)
670{
671 mod_node_state(pgdat, item, delta, 0);
672}
673EXPORT_SYMBOL(mod_node_page_state);
674
675void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
676{
677 mod_node_state(pgdat, item, 1, 1);
678}
679
680void inc_node_page_state(struct page *page, enum node_stat_item item)
681{
682 mod_node_state(page_pgdat(page), item, 1, 1);
683}
684EXPORT_SYMBOL(inc_node_page_state);
685
686void dec_node_page_state(struct page *page, enum node_stat_item item)
687{
688 mod_node_state(page_pgdat(page), item, -1, -1);
689}
690EXPORT_SYMBOL(dec_node_page_state);
7c839120
CL
691#else
692/*
693 * Use interrupt disable to serialize counter updates
694 */
695void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6cdb18ad 696 long delta)
7c839120
CL
697{
698 unsigned long flags;
699
700 local_irq_save(flags);
701 __mod_zone_page_state(zone, item, delta);
702 local_irq_restore(flags);
703}
704EXPORT_SYMBOL(mod_zone_page_state);
705
2244b95a
CL
706void inc_zone_page_state(struct page *page, enum zone_stat_item item)
707{
708 unsigned long flags;
709 struct zone *zone;
2244b95a
CL
710
711 zone = page_zone(page);
712 local_irq_save(flags);
ca889e6c 713 __inc_zone_state(zone, item);
2244b95a
CL
714 local_irq_restore(flags);
715}
716EXPORT_SYMBOL(inc_zone_page_state);
717
718void dec_zone_page_state(struct page *page, enum zone_stat_item item)
719{
720 unsigned long flags;
2244b95a 721
2244b95a 722 local_irq_save(flags);
a302eb4e 723 __dec_zone_page_state(page, item);
2244b95a
CL
724 local_irq_restore(flags);
725}
726EXPORT_SYMBOL(dec_zone_page_state);
727
75ef7184
MG
728void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
729{
730 unsigned long flags;
731
732 local_irq_save(flags);
733 __inc_node_state(pgdat, item);
734 local_irq_restore(flags);
735}
736EXPORT_SYMBOL(inc_node_state);
737
738void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
739 long delta)
740{
741 unsigned long flags;
742
743 local_irq_save(flags);
744 __mod_node_page_state(pgdat, item, delta);
745 local_irq_restore(flags);
746}
747EXPORT_SYMBOL(mod_node_page_state);
748
749void inc_node_page_state(struct page *page, enum node_stat_item item)
750{
751 unsigned long flags;
752 struct pglist_data *pgdat;
753
754 pgdat = page_pgdat(page);
755 local_irq_save(flags);
756 __inc_node_state(pgdat, item);
757 local_irq_restore(flags);
758}
759EXPORT_SYMBOL(inc_node_page_state);
760
761void dec_node_page_state(struct page *page, enum node_stat_item item)
762{
763 unsigned long flags;
764
765 local_irq_save(flags);
766 __dec_node_page_state(page, item);
767 local_irq_restore(flags);
768}
769EXPORT_SYMBOL(dec_node_page_state);
770#endif
7cc36bbd
CL
771
772/*
773 * Fold a differential into the global counters.
774 * Returns the number of counters updated.
775 */
f19298b9 776static int fold_diff(int *zone_diff, int *node_diff)
3a321d2a
KW
777{
778 int i;
779 int changes = 0;
780
781 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
782 if (zone_diff[i]) {
783 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
784 changes++;
785 }
786
3a321d2a
KW
787 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
788 if (node_diff[i]) {
789 atomic_long_add(node_diff[i], &vm_node_stat[i]);
790 changes++;
791 }
792 return changes;
793}
f19298b9 794
2244b95a 795/*
2bb921e5 796 * Update the zone counters for the current cpu.
a7f75e25 797 *
4037d452
CL
798 * Note that refresh_cpu_vm_stats strives to only access
799 * node local memory. The per cpu pagesets on remote zones are placed
800 * in the memory local to the processor using that pageset. So the
801 * loop over all zones will access a series of cachelines local to
802 * the processor.
803 *
804 * The call to zone_page_state_add updates the cachelines with the
805 * statistics in the remote zone struct as well as the global cachelines
806 * with the global counters. These could cause remote node cache line
807 * bouncing and will have to be only done when necessary.
7cc36bbd
CL
808 *
809 * The function returns the number of global counters updated.
2244b95a 810 */
0eb77e98 811static int refresh_cpu_vm_stats(bool do_pagesets)
2244b95a 812{
75ef7184 813 struct pglist_data *pgdat;
2244b95a
CL
814 struct zone *zone;
815 int i;
75ef7184
MG
816 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
817 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
7cc36bbd 818 int changes = 0;
2244b95a 819
ee99c71c 820 for_each_populated_zone(zone) {
28f836b6 821 struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
28f836b6 822 struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
2244b95a 823
fbc2edb0
CL
824 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
825 int v;
2244b95a 826
28f836b6 827 v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
fbc2edb0 828 if (v) {
a7f75e25 829
a7f75e25 830 atomic_long_add(v, &zone->vm_stat[i]);
75ef7184 831 global_zone_diff[i] += v;
4037d452
CL
832#ifdef CONFIG_NUMA
833 /* 3 seconds idle till flush */
28f836b6 834 __this_cpu_write(pcp->expire, 3);
4037d452 835#endif
2244b95a 836 }
fbc2edb0 837 }
3a321d2a 838
0eb77e98
CL
839 if (do_pagesets) {
840 cond_resched();
51a755c5
HY
841
842 changes += decay_pcp_high(zone, this_cpu_ptr(pcp));
843#ifdef CONFIG_NUMA
0eb77e98
CL
844 /*
845 * Deal with draining the remote pageset of this
846 * processor
847 *
848 * Check if there are pages remaining in this pageset
849 * if not then there is nothing to expire.
850 */
28f836b6
MG
851 if (!__this_cpu_read(pcp->expire) ||
852 !__this_cpu_read(pcp->count))
0eb77e98 853 continue;
4037d452 854
0eb77e98
CL
855 /*
856 * We never drain zones local to this processor.
857 */
858 if (zone_to_nid(zone) == numa_node_id()) {
28f836b6 859 __this_cpu_write(pcp->expire, 0);
0eb77e98
CL
860 continue;
861 }
4037d452 862
fa8c4f9a
HY
863 if (__this_cpu_dec_return(pcp->expire)) {
864 changes++;
0eb77e98 865 continue;
fa8c4f9a 866 }
4037d452 867
28f836b6
MG
868 if (__this_cpu_read(pcp->count)) {
869 drain_zone_pages(zone, this_cpu_ptr(pcp));
0eb77e98
CL
870 changes++;
871 }
4037d452 872#endif
51a755c5 873 }
2244b95a 874 }
75ef7184
MG
875
876 for_each_online_pgdat(pgdat) {
877 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
878
879 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
880 int v;
881
882 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
883 if (v) {
884 atomic_long_add(v, &pgdat->vm_stat[i]);
885 global_node_diff[i] += v;
886 }
887 }
888 }
889
890 changes += fold_diff(global_zone_diff, global_node_diff);
7cc36bbd 891 return changes;
2244b95a
CL
892}
893
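The returned changes count is not just informational: the periodic vmstat update worker further down the file (not included in this excerpt) uses it to decide whether this CPU has gone quiet and its deferrable update work can be left unqueued.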
2bb921e5
CL
894/*
895 * Fold the data for an offline cpu into the global array.
896 * There cannot be any access by the offline cpu and therefore
897 * synchronization is simplified.
898 */
899void cpu_vm_stats_fold(int cpu)
900{
75ef7184 901 struct pglist_data *pgdat;
2bb921e5
CL
902 struct zone *zone;
903 int i;
75ef7184
MG
904 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
905 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
2bb921e5
CL
906
907 for_each_populated_zone(zone) {
28f836b6 908 struct per_cpu_zonestat *pzstats;
2bb921e5 909
28f836b6 910 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
2bb921e5 911
f19298b9 912 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
28f836b6 913 if (pzstats->vm_stat_diff[i]) {
2bb921e5
CL
914 int v;
915
28f836b6
MG
916 v = pzstats->vm_stat_diff[i];
917 pzstats->vm_stat_diff[i] = 0;
2bb921e5 918 atomic_long_add(v, &zone->vm_stat[i]);
75ef7184 919 global_zone_diff[i] += v;
2bb921e5 920 }
f19298b9 921 }
3a321d2a 922#ifdef CONFIG_NUMA
f19298b9
MG
923 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
924 if (pzstats->vm_numa_event[i]) {
925 unsigned long v;
3a321d2a 926
f19298b9
MG
927 v = pzstats->vm_numa_event[i];
928 pzstats->vm_numa_event[i] = 0;
929 zone_numa_event_add(v, zone, i);
3a321d2a 930 }
f19298b9 931 }
3a321d2a 932#endif
2bb921e5
CL
933 }
934
75ef7184
MG
935 for_each_online_pgdat(pgdat) {
936 struct per_cpu_nodestat *p;
937
938 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
939
940 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
941 if (p->vm_node_stat_diff[i]) {
942 int v;
943
944 v = p->vm_node_stat_diff[i];
945 p->vm_node_stat_diff[i] = 0;
946 atomic_long_add(v, &pgdat->vm_stat[i]);
947 global_node_diff[i] += v;
948 }
949 }
950
951 fold_diff(global_zone_diff, global_node_diff);
2bb921e5
CL
952}
953
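cpu_vm_stats_fold() runs from the CPU hotplug teardown path (the page allocator's CPU-dead callback, outside this file), which is why the plain read-and-zero above is enough: the offlined CPU can no longer race with it.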
40f4b1ea
CS
954/*
955 * this is only called if !populated_zone(zone), which implies no other users of
f0953a1b 956 * pset->vm_stat_diff[] exist.
40f4b1ea 957 */
28f836b6 958void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
5a883813 959{
f19298b9 960 unsigned long v;
5a883813
MK
961 int i;
962
f19298b9 963 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
28f836b6 964 if (pzstats->vm_stat_diff[i]) {
f19298b9 965 v = pzstats->vm_stat_diff[i];
28f836b6 966 pzstats->vm_stat_diff[i] = 0;
f19298b9 967 zone_page_state_add(v, zone, i);
5a883813 968 }
f19298b9 969 }
3a321d2a
KW
970
971#ifdef CONFIG_NUMA
f19298b9
MG
972 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
973 if (pzstats->vm_numa_event[i]) {
974 v = pzstats->vm_numa_event[i];
975 pzstats->vm_numa_event[i] = 0;
976 zone_numa_event_add(v, zone, i);
3a321d2a 977 }
f19298b9 978 }
3a321d2a 979#endif
5a883813 980}
2244b95a
CL
981#endif
982
ca889e6c 983#ifdef CONFIG_NUMA
c2d42c16 984/*
75ef7184
MG
985 * Determine the per node value of a stat item. This function
986 * is called frequently in a NUMA machine, so try to be as
987 * frugal as possible.
c2d42c16 988 */
75ef7184
MG
989unsigned long sum_zone_node_page_state(int node,
990 enum zone_stat_item item)
c2d42c16
AM
991{
992 struct zone *zones = NODE_DATA(node)->node_zones;
e87d59f7
JK
993 int i;
994 unsigned long count = 0;
c2d42c16 995
e87d59f7
JK
996 for (i = 0; i < MAX_NR_ZONES; i++)
997 count += zone_page_state(zones + i, item);
998
999 return count;
c2d42c16
AM
1000}
1001
f19298b9
MG
1002/* Determine the per node value of a numa stat item. */
1003unsigned long sum_zone_numa_event_state(int node,
3a321d2a
KW
1004 enum numa_stat_item item)
1005{
1006 struct zone *zones = NODE_DATA(node)->node_zones;
3a321d2a 1007 unsigned long count = 0;
f19298b9 1008 int i;
3a321d2a
KW
1009
1010 for (i = 0; i < MAX_NR_ZONES; i++)
f19298b9 1011 count += zone_numa_event_state(zones + i, item);
3a321d2a
KW
1012
1013 return count;
1014}
1015
75ef7184
MG
1016/*
1017 * Determine the per node value of a stat item.
1018 */
ea426c2a
RG
1019unsigned long node_page_state_pages(struct pglist_data *pgdat,
1020 enum node_stat_item item)
75ef7184
MG
1021{
1022 long x = atomic_long_read(&pgdat->vm_stat[item]);
1023#ifdef CONFIG_SMP
1024 if (x < 0)
1025 x = 0;
1026#endif
1027 return x;
1028}
ea426c2a
RG
1029
1030unsigned long node_page_state(struct pglist_data *pgdat,
1031 enum node_stat_item item)
1032{
1033 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1034
1035 return node_page_state_pages(pgdat, item);
1036}
ca889e6c
CL
1037#endif
1038
9d857311
PT
1039/*
 1040 * Count the number of pages that "struct page" and "struct page_ext" consume.
1041 * nr_memmap_boot_pages: # of pages allocated by boot allocator
1042 * nr_memmap_pages: # of pages that were allocated by buddy allocator
1043 */
1044static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);
1045static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);
1046
1047void memmap_boot_pages_add(long delta)
1048{
1049 atomic_long_add(delta, &nr_memmap_boot_pages);
1050}
1051
1052void memmap_pages_add(long delta)
1053{
1054 atomic_long_add(delta, &nr_memmap_pages);
1055}
1056
d7a5752c 1057#ifdef CONFIG_COMPACTION
36deb0be 1058
d7a5752c
MG
1059struct contig_page_info {
1060 unsigned long free_pages;
1061 unsigned long free_blocks_total;
1062 unsigned long free_blocks_suitable;
1063};
1064
1065/*
1066 * Calculate the number of free pages in a zone, how many contiguous
1067 * pages are free and how many are large enough to satisfy an allocation of
1068 * the target size. Note that this function makes no attempt to estimate
1069 * how many suitable free blocks there *might* be if MOVABLE pages were
1070 * migrated. Calculating that is possible, but expensive and can be
1071 * figured out from userspace
1072 */
1073static void fill_contig_page_info(struct zone *zone,
1074 unsigned int suitable_order,
1075 struct contig_page_info *info)
1076{
1077 unsigned int order;
1078
1079 info->free_pages = 0;
1080 info->free_blocks_total = 0;
1081 info->free_blocks_suitable = 0;
1082
fd377218 1083 for (order = 0; order < NR_PAGE_ORDERS; order++) {
d7a5752c
MG
1084 unsigned long blocks;
1085
af1c31ac
LS
1086 /*
1087 * Count number of free blocks.
1088 *
1089 * Access to nr_free is lockless as nr_free is used only for
1090 * diagnostic purposes. Use data_race to avoid KCSAN warning.
1091 */
1092 blocks = data_race(zone->free_area[order].nr_free);
d7a5752c
MG
1093 info->free_blocks_total += blocks;
1094
1095 /* Count free base pages */
1096 info->free_pages += blocks << order;
1097
1098 /* Count the suitable free blocks */
1099 if (order >= suitable_order)
1100 info->free_blocks_suitable += blocks <<
1101 (order - suitable_order);
1102 }
1103}
f1a5ab12
MG
1104
1105/*
1106 * A fragmentation index only makes sense if an allocation of a requested
1107 * size would fail. If that is true, the fragmentation index indicates
1108 * whether external fragmentation or a lack of memory was the problem.
1109 * The value can be used to determine if page reclaim or compaction
1110 * should be used
1111 */
56de7263 1112static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
f1a5ab12
MG
1113{
1114 unsigned long requested = 1UL << order;
1115
5e0a760b 1116 if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
88d6ac40
WY
1117 return 0;
1118
f1a5ab12
MG
1119 if (!info->free_blocks_total)
1120 return 0;
1121
1122 /* Fragmentation index only makes sense when a request would fail */
1123 if (info->free_blocks_suitable)
1124 return -1000;
1125
1126 /*
1127 * Index is between 0 and 1 so return within 3 decimal places
1128 *
1129 * 0 => allocation would fail due to lack of memory
1130 * 1 => allocation would fail due to fragmentation
1131 */
1132 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1133}
56de7263 1134
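A worked example of the index above, using made-up numbers: an order-4 request against 1000 free pages that are held in 500 order-0 and 250 order-1 blocks, so no suitable block exists and the index applies.

#include <stdio.h>

int main(void)
{
	unsigned long requested = 1UL << 4;	/* order-4 request = 16 pages */
	unsigned long free_pages = 1000;	/* 500 * 1 + 250 * 2 pages */
	unsigned long free_blocks_total = 750;	/* 500 + 250 blocks */

	long index = 1000 - (1000 + (free_pages * 1000 / requested)) /
			    free_blocks_total;
	printf("fragmentation index = %ld\n", index);	/* prints 916 */
	return 0;
}

A result that close to 1000 (about 0.916) says the hypothetical failure is caused by external fragmentation rather than a lack of free memory, so compaction rather than reclaim would be the appropriate response.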
facdaa91
NG
1135/*
1136 * Calculates external fragmentation within a zone wrt the given order.
1137 * It is defined as the percentage of pages found in blocks of size
1138 * less than 1 << order. It returns values in range [0, 100].
1139 */
d34c0a75 1140unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
facdaa91
NG
1141{
1142 struct contig_page_info info;
1143
1144 fill_contig_page_info(zone, order, &info);
1145 if (info.free_pages == 0)
1146 return 0;
1147
1148 return div_u64((info.free_pages -
1149 (info.free_blocks_suitable << order)) * 100,
1150 info.free_pages);
1151}
1152
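Continuing the made-up numbers from the previous sketch: with 1000 free pages and no free block of the requested order, extfrag_for_order() evaluates to (1000 - 0) * 100 / 1000 = 100, meaning every free page sits in a block smaller than the requested size.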
56de7263
MG
1153/* Same as __fragmentation index but allocs contig_page_info on stack */
1154int fragmentation_index(struct zone *zone, unsigned int order)
1155{
1156 struct contig_page_info info;
1157
1158 fill_contig_page_info(zone, order, &info);
1159 return __fragmentation_index(order, &info);
1160}
d7a5752c
MG
1161#endif
1162
ebc5d83d
KK
1163#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1164 defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
fa25c503
KM
1165#ifdef CONFIG_ZONE_DMA
1166#define TEXT_FOR_DMA(xx) xx "_dma",
1167#else
1168#define TEXT_FOR_DMA(xx)
1169#endif
1170
1171#ifdef CONFIG_ZONE_DMA32
1172#define TEXT_FOR_DMA32(xx) xx "_dma32",
1173#else
1174#define TEXT_FOR_DMA32(xx)
1175#endif
1176
1177#ifdef CONFIG_HIGHMEM
1178#define TEXT_FOR_HIGHMEM(xx) xx "_high",
1179#else
1180#define TEXT_FOR_HIGHMEM(xx)
1181#endif
1182
a39c5d3c
HL
1183#ifdef CONFIG_ZONE_DEVICE
1184#define TEXT_FOR_DEVICE(xx) xx "_device",
1185#else
1186#define TEXT_FOR_DEVICE(xx)
1187#endif
1188
fa25c503 1189#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
a39c5d3c
HL
1190 TEXT_FOR_HIGHMEM(xx) xx "_movable", \
1191 TEXT_FOR_DEVICE(xx)
fa25c503
KM
1192
1193const char * const vmstat_text[] = {
8d92890b 1194 /* enum zone_stat_item counters */
fa25c503 1195 "nr_free_pages",
a211c655 1196 "nr_free_pages_blocks",
71c799f4
MK
1197 "nr_zone_inactive_anon",
1198 "nr_zone_active_anon",
1199 "nr_zone_inactive_file",
1200 "nr_zone_active_file",
1201 "nr_zone_unevictable",
5a1c84b4 1202 "nr_zone_write_pending",
fa25c503 1203 "nr_mlock",
91537fee
MK
1204#if IS_ENABLED(CONFIG_ZSMALLOC)
1205 "nr_zspages",
1206#endif
3a321d2a 1207 "nr_free_cma",
dcdfdd40
KS
1208#ifdef CONFIG_UNACCEPTED_MEMORY
1209 "nr_unaccepted",
1210#endif
3a321d2a
KW
1211
1212 /* enum numa_stat_item counters */
fa25c503
KM
1213#ifdef CONFIG_NUMA
1214 "numa_hit",
1215 "numa_miss",
1216 "numa_foreign",
1217 "numa_interleave",
1218 "numa_local",
1219 "numa_other",
1220#endif
09316c09 1221
9d7ea9a2 1222 /* enum node_stat_item counters */
599d0c95
MG
1223 "nr_inactive_anon",
1224 "nr_active_anon",
1225 "nr_inactive_file",
1226 "nr_active_file",
1227 "nr_unevictable",
385386cf
JW
1228 "nr_slab_reclaimable",
1229 "nr_slab_unreclaimable",
599d0c95
MG
1230 "nr_isolated_anon",
1231 "nr_isolated_file",
68d48e6a 1232 "workingset_nodes",
170b04b7
JK
1233 "workingset_refault_anon",
1234 "workingset_refault_file",
1235 "workingset_activate_anon",
1236 "workingset_activate_file",
1237 "workingset_restore_anon",
1238 "workingset_restore_file",
1e6b1085 1239 "workingset_nodereclaim",
50658e2e
MG
1240 "nr_anon_pages",
1241 "nr_mapped",
11fb9989
MG
1242 "nr_file_pages",
1243 "nr_dirty",
1244 "nr_writeback",
1245 "nr_writeback_temp",
1246 "nr_shmem",
1247 "nr_shmem_hugepages",
1248 "nr_shmem_pmdmapped",
60fbf0ab
SL
1249 "nr_file_hugepages",
1250 "nr_file_pmdmapped",
11fb9989 1251 "nr_anon_transparent_hugepages",
c4a25635
MG
1252 "nr_vmscan_write",
1253 "nr_vmscan_immediate_reclaim",
1254 "nr_dirtied",
1255 "nr_written",
8cd7c588 1256 "nr_throttled_written",
b29940c1 1257 "nr_kernel_misc_reclaimable",
1970dc6f
JH
1258 "nr_foll_pin_acquired",
1259 "nr_foll_pin_released",
991e7673
SB
1260 "nr_kernel_stack",
1261#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1262 "nr_shadow_call_stack",
1263#endif
f0c0c115 1264 "nr_page_table_pages",
ebc97a52 1265 "nr_sec_page_table_pages",
bd3520a9
PT
1266#ifdef CONFIG_IOMMU_SUPPORT
1267 "nr_iommu_pages",
1268#endif
b6038942
SB
1269#ifdef CONFIG_SWAP
1270 "nr_swapcached",
1271#endif
e39bb6be
HY
1272#ifdef CONFIG_NUMA_BALANCING
1273 "pgpromote_success",
c6833e10 1274 "pgpromote_candidate",
b805ab3c 1275#endif
23e9f013
LZ
1276 "pgdemote_kswapd",
1277 "pgdemote_direct",
1278 "pgdemote_khugepaged",
e452872b 1279 "pgdemote_proactive",
05d4532b
JH
1280#ifdef CONFIG_HUGETLB_PAGE
1281 "nr_hugetlb",
1282#endif
835de376 1283 "nr_balloon_pages",
f4cb78af 1284 /* system-wide enum vm_stat_item counters */
fa25c503
KM
1285 "nr_dirty_threshold",
1286 "nr_dirty_background_threshold",
9d857311
PT
1287 "nr_memmap_pages",
1288 "nr_memmap_boot_pages",
fa25c503 1289
ebc5d83d 1290#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
09316c09 1291 /* enum vm_event_item counters */
fa25c503
KM
1292 "pgpgin",
1293 "pgpgout",
1294 "pswpin",
1295 "pswpout",
1296
1297 TEXTS_FOR_ZONES("pgalloc")
7cc30fcf
MG
1298 TEXTS_FOR_ZONES("allocstall")
1299 TEXTS_FOR_ZONES("pgskip")
fa25c503
KM
1300
1301 "pgfree",
1302 "pgactivate",
1303 "pgdeactivate",
f7ad2a6c 1304 "pglazyfree",
fa25c503
KM
1305
1306 "pgfault",
1307 "pgmajfault",
854e9ed0 1308 "pglazyfreed",
fa25c503 1309
599d0c95 1310 "pgrefill",
798a6b87 1311 "pgreuse",
599d0c95
MG
1312 "pgsteal_kswapd",
1313 "pgsteal_direct",
57e9cc50 1314 "pgsteal_khugepaged",
e452872b 1315 "pgsteal_proactive",
599d0c95
MG
1316 "pgscan_kswapd",
1317 "pgscan_direct",
57e9cc50 1318 "pgscan_khugepaged",
e452872b 1319 "pgscan_proactive",
68243e76 1320 "pgscan_direct_throttle",
497a6c1b
JW
1321 "pgscan_anon",
1322 "pgscan_file",
1323 "pgsteal_anon",
1324 "pgsteal_file",
fa25c503
KM
1325
1326#ifdef CONFIG_NUMA
5fe690a5 1327 "zone_reclaim_success",
fa25c503
KM
1328 "zone_reclaim_failed",
1329#endif
1330 "pginodesteal",
1331 "slabs_scanned",
fa25c503
KM
1332 "kswapd_inodesteal",
1333 "kswapd_low_wmark_hit_quickly",
1334 "kswapd_high_wmark_hit_quickly",
fa25c503 1335 "pageoutrun",
fa25c503
KM
1336
1337 "pgrotated",
1338
5509a5d2
DH
1339 "drop_pagecache",
1340 "drop_slab",
8e675f7a 1341 "oom_kill",
5509a5d2 1342
03c5a6e1
MG
1343#ifdef CONFIG_NUMA_BALANCING
1344 "numa_pte_updates",
72403b4a 1345 "numa_huge_pte_updates",
03c5a6e1
MG
1346 "numa_hint_faults",
1347 "numa_hint_faults_local",
1348 "numa_pages_migrated",
1349#endif
5647bc29
MG
1350#ifdef CONFIG_MIGRATION
1351 "pgmigrate_success",
1352 "pgmigrate_fail",
1a5bae25
AK
1353 "thp_migration_success",
1354 "thp_migration_fail",
1355 "thp_migration_split",
5647bc29 1356#endif
fa25c503 1357#ifdef CONFIG_COMPACTION
397487db
MG
1358 "compact_migrate_scanned",
1359 "compact_free_scanned",
1360 "compact_isolated",
fa25c503
KM
1361 "compact_stall",
1362 "compact_fail",
1363 "compact_success",
698b1b30 1364 "compact_daemon_wake",
7f354a54
DR
1365 "compact_daemon_migrate_scanned",
1366 "compact_daemon_free_scanned",
fa25c503
KM
1367#endif
1368
1369#ifdef CONFIG_HUGETLB_PAGE
1370 "htlb_buddy_alloc_success",
1371 "htlb_buddy_alloc_fail",
bbb26920
MK
1372#endif
1373#ifdef CONFIG_CMA
1374 "cma_alloc_success",
1375 "cma_alloc_fail",
fa25c503
KM
1376#endif
1377 "unevictable_pgs_culled",
1378 "unevictable_pgs_scanned",
1379 "unevictable_pgs_rescued",
1380 "unevictable_pgs_mlocked",
1381 "unevictable_pgs_munlocked",
1382 "unevictable_pgs_cleared",
1383 "unevictable_pgs_stranded",
fa25c503
KM
1384
1385#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1386 "thp_fault_alloc",
1387 "thp_fault_fallback",
85b9f46e 1388 "thp_fault_fallback_charge",
fa25c503
KM
1389 "thp_collapse_alloc",
1390 "thp_collapse_alloc_failed",
95ecedcd 1391 "thp_file_alloc",
dcdf11ee 1392 "thp_file_fallback",
85b9f46e 1393 "thp_file_fallback_charge",
95ecedcd 1394 "thp_file_mapped",
122afea9
KS
1395 "thp_split_page",
1396 "thp_split_page_failed",
f9719a03 1397 "thp_deferred_split_page",
dafff3f4 1398 "thp_underused_split_page",
122afea9 1399 "thp_split_pmd",
e9ea874a
YY
1400 "thp_scan_exceed_none_pte",
1401 "thp_scan_exceed_swap_pte",
1402 "thp_scan_exceed_share_pte",
ce9311cf
YX
1403#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1404 "thp_split_pud",
1405#endif
d8a8e1f0
KS
1406 "thp_zero_page_alloc",
1407 "thp_zero_page_alloc_failed",
225311a4 1408 "thp_swpout",
fe490cc0 1409 "thp_swpout_fallback",
fa25c503 1410#endif
09316c09
KK
1411#ifdef CONFIG_MEMORY_BALLOON
1412 "balloon_inflate",
1413 "balloon_deflate",
1414#ifdef CONFIG_BALLOON_COMPACTION
1415 "balloon_migrate",
1416#endif
1417#endif /* CONFIG_MEMORY_BALLOON */
ec659934 1418#ifdef CONFIG_DEBUG_TLBFLUSH
9824cf97
DH
1419 "nr_tlb_remote_flush",
1420 "nr_tlb_remote_flush_received",
1421 "nr_tlb_local_flush_all",
1422 "nr_tlb_local_flush_one",
ec659934 1423#endif /* CONFIG_DEBUG_TLBFLUSH */
fa25c503 1424
cbc65df2
HY
1425#ifdef CONFIG_SWAP
1426 "swap_ra",
1427 "swap_ra_hit",
e7ac4dae
BS
1428 "swpin_zero",
1429 "swpout_zero",
4d45c3af
YY
1430#ifdef CONFIG_KSM
1431 "ksm_swpin_copy",
1432#endif
cbc65df2 1433#endif
94bfe85b
YY
1434#ifdef CONFIG_KSM
1435 "cow_ksm",
1436#endif
f6498b77
JW
1437#ifdef CONFIG_ZSWAP
1438 "zswpin",
1439 "zswpout",
7108cc3f 1440 "zswpwb",
f6498b77 1441#endif
575299ea
S
1442#ifdef CONFIG_X86
1443 "direct_map_level2_splits",
1444 "direct_map_level3_splits",
41d88484
KS
1445 "direct_map_level2_collapses",
1446 "direct_map_level3_collapses",
575299ea 1447#endif
52f23865
SB
1448#ifdef CONFIG_PER_VMA_LOCK_STATS
1449 "vma_lock_success",
1450 "vma_lock_abort",
1451 "vma_lock_retry",
1452 "vma_lock_miss",
1453#endif
c4a6fce8
PT
1454#ifdef CONFIG_DEBUG_STACK_USAGE
1455 "kstack_1k",
1456#if THREAD_SIZE > 1024
1457 "kstack_2k",
1458#endif
1459#if THREAD_SIZE > 2048
1460 "kstack_4k",
1461#endif
1462#if THREAD_SIZE > 4096
1463 "kstack_8k",
1464#endif
1465#if THREAD_SIZE > 8192
1466 "kstack_16k",
1467#endif
1468#if THREAD_SIZE > 16384
1469 "kstack_32k",
1470#endif
1471#if THREAD_SIZE > 32768
1472 "kstack_64k",
1473#endif
1474#if THREAD_SIZE > 65536
1475 "kstack_rest",
1476#endif
1477#endif
ebc5d83d 1478#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
fa25c503 1479};
ebc5d83d 1480#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
fa25c503 1481
3c486871
AM
1482#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1483 defined(CONFIG_PROC_FS)
1484static void *frag_start(struct seq_file *m, loff_t *pos)
1485{
1486 pg_data_t *pgdat;
1487 loff_t node = *pos;
1488
1489 for (pgdat = first_online_pgdat();
1490 pgdat && node;
1491 pgdat = next_online_pgdat(pgdat))
1492 --node;
1493
1494 return pgdat;
1495}
1496
1497static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1498{
1499 pg_data_t *pgdat = (pg_data_t *)arg;
1500
1501 (*pos)++;
1502 return next_online_pgdat(pgdat);
1503}
1504
1505static void frag_stop(struct seq_file *m, void *arg)
1506{
1507}
1508
b2bd8598
DR
1509/*
1510 * Walk zones in a node and print using a callback.
1511 * If @assert_populated is true, only use callback for zones that are populated.
1512 */
3c486871 1513static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
727c080f 1514 bool assert_populated, bool nolock,
3c486871
AM
1515 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1516{
1517 struct zone *zone;
1518 struct zone *node_zones = pgdat->node_zones;
1519 unsigned long flags;
1520
1521 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
b2bd8598 1522 if (assert_populated && !populated_zone(zone))
3c486871
AM
1523 continue;
1524
727c080f
VM
1525 if (!nolock)
1526 spin_lock_irqsave(&zone->lock, flags);
3c486871 1527 print(m, pgdat, zone);
727c080f
VM
1528 if (!nolock)
1529 spin_unlock_irqrestore(&zone->lock, flags);
3c486871
AM
1530 }
1531}
1532#endif
1533
d7a5752c 1534#ifdef CONFIG_PROC_FS
467c996c
MG
1535static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1536 struct zone *zone)
1537{
1538 int order;
1539
1540 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
fd377218 1541 for (order = 0; order < NR_PAGE_ORDERS; ++order)
af1c31ac
LS
1542 /*
1543 * Access to nr_free is lockless as nr_free is used only for
1544 * printing purposes. Use data_race to avoid KCSAN warning.
1545 */
1546 seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
467c996c
MG
1547 seq_putc(m, '\n');
1548}
1549
1550/*
1551 * This walks the free areas for each zone.
1552 */
1553static int frag_show(struct seq_file *m, void *arg)
1554{
1555 pg_data_t *pgdat = (pg_data_t *)arg;
727c080f 1556 walk_zones_in_node(m, pgdat, true, false, frag_show_print);
467c996c
MG
1557 return 0;
1558}
1559
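frag_show() is what backs /proc/buddyinfo (the seq_operations are registered later in the file, beyond this excerpt). It emits one line per zone with one column per order from 0 to NR_PAGE_ORDERS - 1; the counts below are illustrative, not real output:

Node 0, zone   Normal    476    264    124     63     30     12      5      2      1      0      0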
1560static void pagetypeinfo_showfree_print(struct seq_file *m,
1561 pg_data_t *pgdat, struct zone *zone)
1562{
1563 int order, mtype;
1564
1565 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1566 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1567 pgdat->node_id,
1568 zone->name,
1569 migratetype_names[mtype]);
fd377218 1570 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
467c996c
MG
1571 unsigned long freecount = 0;
1572 struct free_area *area;
1573 struct list_head *curr;
93b3a674 1574 bool overflow = false;
467c996c
MG
1575
1576 area = &(zone->free_area[order]);
1577
93b3a674
MH
1578 list_for_each(curr, &area->free_list[mtype]) {
1579 /*
1580 * Cap the free_list iteration because it might
1581 * be really large and we are under a spinlock
1582 * so a long time spent here could trigger a
1583 * hard lockup detector. Anyway this is a
1584 * debugging tool so knowing there is a handful
1585 * of pages of this order should be more than
1586 * sufficient.
1587 */
1588 if (++freecount >= 100000) {
1589 overflow = true;
1590 break;
1591 }
1592 }
1593 seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
1594 spin_unlock_irq(&zone->lock);
1595 cond_resched();
1596 spin_lock_irq(&zone->lock);
467c996c 1597 }
f6ac2354
CL
1598 seq_putc(m, '\n');
1599 }
467c996c
MG
1600}
1601
 1602/* Print out the free pages at each order for each migratetype */
33090af9 1603static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
467c996c
MG
1604{
1605 int order;
1606 pg_data_t *pgdat = (pg_data_t *)arg;
1607
1608 /* Print header */
1609 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
fd377218 1610 for (order = 0; order < NR_PAGE_ORDERS; ++order)
467c996c
MG
1611 seq_printf(m, "%6d ", order);
1612 seq_putc(m, '\n');
1613
727c080f 1614 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
467c996c
MG
1615}
1616
1617static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1618 pg_data_t *pgdat, struct zone *zone)
1619{
1620 int mtype;
1621 unsigned long pfn;
1622 unsigned long start_pfn = zone->zone_start_pfn;
108bcc96 1623 unsigned long end_pfn = zone_end_pfn(zone);
467c996c
MG
1624 unsigned long count[MIGRATE_TYPES] = { 0, };
1625
1626 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1627 struct page *page;
1628
d336e94e
MH
1629 page = pfn_to_online_page(pfn);
1630 if (!page)
467c996c
MG
1631 continue;
1632
a91c43c7
JK
1633 if (page_zone(page) != zone)
1634 continue;
1635
467c996c
MG
1636 mtype = get_pageblock_migratetype(page);
1637
e80d6a24
MG
1638 if (mtype < MIGRATE_TYPES)
1639 count[mtype]++;
467c996c
MG
1640 }
1641
1642 /* Print counts */
1643 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1644 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1645 seq_printf(m, "%12lu ", count[mtype]);
1646 seq_putc(m, '\n');
1647}
1648
f113e641 1649/* Print out the number of pageblocks for each migratetype */
33090af9 1650static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
467c996c
MG
1651{
1652 int mtype;
1653 pg_data_t *pgdat = (pg_data_t *)arg;
1654
1655 seq_printf(m, "\n%-23s", "Number of blocks type ");
1656 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1657 seq_printf(m, "%12s ", migratetype_names[mtype]);
1658 seq_putc(m, '\n');
727c080f
VM
1659 walk_zones_in_node(m, pgdat, true, false,
1660 pagetypeinfo_showblockcount_print);
467c996c
MG
1661}
1662
48c96a36
JK
1663/*
1664 * Print out the number of pageblocks for each migratetype that contain pages
1665 * of other types. This gives an indication of how well fallbacks are being
1666 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
1667 * to determine what is going on
1668 */
1669static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1670{
1671#ifdef CONFIG_PAGE_OWNER
1672 int mtype;
1673
7dd80b8a 1674 if (!static_branch_unlikely(&page_owner_inited))
48c96a36
JK
1675 return;
1676
1677 drain_all_pages(NULL);
1678
1679 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1680 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1681 seq_printf(m, "%12s ", migratetype_names[mtype]);
1682 seq_putc(m, '\n');
1683
727c080f
VM
1684 walk_zones_in_node(m, pgdat, true, true,
1685 pagetypeinfo_showmixedcount_print);
48c96a36
JK
1686#endif /* CONFIG_PAGE_OWNER */
1687}
1688
467c996c
MG
1689/*
1690 * This prints out statistics in relation to grouping pages by mobility.
1691 * It is expensive to collect so do not constantly read the file.
1692 */
1693static int pagetypeinfo_show(struct seq_file *m, void *arg)
1694{
1695 pg_data_t *pgdat = (pg_data_t *)arg;
1696
41b25a37 1697 /* check memoryless node */
a47b53c5 1698 if (!node_state(pgdat->node_id, N_MEMORY))
41b25a37
KM
1699 return 0;
1700
467c996c
MG
1701 seq_printf(m, "Page block order: %d\n", pageblock_order);
1702 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1703 seq_putc(m, '\n');
1704 pagetypeinfo_showfree(m, pgdat);
1705 pagetypeinfo_showblockcount(m, pgdat);
48c96a36 1706 pagetypeinfo_showmixedcount(m, pgdat);
467c996c 1707
f6ac2354
CL
1708 return 0;
1709}
1710
8f32f7e5 1711static const struct seq_operations fragmentation_op = {
f6ac2354
CL
1712 .start = frag_start,
1713 .next = frag_next,
1714 .stop = frag_stop,
1715 .show = frag_show,
1716};
1717
74e2e8e8 1718static const struct seq_operations pagetypeinfo_op = {
467c996c
MG
1719 .start = frag_start,
1720 .next = frag_next,
1721 .stop = frag_stop,
1722 .show = pagetypeinfo_show,
1723};
1724
e2ecc8a7
MG
1725static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1726{
1727 int zid;
1728
1729 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1730 struct zone *compare = &pgdat->node_zones[zid];
1731
1732 if (populated_zone(compare))
1733 return zone == compare;
1734 }
1735
e2ecc8a7
MG
1736 return false;
1737}
1738
467c996c
MG
1739static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1740 struct zone *zone)
f6ac2354 1741{
467c996c
MG
1742 int i;
1743 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
e2ecc8a7
MG
1744 if (is_zone_first_populated(pgdat, zone)) {
1745 seq_printf(m, "\n per-node stats");
1746 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
69473e5d
MS
1747 unsigned long pages = node_page_state_pages(pgdat, i);
1748
1749 if (vmstat_item_print_in_thp(i))
1750 pages /= HPAGE_PMD_NR;
9d7ea9a2 1751 seq_printf(m, "\n %-12s %lu", node_stat_name(i),
69473e5d 1752 pages);
e2ecc8a7
MG
1753 }
1754 }
467c996c
MG
1755 seq_printf(m,
1756 "\n pages free %lu"
a6ea8b5b 1757 "\n boost %lu"
467c996c
MG
1758 "\n min %lu"
1759 "\n low %lu"
1760 "\n high %lu"
528afe6b 1761 "\n promo %lu"
467c996c 1762 "\n spanned %lu"
9feedc9d 1763 "\n present %lu"
3c381db1
DH
1764 "\n managed %lu"
1765 "\n cma %lu",
88f5acf8 1766 zone_page_state(zone, NR_FREE_PAGES),
a6ea8b5b 1767 zone->watermark_boost,
41858966
MG
1768 min_wmark_pages(zone),
1769 low_wmark_pages(zone),
1770 high_wmark_pages(zone),
528afe6b 1771 promo_wmark_pages(zone),
467c996c 1772 zone->spanned_pages,
9feedc9d 1773 zone->present_pages,
3c381db1
DH
1774 zone_managed_pages(zone),
1775 zone_cma_pages(zone));
467c996c 1776
467c996c 1777 seq_printf(m,
3484b2de 1778 "\n protection: (%ld",
467c996c
MG
1779 zone->lowmem_reserve[0]);
1780 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
3484b2de 1781 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
7dfb8bf3
DR
1782 seq_putc(m, ')');
1783
a8a4b7ae
BH
1784 /* If unpopulated, no other information is useful */
1785 if (!populated_zone(zone)) {
1786 seq_putc(m, '\n');
1787 return;
1788 }
1789
7dfb8bf3 1790 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
9d7ea9a2
KK
1791 seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
1792 zone_page_state(zone, i));
7dfb8bf3 1793
3a321d2a 1794#ifdef CONFIG_NUMA
2ea80b03 1795 fold_vm_zone_numa_events(zone);
f19298b9 1796 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
9d7ea9a2 1797 seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
f19298b9 1798 zone_numa_event_state(zone, i));
3a321d2a
KW
1799#endif
1800
7dfb8bf3 1801 seq_printf(m, "\n pagesets");
467c996c 1802 for_each_online_cpu(i) {
28f836b6
MG
1803 struct per_cpu_pages *pcp;
1804 struct per_cpu_zonestat __maybe_unused *pzstats;
467c996c 1805
28f836b6 1806 pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
3dfa5721
CL
1807 seq_printf(m,
1808 "\n cpu: %i"
f8780515
MS
1809 "\n count: %i"
1810 "\n high: %i"
1811 "\n batch: %i"
1812 "\n high_min: %i"
1813 "\n high_max: %i",
3dfa5721 1814 i,
28f836b6
MG
1815 pcp->count,
1816 pcp->high,
f8780515
MS
1817 pcp->batch,
1818 pcp->high_min,
1819 pcp->high_max);
df9ecaba 1820#ifdef CONFIG_SMP
28f836b6 1821 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
467c996c 1822 seq_printf(m, "\n vm stats threshold: %d",
28f836b6 1823 pzstats->stat_threshold);
df9ecaba 1824#endif
f6ac2354 1825 }
467c996c 1826 seq_printf(m,
599d0c95 1827 "\n node_unreclaimable: %u"
3a50d14d 1828 "\n start_pfn: %lu",
c73322d0 1829 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
3a50d14d 1830 zone->zone_start_pfn);
467c996c
MG
1831 seq_putc(m, '\n');
1832}
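/*
 * Illustrative /proc/zoneinfo excerpt produced by the format strings above
 * (values are made up, indentation approximate):
 *
 *   Node 0, zone   Normal
 *     pages free     123456
 *           min      8192
 *           low      10240
 *           high     12288
 *           ...
 *     protection: (0, 0, 0, 0)
 *     pagesets
 *       cpu: 0
 *               count: 57
 *               batch: 63
 *     ...
 *     node_unreclaimable:  0
 *     start_pfn:           1048576
 */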
1833
1834/*
b2bd8598
DR
1835 * Output information about zones in @pgdat. All zones are printed regardless
1836 * of whether they are populated or not: lowmem_reserve_ratio operates on the
1837 * set of all zones and userspace would not be aware of such zones if they are
1838 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
467c996c
MG
1839 */
1840static int zoneinfo_show(struct seq_file *m, void *arg)
1841{
1842 pg_data_t *pgdat = (pg_data_t *)arg;
727c080f 1843 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
f6ac2354
CL
1844 return 0;
1845}
1846
5c9fe628 1847static const struct seq_operations zoneinfo_op = {
f6ac2354
CL
1848 .start = frag_start, /* iterate over all zones. The same as in
1849 * fragmentation. */
1850 .next = frag_next,
1851 .stop = frag_stop,
1852 .show = zoneinfo_show,
1853};
1854
9d7ea9a2 1855#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
f19298b9 1856 NR_VM_NUMA_EVENT_ITEMS + \
9d7ea9a2 1857 NR_VM_NODE_STAT_ITEMS + \
f4cb78af 1858 NR_VM_STAT_ITEMS + \
9d7ea9a2
KK
1859 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
1860 NR_VM_EVENT_ITEMS : 0))
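/*
 * NR_VMSTAT_ITEMS is the length of the snapshot array built by
 * vmstat_start() below. The array is filled section by section, in this
 * order: zone counters, NUMA event counters (CONFIG_NUMA only), node
 * counters, the dirty thresholds and memmap page counts covered by
 * NR_VM_STAT_ITEMS, and finally the VM event counters
 * (CONFIG_VM_EVENT_COUNTERS only). vmstat_text[] supplies the
 * corresponding names; the BUILD_BUG_ON() in vmstat_start() checks that
 * it is at least that long.
 */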
79da826a 1861
f6ac2354
CL
1862static void *vmstat_start(struct seq_file *m, loff_t *pos)
1863{
2244b95a 1864 unsigned long *v;
9d7ea9a2 1865 int i;
f6ac2354 1866
9d7ea9a2 1867 if (*pos >= NR_VMSTAT_ITEMS)
f6ac2354 1868 return NULL;
79da826a 1869
9d7ea9a2 1870 BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
f19298b9 1871 fold_vm_numa_events();
9d7ea9a2 1872 v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
2244b95a
CL
1873 m->private = v;
1874 if (!v)
f6ac2354 1875 return ERR_PTR(-ENOMEM);
2244b95a 1876 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
c41f012a 1877 v[i] = global_zone_page_state(i);
79da826a
MR
1878 v += NR_VM_ZONE_STAT_ITEMS;
1879
3a321d2a 1880#ifdef CONFIG_NUMA
f19298b9
MG
1881 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1882 v[i] = global_numa_event_state(i);
1883 v += NR_VM_NUMA_EVENT_ITEMS;
3a321d2a
KW
1884#endif
1885
69473e5d 1886 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
ea426c2a 1887 v[i] = global_node_page_state_pages(i);
69473e5d
MS
1888 if (vmstat_item_print_in_thp(i))
1889 v[i] /= HPAGE_PMD_NR;
1890 }
75ef7184
MG
1891 v += NR_VM_NODE_STAT_ITEMS;
1892
79da826a
MR
1893 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1894 v + NR_DIRTY_THRESHOLD);
9d857311
PT
1895 v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);
1896 v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);
f4cb78af 1897 v += NR_VM_STAT_ITEMS;
79da826a 1898
f8891e5e 1899#ifdef CONFIG_VM_EVENT_COUNTERS
79da826a
MR
1900 all_vm_events(v);
1901 v[PGPGIN] /= 2; /* sectors -> kbytes */
1902 v[PGPGOUT] /= 2;
f8891e5e 1903#endif
ff8b16d7 1904 return (unsigned long *)m->private + *pos;
f6ac2354
CL
1905}
1906
1907static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1908{
1909 (*pos)++;
9d7ea9a2 1910 if (*pos >= NR_VMSTAT_ITEMS)
f6ac2354
CL
1911 return NULL;
1912 return (unsigned long *)m->private + *pos;
1913}
1914
1915static int vmstat_show(struct seq_file *m, void *arg)
1916{
1917 unsigned long *l = arg;
1918 unsigned long off = l - (unsigned long *)m->private;
68ba0326
AD
1919
1920 seq_puts(m, vmstat_text[off]);
75ba1d07 1921 seq_put_decimal_ull(m, " ", *l);
68ba0326 1922 seq_putc(m, '\n');
8d92890b
N
1923
1924 if (off == NR_VMSTAT_ITEMS - 1) {
1925 /*
1926 * We've come to the end - add any deprecated counters to avoid
1927 * breaking userspace which might depend on them being present.
1928 */
1929 seq_puts(m, "nr_unstable 0\n");
1930 }
f6ac2354
CL
1931 return 0;
1932}
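/*
 * Each /proc/vmstat line is a "name value" pair, e.g. (values made up):
 *
 *   nr_free_pages 123456
 *   nr_zone_inactive_anon 4321
 *   ...
 *   pgpgin 987654
 *   ...
 *   nr_unstable 0
 *
 * The final nr_unstable line is the deprecated counter appended above for
 * the benefit of older userspace; it is always 0.
 */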
1933
1934static void vmstat_stop(struct seq_file *m, void *arg)
1935{
1936 kfree(m->private);
1937 m->private = NULL;
1938}
1939
b6aa44ab 1940static const struct seq_operations vmstat_op = {
f6ac2354
CL
1941 .start = vmstat_start,
1942 .next = vmstat_next,
1943 .stop = vmstat_stop,
1944 .show = vmstat_show,
1945};
f6ac2354
CL
1946#endif /* CONFIG_PROC_FS */
1947
df9ecaba 1948#ifdef CONFIG_SMP
d1187ed2 1949static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
b8974b89 1950static int sysctl_stat_interval __read_mostly = HZ;
f69c2e4d 1951static int vmstat_late_init_done;
d1187ed2 1952
52b6f46b
HD
1953#ifdef CONFIG_PROC_FS
1954static void refresh_vm_stats(struct work_struct *work)
1955{
1956 refresh_cpu_vm_stats(true);
1957}
1958
b8974b89 1959static int vmstat_refresh(const struct ctl_table *table, int write,
32927393 1960 void *buffer, size_t *lenp, loff_t *ppos)
52b6f46b
HD
1961{
1962 long val;
1963 int err;
1964 int i;
1965
1966 /*
1967 * The regular update, every sysctl_stat_interval, may come later
 1968 * than expected, leaving a significant amount in the per-cpu buckets.
1969 * This is particularly misleading when checking a quantity of HUGE
1970 * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
1971 * which can equally be echo'ed to or cat'ted from (by root),
1972 * can be used to update the stats just before reading them.
1973 *
c41f012a 1974 * Oh, and since global_zone_page_state() etc. are so careful to hide
52b6f46b
HD
1975 * transiently negative values, report an error here if any of
1976 * the stats is negative, so we know to go looking for imbalance.
1977 */
1978 err = schedule_on_each_cpu(refresh_vm_stats);
1979 if (err)
1980 return err;
1981 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
75083aae
HD
1982 /*
1983 * Skip checking stats known to go negative occasionally.
1984 */
1985 switch (i) {
1986 case NR_ZONE_WRITE_PENDING:
1987 case NR_FREE_CMA_PAGES:
1988 continue;
1989 }
75ef7184 1990 val = atomic_long_read(&vm_zone_stat[i]);
52b6f46b 1991 if (val < 0) {
c822f622 1992 pr_warn("%s: %s %ld\n",
9d7ea9a2 1993 __func__, zone_stat_name(i), val);
52b6f46b
HD
1994 }
1995 }
76d8cc3c 1996 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
75083aae
HD
1997 /*
1998 * Skip checking stats known to go negative occasionally.
1999 */
2000 switch (i) {
2001 case NR_WRITEBACK:
2002 continue;
2003 }
76d8cc3c
HD
2004 val = atomic_long_read(&vm_node_stat[i]);
2005 if (val < 0) {
2006 pr_warn("%s: %s %ld\n",
2007 __func__, node_stat_name(i), val);
76d8cc3c
HD
2008 }
2009 }
52b6f46b
HD
2010 if (write)
2011 *ppos += *lenp;
2012 else
2013 *lenp = 0;
2014 return 0;
2015}
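/*
 * Typical use of the knob handled above (root only), forcing a flush of
 * the per-cpu deltas right before sampling:
 *
 *   echo 1 > /proc/sys/vm/stat_refresh
 *   cat /proc/vmstat
 *
 * Any global counter found negative after the flush is reported via
 * pr_warn() as "vmstat_refresh: <stat name> <value>".
 */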
2016#endif /* CONFIG_PROC_FS */
2017
d1187ed2
CL
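/*
 * Deferred per-cpu work: fold this CPU's pending vmstat differentials into
 * the global counters. It re-arms itself only while counters keep changing;
 * once a CPU goes quiet, vmstat_shepherd() below requeues the work when new
 * differentials show up.
 */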
2018static void vmstat_update(struct work_struct *w)
2019{
0eb77e98 2020 if (refresh_cpu_vm_stats(true)) {
7cc36bbd
CL
2021 /*
2022 * Counters were updated so we expect more updates
2023 * to occur in the future. Keep on running the
2024 * update worker thread.
2025 */
ce612879 2026 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
f01f17d3
MH
2027 this_cpu_ptr(&vmstat_work),
2028 round_jiffies_relative(sysctl_stat_interval));
7cc36bbd
CL
2029 }
2030}
2031
2032/*
2033 * Check if the diffs for a certain cpu indicate that
2034 * an update is needed.
2035 */
2036static bool need_update(int cpu)
2037{
2bbd00ae 2038 pg_data_t *last_pgdat = NULL;
7cc36bbd
CL
2039 struct zone *zone;
2040
2041 for_each_populated_zone(zone) {
28f836b6 2042 struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
2bbd00ae 2043 struct per_cpu_nodestat *n;
28f836b6 2044
7cc36bbd
CL
2045 /*
2046 * The fast way of checking if there are any vmstat diffs.
7cc36bbd 2047 */
64632fd3 2048 if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
7cc36bbd 2049 return true;
f19298b9 2050
2bbd00ae
JW
2051 if (last_pgdat == zone->zone_pgdat)
2052 continue;
2053 last_pgdat = zone->zone_pgdat;
2054 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
64632fd3
ML
2055 if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
2056 return true;
7cc36bbd
CL
2057 }
2058 return false;
2059}
2060
7b8da4c7
CL
2061/*
2062 * Switch off vmstat processing and then fold all the remaining differentials
2063 * until the diffs stay at zero. The function is used by NOHZ and can only be
2064 * invoked when tick processing is not active.
2065 */
f01f17d3
MH
2066void quiet_vmstat(void)
2067{
2068 if (system_state != SYSTEM_RUNNING)
2069 return;
2070
7b8da4c7 2071 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
f01f17d3
MH
2072 return;
2073
2074 if (!need_update(smp_processor_id()))
2075 return;
2076
2077 /*
2078 * Just refresh counters and do not care about the pending delayed
 2079 * vmstat_update. It doesn't fire often enough to matter, and cancelling
 2080 * it would be too expensive from this path;
 2081 * vmstat_shepherd will take care of that for us.
2082 */
2083 refresh_cpu_vm_stats(false);
2084}
2085
7cc36bbd
CL
2086/*
 2087 * Shepherd work item that checks the vmstat
 2088 * differentials of CPUs whose per-cpu workers for vm
 2089 * statistics updates were disabled because of
 2090 * inactivity, and requeues those workers when updates are pending.
2091 */
2092static void vmstat_shepherd(struct work_struct *w);
2093
0eb77e98 2094static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
7cc36bbd
CL
2095
2096static void vmstat_shepherd(struct work_struct *w)
2097{
2098 int cpu;
2099
7625eccd 2100 cpus_read_lock();
7cc36bbd 2101 /* Check processors whose vmstat worker threads have been disabled */
7b8da4c7 2102 for_each_online_cpu(cpu) {
f01f17d3 2103 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
7cc36bbd 2104
be5e015d
MT
2105 /*
 2106 * In-kernel users of vmstat counters either require the precise value, in
 2107 * which case they use the zone_page_state_snapshot interface, or they can
 2108 * live with some imprecision, as the regular flushing can happen at an
 2109 * arbitrary time and the cumulative error can grow (see calculate_normal_threshold).
 2110 *
 2111 * From that point of view, regular flushing can be postponed for CPUs that
 2112 * have been isolated from kernel interference without critical
 2113 * infrastructure ever noticing. Skip regular flushing from vmstat_shepherd
2114 * for all isolated CPUs to avoid interference with the isolated workload.
2115 */
2116 if (cpu_is_isolated(cpu))
2117 continue;
2118
7b8da4c7 2119 if (!delayed_work_pending(dw) && need_update(cpu))
ce612879 2120 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
fbcc8183
JB
2121
2122 cond_resched();
f01f17d3 2123 }
7625eccd 2124 cpus_read_unlock();
7cc36bbd
CL
2125
2126 schedule_delayed_work(&shepherd,
98f4ebb2 2127 round_jiffies_relative(sysctl_stat_interval));
d1187ed2
CL
2128}
2129
7cc36bbd 2130static void __init start_shepherd_timer(void)
d1187ed2 2131{
7cc36bbd
CL
2132 int cpu;
2133
9fd8fcf1 2134 for_each_possible_cpu(cpu) {
ccde8bd4 2135 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
7cc36bbd
CL
2136 vmstat_update);
2137
9fd8fcf1
KD
2138 /*
2139 * For secondary CPUs during CPU hotplug scenarios,
2140 * vmstat_cpu_online() will enable the work.
2141 * mm/vmstat:online enables and disables vmstat_work
2142 * symmetrically during CPU hotplug events.
2143 */
2144 if (!cpu_online(cpu))
2145 disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
2146 }
2147
7cc36bbd
CL
2148 schedule_delayed_work(&shepherd,
2149 round_jiffies_relative(sysctl_stat_interval));
d1187ed2
CL
2150}
2151
03e86dba
TC
2152static void __init init_cpu_node_state(void)
2153{
4c501327 2154 int node;
03e86dba 2155
4c501327 2156 for_each_online_node(node) {
b55032f1 2157 if (!cpumask_empty(cpumask_of_node(node)))
4c501327
SAS
2158 node_set_state(node, N_CPU);
2159 }
03e86dba
TC
2160}
2161
5438da97
SAS
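/*
 * CPU hotplug callbacks registered from init_mm_internals(): they keep the
 * per-cpu vmstat_work and the node's N_CPU state in sync with the set of
 * online CPUs.
 */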
2162static int vmstat_cpu_online(unsigned int cpu)
2163{
f69c2e4d
SS
2164 if (vmstat_late_init_done)
2165 refresh_zone_stat_thresholds();
734c1570
OS
2166
 2167	if (!node_state(cpu_to_node(cpu), N_CPU))
 2168		node_set_state(cpu_to_node(cpu), N_CPU);
9fd8fcf1 2170 enable_delayed_work(&per_cpu(vmstat_work, cpu));
734c1570 2171
5438da97
SAS
2172 return 0;
2173}
2174
2175static int vmstat_cpu_down_prep(unsigned int cpu)
2176{
9fd8fcf1 2177 disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
5438da97
SAS
2178 return 0;
2179}
2180
2181static int vmstat_cpu_dead(unsigned int cpu)
807a1bd2 2182{
4c501327 2183 const struct cpumask *node_cpus;
5438da97 2184 int node;
807a1bd2 2185
5438da97
SAS
2186 node = cpu_to_node(cpu);
2187
2188 refresh_zone_stat_thresholds();
4c501327 2189 node_cpus = cpumask_of_node(node);
b55032f1 2190 if (!cpumask_empty(node_cpus))
5438da97 2191 return 0;
807a1bd2
TK
2192
2193 node_clear_state(node, N_CPU);
734c1570 2194
5438da97 2195 return 0;
807a1bd2
TK
2196}
2197
f69c2e4d
SS
2198static int __init vmstat_late_init(void)
2199{
2200 refresh_zone_stat_thresholds();
2201 vmstat_late_init_done = 1;
2202
2203 return 0;
2204}
2205late_initcall(vmstat_late_init);
8f32f7e5 2206#endif
df9ecaba 2207
b8974b89
KY
2208#ifdef CONFIG_PROC_FS
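/*
 * sysctls exposed under /proc/sys/vm/: stat_interval (the flush period,
 * accepted in seconds and stored in jiffies via proc_dointvec_jiffies),
 * stat_refresh (handled by vmstat_refresh() above) and, on NUMA builds,
 * numa_stat (a 0/1 toggle clamped by SYSCTL_ZERO/SYSCTL_ONE).
 */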
2209static const struct ctl_table vmstat_table[] = {
2210#ifdef CONFIG_SMP
2211 {
2212 .procname = "stat_interval",
2213 .data = &sysctl_stat_interval,
2214 .maxlen = sizeof(sysctl_stat_interval),
2215 .mode = 0644,
2216 .proc_handler = proc_dointvec_jiffies,
2217 },
2218 {
2219 .procname = "stat_refresh",
2220 .data = NULL,
2221 .maxlen = 0,
2222 .mode = 0600,
2223 .proc_handler = vmstat_refresh,
2224 },
2225#endif
2226#ifdef CONFIG_NUMA
2227 {
2228 .procname = "numa_stat",
2229 .data = &sysctl_vm_numa_stat,
2230 .maxlen = sizeof(int),
2231 .mode = 0644,
2232 .proc_handler = sysctl_vm_numa_stat_handler,
2233 .extra1 = SYSCTL_ZERO,
2234 .extra2 = SYSCTL_ONE,
2235 },
2236#endif
2237};
2238#endif
2239
ce612879
MH
2240struct workqueue_struct *mm_percpu_wq;
2241
597b7305 2242void __init init_mm_internals(void)
df9ecaba 2243{
ce612879 2244 int ret __maybe_unused;
5438da97 2245
80d136e1 2246 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
ce612879
MH
2247
2248#ifdef CONFIG_SMP
5438da97
SAS
2249 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
2250 NULL, vmstat_cpu_dead);
2251 if (ret < 0)
2252 pr_err("vmstat: failed to register 'dead' hotplug state\n");
2253
2254 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
2255 vmstat_cpu_online,
2256 vmstat_cpu_down_prep);
2257 if (ret < 0)
2258 pr_err("vmstat: failed to register 'online' hotplug state\n");
2259
7625eccd 2260 cpus_read_lock();
03e86dba 2261 init_cpu_node_state();
7625eccd 2262 cpus_read_unlock();
d1187ed2 2263
7cc36bbd 2264 start_shepherd_timer();
8f32f7e5
AD
2265#endif
2266#ifdef CONFIG_PROC_FS
fddda2b7 2267 proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
abaed011 2268 proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
fddda2b7
CH
2269 proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2270 proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
b8974b89 2271 register_sysctl_init("vm", vmstat_table);
8f32f7e5 2272#endif
df9ecaba 2273}
d7a5752c
MG
2274
2275#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
d7a5752c
MG
2276
2277/*
2278 * Return an index indicating how much of the available free memory is
2279 * unusable for an allocation of the requested size.
2280 */
2281static int unusable_free_index(unsigned int order,
2282 struct contig_page_info *info)
2283{
2284 /* No free memory is interpreted as all free memory is unusable */
2285 if (info->free_pages == 0)
2286 return 1000;
2287
2288 /*
 2289 * The index is conceptually a value between 0 and 1; return it scaled
 2290 * to three decimal places (an integer in the range 0..1000).
2291 *
2292 * 0 => no fragmentation
2293 * 1 => high fragmentation
2294 */
2295 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2296
2297}
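/*
 * Worked example (illustrative numbers): with free_pages = 1000, order = 3
 * and free_blocks_suitable = 100, the suitable blocks cover 100 << 3 = 800
 * pages, so the index is (1000 - 800) * 1000 / 1000 = 200, printed by
 * unusable_show_print() as 0.200.
 */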
2298
2299static void unusable_show_print(struct seq_file *m,
2300 pg_data_t *pgdat, struct zone *zone)
2301{
2302 unsigned int order;
2303 int index;
2304 struct contig_page_info info;
2305
2306 seq_printf(m, "Node %d, zone %8s ",
2307 pgdat->node_id,
2308 zone->name);
fd377218 2309 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
d7a5752c
MG
2310 fill_contig_page_info(zone, order, &info);
2311 index = unusable_free_index(order, &info);
2312 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2313 }
2314
2315 seq_putc(m, '\n');
2316}
2317
2318/*
2319 * Display unusable free space index
2320 *
2321 * The unusable free space index measures how much of the available free
2322 * memory cannot be used to satisfy an allocation of a given size and is a
2323 * value between 0 and 1. The higher the value, the more of free memory is
2324 * unusable and by implication, the worse the external fragmentation is. This
2325 * can be expressed as a percentage by multiplying by 100.
2326 */
2327static int unusable_show(struct seq_file *m, void *arg)
2328{
2329 pg_data_t *pgdat = (pg_data_t *)arg;
2330
2331 /* check memoryless node */
a47b53c5 2332 if (!node_state(pgdat->node_id, N_MEMORY))
d7a5752c
MG
2333 return 0;
2334
727c080f 2335 walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
d7a5752c
MG
2336
2337 return 0;
2338}
2339
01a99560 2340static const struct seq_operations unusable_sops = {
d7a5752c
MG
2341 .start = frag_start,
2342 .next = frag_next,
2343 .stop = frag_stop,
2344 .show = unusable_show,
2345};
2346
01a99560 2347DEFINE_SEQ_ATTRIBUTE(unusable);
d7a5752c 2348
f1a5ab12
MG
2349static void extfrag_show_print(struct seq_file *m,
2350 pg_data_t *pgdat, struct zone *zone)
2351{
2352 unsigned int order;
2353 int index;
2354
2355 /* Alloc on stack as interrupts are disabled for zone walk */
2356 struct contig_page_info info;
2357
2358 seq_printf(m, "Node %d, zone %8s ",
2359 pgdat->node_id,
2360 zone->name);
fd377218 2361 for (order = 0; order < NR_PAGE_ORDERS; ++order) {
f1a5ab12 2362 fill_contig_page_info(zone, order, &info);
56de7263 2363 index = __fragmentation_index(order, &info);
a9970586 2364 seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
f1a5ab12
MG
2365 }
2366
2367 seq_putc(m, '\n');
2368}
2369
2370/*
2371 * Display fragmentation index for orders that allocations would fail for
2372 */
2373static int extfrag_show(struct seq_file *m, void *arg)
2374{
2375 pg_data_t *pgdat = (pg_data_t *)arg;
2376
727c080f 2377 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
f1a5ab12
MG
2378
2379 return 0;
2380}
2381
01a99560 2382static const struct seq_operations extfrag_sops = {
f1a5ab12
MG
2383 .start = frag_start,
2384 .next = frag_next,
2385 .stop = frag_stop,
2386 .show = extfrag_show,
2387};
2388
01a99560 2389DEFINE_SEQ_ATTRIBUTE(extfrag);
f1a5ab12 2390
d7a5752c
MG
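/*
 * Expose both indices via debugfs, typically readable as
 * /sys/kernel/debug/extfrag/unusable_index and
 * /sys/kernel/debug/extfrag/extfrag_index (assuming debugfs is mounted at
 * /sys/kernel/debug).
 */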
2391static int __init extfrag_debug_init(void)
2392{
bde8bd8a
S
2393 struct dentry *extfrag_debug_root;
2394
d7a5752c 2395 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
d7a5752c 2396
d9f7979c 2397 debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
01a99560 2398 &unusable_fops);
d7a5752c 2399
d9f7979c 2400 debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
01a99560 2401 &extfrag_fops);
f1a5ab12 2402
d7a5752c
MG
2403 return 0;
2404}
2405
2406module_init(extfrag_debug_init);
15995a35 2407
d7a5752c 2408#endif