Commit | Line | Data |
---|---|---|
f6ac2354 CL |
1 | /* |
2 | * linux/mm/vmstat.c | |
3 | * | |
4 | * Manages VM statistics | |
5 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
2244b95a CL |
6 | * |
7 | * zoned VM statistics | |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | |
9 | * Christoph Lameter <christoph@lameter.com> | |
f6ac2354 CL |
10 | */ |
11 | ||
12 | #include <linux/config.h> | |
13 | #include <linux/mm.h> | |
2244b95a | 14 | #include <linux/module.h> |
f6ac2354 CL |
15 | |
16 | /* | |
17 | * Accumulate the page_state information across all CPUs. | |
18 | * The result is unavoidably approximate - it can change | |
19 | * during and after execution of this function. | |
20 | */ | |
21 | DEFINE_PER_CPU(struct page_state, page_states) = {0}; | |
22 | ||
f6ac2354 CL |
23 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
24 | { | |
25 | unsigned cpu; | |
26 | ||
27 | memset(ret, 0, nr * sizeof(unsigned long)); | |
28 | cpus_and(*cpumask, *cpumask, cpu_online_map); | |
29 | ||
30 | for_each_cpu_mask(cpu, *cpumask) { | |
31 | unsigned long *in; | |
32 | unsigned long *out; | |
33 | unsigned off; | |
34 | unsigned next_cpu; | |
35 | ||
36 | in = (unsigned long *)&per_cpu(page_states, cpu); | |
37 | ||
38 | next_cpu = next_cpu(cpu, *cpumask); | |
39 | if (likely(next_cpu < NR_CPUS)) | |
40 | prefetch(&per_cpu(page_states, next_cpu)); | |
41 | ||
42 | out = (unsigned long *)ret; | |
43 | for (off = 0; off < nr; off++) | |
44 | *out++ += *in++; | |
45 | } | |
46 | } | |
47 | ||
f6ac2354 CL |
48 | void get_full_page_state(struct page_state *ret) |
49 | { | |
50 | cpumask_t mask = CPU_MASK_ALL; | |
51 | ||
52 | __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); | |
53 | } | |
54 | ||
f6ac2354 CL |
55 | void __mod_page_state_offset(unsigned long offset, unsigned long delta) |
56 | { | |
57 | void *ptr; | |
58 | ||
59 | ptr = &__get_cpu_var(page_states); | |
60 | *(unsigned long *)(ptr + offset) += delta; | |
61 | } | |
62 | EXPORT_SYMBOL(__mod_page_state_offset); | |
63 | ||
64 | void mod_page_state_offset(unsigned long offset, unsigned long delta) | |
65 | { | |
66 | unsigned long flags; | |
67 | void *ptr; | |
68 | ||
69 | local_irq_save(flags); | |
70 | ptr = &__get_cpu_var(page_states); | |
71 | *(unsigned long *)(ptr + offset) += delta; | |
72 | local_irq_restore(flags); | |
73 | } | |
74 | EXPORT_SYMBOL(mod_page_state_offset); | |
75 | ||
76 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | |
77 | unsigned long *free, struct pglist_data *pgdat) | |
78 | { | |
79 | struct zone *zones = pgdat->node_zones; | |
80 | int i; | |
81 | ||
82 | *active = 0; | |
83 | *inactive = 0; | |
84 | *free = 0; | |
85 | for (i = 0; i < MAX_NR_ZONES; i++) { | |
86 | *active += zones[i].nr_active; | |
87 | *inactive += zones[i].nr_inactive; | |
88 | *free += zones[i].free_pages; | |
89 | } | |
90 | } | |
91 | ||
92 | void get_zone_counts(unsigned long *active, | |
93 | unsigned long *inactive, unsigned long *free) | |
94 | { | |
95 | struct pglist_data *pgdat; | |
96 | ||
97 | *active = 0; | |
98 | *inactive = 0; | |
99 | *free = 0; | |
100 | for_each_online_pgdat(pgdat) { | |
101 | unsigned long l, m, n; | |
102 | __get_zone_counts(&l, &m, &n, pgdat); | |
103 | *active += l; | |
104 | *inactive += m; | |
105 | *free += n; | |
106 | } | |
107 | } | |
108 | ||
2244b95a CL |
109 | /* |
110 | * Manage combined zone based / global counters | |
111 | * | |
112 | * vm_stat contains the global counters | |
113 | */ | |
114 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | |
115 | EXPORT_SYMBOL(vm_stat); | |
116 | ||
117 | #ifdef CONFIG_SMP | |
118 | ||
/*
 * Largest magnitude a per-cpu differential may reach; once an update
 * pushes it beyond +/-STAT_THRESHOLD the differential is folded into the
 * zone counter and reset to zero.
 */
#define STAT_THRESHOLD 32
120 | ||
121 | /* | |
122 | * Determine pointer to currently valid differential byte given a zone and | |
123 | * the item number. | |
124 | * | |
125 | * Preemption must be off | |
126 | */ | |
127 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |
128 | { | |
129 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | |
130 | } | |
131 | ||
132 | /* | |
133 | * For use when we know that interrupts are disabled. | |
134 | */ | |
135 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
136 | int delta) | |
137 | { | |
138 | s8 *p; | |
139 | long x; | |
140 | ||
141 | p = diff_pointer(zone, item); | |
142 | x = delta + *p; | |
143 | ||
144 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | |
145 | zone_page_state_add(x, zone, item); | |
146 | x = 0; | |
147 | } | |
148 | ||
149 | *p = x; | |
150 | } | |
151 | EXPORT_SYMBOL(__mod_zone_page_state); | |
152 | ||
153 | /* | |
154 | * For an unknown interrupt state | |
155 | */ | |
156 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
157 | int delta) | |
158 | { | |
159 | unsigned long flags; | |
160 | ||
161 | local_irq_save(flags); | |
162 | __mod_zone_page_state(zone, item, delta); | |
163 | local_irq_restore(flags); | |
164 | } | |
165 | EXPORT_SYMBOL(mod_zone_page_state); | |
166 | ||
167 | /* | |
168 | * Optimized increment and decrement functions. | |
169 | * | |
170 | * These are only for a single page and therefore can take a struct page * | |
171 | * argument instead of struct zone *. This allows the inclusion of the code | |
172 | * generated for page_zone(page) into the optimized functions. | |
173 | * | |
174 | * No overflow check is necessary and therefore the differential can be | |
175 | * incremented or decremented in place which may allow the compilers to | |
176 | * generate better code. | |
177 | * | |
178 | * The increment or decrement is known and therefore one boundary check can | |
179 | * be omitted. | |
180 | * | |
181 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | |
182 | * However, the code must first determine the differential location in a zone | |
183 | * based on the processor number and then inc/dec the counter. There is no | |
184 | * guarantee without disabling preemption that the processor will not change | |
185 | * in between and therefore the atomicity vs. interrupt cannot be exploited | |
186 | * in a useful way here. | |
187 | */ | |
ca889e6c | 188 | static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
2244b95a | 189 | { |
2244b95a CL |
190 | s8 *p = diff_pointer(zone, item); |
191 | ||
192 | (*p)++; | |
193 | ||
194 | if (unlikely(*p > STAT_THRESHOLD)) { | |
195 | zone_page_state_add(*p, zone, item); | |
196 | *p = 0; | |
197 | } | |
198 | } | |
ca889e6c CL |
199 | |
200 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
201 | { | |
202 | __inc_zone_state(page_zone(page), item); | |
203 | } | |
2244b95a CL |
204 | EXPORT_SYMBOL(__inc_zone_page_state); |
205 | ||
206 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
207 | { | |
208 | struct zone *zone = page_zone(page); | |
209 | s8 *p = diff_pointer(zone, item); | |
210 | ||
211 | (*p)--; | |
212 | ||
213 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
214 | zone_page_state_add(*p, zone, item); | |
215 | *p = 0; | |
216 | } | |
217 | } | |
218 | EXPORT_SYMBOL(__dec_zone_page_state); | |
219 | ||
ca889e6c CL |
220 | void inc_zone_state(struct zone *zone, enum zone_stat_item item) |
221 | { | |
222 | unsigned long flags; | |
223 | ||
224 | local_irq_save(flags); | |
225 | __inc_zone_state(zone, item); | |
226 | local_irq_restore(flags); | |
227 | } | |
228 | ||
2244b95a CL |
229 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) |
230 | { | |
231 | unsigned long flags; | |
232 | struct zone *zone; | |
2244b95a CL |
233 | |
234 | zone = page_zone(page); | |
235 | local_irq_save(flags); | |
ca889e6c | 236 | __inc_zone_state(zone, item); |
2244b95a CL |
237 | local_irq_restore(flags); |
238 | } | |
239 | EXPORT_SYMBOL(inc_zone_page_state); | |
240 | ||
241 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
242 | { | |
243 | unsigned long flags; | |
244 | struct zone *zone; | |
245 | s8 *p; | |
246 | ||
247 | zone = page_zone(page); | |
248 | local_irq_save(flags); | |
249 | p = diff_pointer(zone, item); | |
250 | ||
251 | (*p)--; | |
252 | ||
253 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
254 | zone_page_state_add(*p, zone, item); | |
255 | *p = 0; | |
256 | } | |
257 | local_irq_restore(flags); | |
258 | } | |
259 | EXPORT_SYMBOL(dec_zone_page_state); | |
260 | ||
261 | /* | |
262 | * Update the zone counters for one cpu. | |
263 | */ | |
264 | void refresh_cpu_vm_stats(int cpu) | |
265 | { | |
266 | struct zone *zone; | |
267 | int i; | |
268 | unsigned long flags; | |
269 | ||
270 | for_each_zone(zone) { | |
271 | struct per_cpu_pageset *pcp; | |
272 | ||
273 | pcp = zone_pcp(zone, cpu); | |
274 | ||
275 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
276 | if (pcp->vm_stat_diff[i]) { | |
277 | local_irq_save(flags); | |
278 | zone_page_state_add(pcp->vm_stat_diff[i], | |
279 | zone, i); | |
280 | pcp->vm_stat_diff[i] = 0; | |
281 | local_irq_restore(flags); | |
282 | } | |
283 | } | |
284 | } | |
285 | ||
/*
 * Callback for on_each_cpu(): fold the differentials of the CPU this runs
 * on. @dummy is unused.
 */
static void __refresh_cpu_vm_stats(void *dummy)
{
	int this_cpu = smp_processor_id();

	refresh_cpu_vm_stats(this_cpu);
}
290 | ||
291 | /* | |
292 | * Consolidate all counters. | |
293 | * | |
294 | * Note that the result is less inaccurate but still inaccurate | |
295 | * if concurrent processes are allowed to run. | |
296 | */ | |
297 | void refresh_vm_stats(void) | |
298 | { | |
299 | on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1); | |
300 | } | |
301 | EXPORT_SYMBOL(refresh_vm_stats); | |
302 | ||
303 | #endif | |
304 | ||
ca889e6c CL |
305 | #ifdef CONFIG_NUMA |
306 | /* | |
307 | * zonelist = the list of zones passed to the allocator | |
308 | * z = the zone from which the allocation occurred. | |
309 | * | |
310 | * Must be called with interrupts disabled. | |
311 | */ | |
312 | void zone_statistics(struct zonelist *zonelist, struct zone *z) | |
313 | { | |
314 | if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) { | |
315 | __inc_zone_state(z, NUMA_HIT); | |
316 | } else { | |
317 | __inc_zone_state(z, NUMA_MISS); | |
318 | __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN); | |
319 | } | |
320 | if (z->zone_pgdat == NODE_DATA(numa_node_id())) | |
321 | __inc_zone_state(z, NUMA_LOCAL); | |
322 | else | |
323 | __inc_zone_state(z, NUMA_OTHER); | |
324 | } | |
325 | #endif | |
326 | ||
f6ac2354 CL |
327 | #ifdef CONFIG_PROC_FS |
328 | ||
329 | #include <linux/seq_file.h> | |
330 | ||
331 | static void *frag_start(struct seq_file *m, loff_t *pos) | |
332 | { | |
333 | pg_data_t *pgdat; | |
334 | loff_t node = *pos; | |
335 | for (pgdat = first_online_pgdat(); | |
336 | pgdat && node; | |
337 | pgdat = next_online_pgdat(pgdat)) | |
338 | --node; | |
339 | ||
340 | return pgdat; | |
341 | } | |
342 | ||
343 | static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) | |
344 | { | |
345 | pg_data_t *pgdat = (pg_data_t *)arg; | |
346 | ||
347 | (*pos)++; | |
348 | return next_online_pgdat(pgdat); | |
349 | } | |
350 | ||
/*
 * seq_file ->stop: nothing to do here.
 */
static void frag_stop(struct seq_file *m, void *arg)
{
	/* intentionally empty */
}
354 | ||
355 | /* | |
356 | * This walks the free areas for each zone. | |
357 | */ | |
358 | static int frag_show(struct seq_file *m, void *arg) | |
359 | { | |
360 | pg_data_t *pgdat = (pg_data_t *)arg; | |
361 | struct zone *zone; | |
362 | struct zone *node_zones = pgdat->node_zones; | |
363 | unsigned long flags; | |
364 | int order; | |
365 | ||
366 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | |
367 | if (!populated_zone(zone)) | |
368 | continue; | |
369 | ||
370 | spin_lock_irqsave(&zone->lock, flags); | |
371 | seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); | |
372 | for (order = 0; order < MAX_ORDER; ++order) | |
373 | seq_printf(m, "%6lu ", zone->free_area[order].nr_free); | |
374 | spin_unlock_irqrestore(&zone->lock, flags); | |
375 | seq_putc(m, '\n'); | |
376 | } | |
377 | return 0; | |
378 | } | |
379 | ||
380 | struct seq_operations fragmentation_op = { | |
381 | .start = frag_start, | |
382 | .next = frag_next, | |
383 | .stop = frag_stop, | |
384 | .show = frag_show, | |
385 | }; | |
386 | ||
/*
 * Names of the counters, in output order.
 *
 * zoneinfo_show() indexes this table with a zone_stat_item, and
 * vmstat_show() indexes it with the position of a value inside a buffer
 * made of NR_VM_ZONE_STAT_ITEMS unsigned longs followed by a
 * struct page_state. The entries therefore have to stay in that order:
 * zoned counters first, then the event counters.
 *
 * Both the pointers and the strings are const: the table is only ever
 * read, and an accidental write now fails to compile.
 */
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_bounce",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* Event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
};
455 | ||
456 | /* | |
457 | * Output information about zones in @pgdat. | |
458 | */ | |
459 | static int zoneinfo_show(struct seq_file *m, void *arg) | |
460 | { | |
461 | pg_data_t *pgdat = arg; | |
462 | struct zone *zone; | |
463 | struct zone *node_zones = pgdat->node_zones; | |
464 | unsigned long flags; | |
465 | ||
466 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { | |
467 | int i; | |
468 | ||
469 | if (!populated_zone(zone)) | |
470 | continue; | |
471 | ||
472 | spin_lock_irqsave(&zone->lock, flags); | |
473 | seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); | |
474 | seq_printf(m, | |
475 | "\n pages free %lu" | |
476 | "\n min %lu" | |
477 | "\n low %lu" | |
478 | "\n high %lu" | |
479 | "\n active %lu" | |
480 | "\n inactive %lu" | |
481 | "\n scanned %lu (a: %lu i: %lu)" | |
482 | "\n spanned %lu" | |
483 | "\n present %lu", | |
484 | zone->free_pages, | |
485 | zone->pages_min, | |
486 | zone->pages_low, | |
487 | zone->pages_high, | |
488 | zone->nr_active, | |
489 | zone->nr_inactive, | |
490 | zone->pages_scanned, | |
491 | zone->nr_scan_active, zone->nr_scan_inactive, | |
492 | zone->spanned_pages, | |
493 | zone->present_pages); | |
2244b95a CL |
494 | |
495 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
496 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | |
497 | zone_page_state(zone, i)); | |
498 | ||
f6ac2354 CL |
499 | seq_printf(m, |
500 | "\n protection: (%lu", | |
501 | zone->lowmem_reserve[0]); | |
502 | for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) | |
503 | seq_printf(m, ", %lu", zone->lowmem_reserve[i]); | |
504 | seq_printf(m, | |
505 | ")" | |
506 | "\n pagesets"); | |
507 | for_each_online_cpu(i) { | |
508 | struct per_cpu_pageset *pageset; | |
509 | int j; | |
510 | ||
511 | pageset = zone_pcp(zone, i); | |
512 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
513 | if (pageset->pcp[j].count) | |
514 | break; | |
515 | } | |
516 | if (j == ARRAY_SIZE(pageset->pcp)) | |
517 | continue; | |
518 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
519 | seq_printf(m, | |
520 | "\n cpu: %i pcp: %i" | |
521 | "\n count: %i" | |
522 | "\n high: %i" | |
523 | "\n batch: %i", | |
524 | i, j, | |
525 | pageset->pcp[j].count, | |
526 | pageset->pcp[j].high, | |
527 | pageset->pcp[j].batch); | |
528 | } | |
f6ac2354 CL |
529 | } |
530 | seq_printf(m, | |
531 | "\n all_unreclaimable: %u" | |
532 | "\n prev_priority: %i" | |
533 | "\n temp_priority: %i" | |
534 | "\n start_pfn: %lu", | |
535 | zone->all_unreclaimable, | |
536 | zone->prev_priority, | |
537 | zone->temp_priority, | |
538 | zone->zone_start_pfn); | |
539 | spin_unlock_irqrestore(&zone->lock, flags); | |
540 | seq_putc(m, '\n'); | |
541 | } | |
542 | return 0; | |
543 | } | |
544 | ||
545 | struct seq_operations zoneinfo_op = { | |
546 | .start = frag_start, /* iterate over all zones. The same as in | |
547 | * fragmentation. */ | |
548 | .next = frag_next, | |
549 | .stop = frag_stop, | |
550 | .show = zoneinfo_show, | |
551 | }; | |
552 | ||
553 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | |
554 | { | |
2244b95a | 555 | unsigned long *v; |
f6ac2354 | 556 | struct page_state *ps; |
2244b95a | 557 | int i; |
f6ac2354 CL |
558 | |
559 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
560 | return NULL; | |
561 | ||
2244b95a CL |
562 | v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) |
563 | + sizeof(*ps), GFP_KERNEL); | |
564 | m->private = v; | |
565 | if (!v) | |
f6ac2354 | 566 | return ERR_PTR(-ENOMEM); |
2244b95a CL |
567 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
568 | v[i] = global_page_state(i); | |
569 | ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS); | |
f6ac2354 CL |
570 | get_full_page_state(ps); |
571 | ps->pgpgin /= 2; /* sectors -> kbytes */ | |
572 | ps->pgpgout /= 2; | |
2244b95a | 573 | return v + *pos; |
f6ac2354 CL |
574 | } |
575 | ||
576 | static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) | |
577 | { | |
578 | (*pos)++; | |
579 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
580 | return NULL; | |
581 | return (unsigned long *)m->private + *pos; | |
582 | } | |
583 | ||
584 | static int vmstat_show(struct seq_file *m, void *arg) | |
585 | { | |
586 | unsigned long *l = arg; | |
587 | unsigned long off = l - (unsigned long *)m->private; | |
588 | ||
589 | seq_printf(m, "%s %lu\n", vmstat_text[off], *l); | |
590 | return 0; | |
591 | } | |
592 | ||
593 | static void vmstat_stop(struct seq_file *m, void *arg) | |
594 | { | |
595 | kfree(m->private); | |
596 | m->private = NULL; | |
597 | } | |
598 | ||
599 | struct seq_operations vmstat_op = { | |
600 | .start = vmstat_start, | |
601 | .next = vmstat_next, | |
602 | .stop = vmstat_stop, | |
603 | .show = vmstat_show, | |
604 | }; | |
605 | ||
606 | #endif /* CONFIG_PROC_FS */ | |
607 |