From 2262185c5b287f2758afda79c149b7cf6bee165c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 6 Jul 2017 15:40:25 -0700 Subject: [PATCH] mm: per-cgroup memory reclaim stats Track the following reclaim counters for every memory cgroup: PGREFILL, PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED. These values are exposed using the memory.stat interface of cgroup v2. The meaning of each value is the same as for global counters, available using /proc/vmstat. Also, for consistency, rename mem_cgroup_count_vm_event() to count_memcg_event_mm(). Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com Signed-off-by: Roman Gushchin Suggested-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 28 ++++++++++++++++++++++ fs/dax.c | 2 +- fs/ncpfs/mmap.c | 2 +- include/linux/memcontrol.h | 48 ++++++++++++++++++++++++++++++++++--- mm/filemap.c | 2 +- mm/memcontrol.c | 10 ++++++++ mm/memory.c | 4 ++-- mm/shmem.c | 3 +-- mm/swap.c | 1 + mm/vmscan.c | 30 +++++++++++++++++------ 10 files changed, 113 insertions(+), 17 deletions(-) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 558c3a739baf..5ac2fbde97e6 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -956,6 +956,34 @@ PAGE_SIZE multiple when read back. 
Number of times a shadow node has been reclaimed + pgrefill + + Amount of scanned pages (in an active LRU list) + + pgscan + + Amount of scanned pages (in an inactive LRU list) + + pgsteal + + Amount of reclaimed pages + + pgactivate + + Amount of pages moved to the active LRU list + + pgdeactivate + + Amount of pages moved to the inactive LRU list + + pglazyfree + + Amount of pages postponed to be freed under memory pressure + + pglazyfreed + + Amount of reclaimed lazyfree pages + memory.swap.current A read-only single value file which exists on non-root diff --git a/fs/dax.c b/fs/dax.c index 33d05aa02aad..cd8fced592d0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1213,7 +1213,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 0c3905e0542e..6719c0be674d 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf) * -- nyc */ count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); return VM_FAULT_MAJOR; } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 899949bbb2f9..b2a5b1cd4e55 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -357,6 +357,17 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_id(unsigned short id); +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + struct mem_cgroup_per_node *mz; + + if (mem_cgroup_disabled()) + return NULL; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + return mz->memcg; +} + /** * parent_mem_cgroup - find 
the accounting parent of a memcg * @memcg: memcg whose parent to find @@ -546,8 +557,23 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); -static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->events[idx], count); +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ + if (page->mem_cgroup) + count_memcg_events(page->mem_cgroup, idx, 1); +} + +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) { struct mem_cgroup *memcg; @@ -675,6 +701,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return NULL; } +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + return NULL; +} + static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { return true; @@ -789,8 +820,19 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head) { } +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ +} + static inline -void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) +void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } #endif /* CONFIG_MEMCG */ diff --git a/mm/filemap.c b/mm/filemap.c index aea58e983a73..2e906ef52143 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2265,7 +2265,7 @@ int filemap_fault(struct vm_fault *vmf) /* No page in the page cache at all */ do_sync_mmap_readahead(vmf->vma, ra, file, offset); count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); ret = 
VM_FAULT_MAJOR; retry_find: page = find_get_page(mapping, offset); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc51a33ddcd1..3e2f8cf85b4c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5230,6 +5230,16 @@ static int memory_stat_show(struct seq_file *m, void *v) seq_printf(m, "pgfault %lu\n", events[PGFAULT]); seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]); + seq_printf(m, "pgrefill %lu\n", events[PGREFILL]); + seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] + + events[PGSCAN_DIRECT]); + seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] + + events[PGSTEAL_DIRECT]); + seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]); + seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]); + seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]); + seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]); + seq_printf(m, "workingset_refault %lu\n", stat[WORKINGSET_REFAULT]); seq_printf(m, "workingset_activate %lu\n", diff --git a/mm/memory.c b/mm/memory.c index bf3aab1684e9..e31dd97e6114 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2719,7 +2719,7 @@ int do_swap_page(struct vm_fault *vmf) /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing @@ -3837,7 +3837,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, __set_current_state(TASK_RUNNING); count_vm_event(PGFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT); + count_memcg_event_mm(vma->vm_mm, PGFAULT); /* do counter updates before entering really critical section. 
*/ check_sync_rss_stat(current); diff --git a/mm/shmem.c b/mm/shmem.c index a06f23731d3f..9418f5a9bc46 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1646,8 +1646,7 @@ repeat: if (fault_type) { *fault_type |= VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(charge_mm, - PGMAJFAULT); + count_memcg_event_mm(charge_mm, PGMAJFAULT); } /* Here we actually start the io */ page = shmem_swapin(swap, gfp, info, index); diff --git a/mm/swap.c b/mm/swap.c index 98d08b4579fa..4f44dbd7f780 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -591,6 +591,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); __count_vm_events(PGLAZYFREE, hpage_nr_pages(page)); + count_memcg_page_event(page, PGLAZYFREE); update_page_reclaim_stat(lruvec, 1, 0); } } diff --git a/mm/vmscan.c b/mm/vmscan.c index aebb157258f2..7d3c6c59897c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1294,6 +1294,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, } count_vm_event(PGLAZYFREED); + count_memcg_page_event(page, PGLAZYFREED); } else if (!mapping || !__remove_mapping(mapping, page, true)) goto keep_locked; /* @@ -1323,6 +1324,7 @@ activate_locked: if (!PageMlocked(page)) { SetPageActive(page); pgactivate++; + count_memcg_page_event(page, PGACTIVATE); } keep_locked: unlock_page(page); @@ -1762,11 +1764,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); reclaim_stat->recent_scanned[file] += nr_taken; - if (global_reclaim(sc)) { - if (current_is_kswapd()) + if (current_is_kswapd()) { + if (global_reclaim(sc)) __count_vm_events(PGSCAN_KSWAPD, nr_scanned); - else + count_memcg_events(lruvec_memcg(lruvec), PGSCAN_KSWAPD, + nr_scanned); + } else { + if (global_reclaim(sc)) __count_vm_events(PGSCAN_DIRECT, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), PGSCAN_DIRECT, + nr_scanned); } 
spin_unlock_irq(&pgdat->lru_lock); @@ -1778,11 +1785,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, spin_lock_irq(&pgdat->lru_lock); - if (global_reclaim(sc)) { - if (current_is_kswapd()) + if (current_is_kswapd()) { + if (global_reclaim(sc)) __count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed); - else + count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_KSWAPD, + nr_reclaimed); + } else { + if (global_reclaim(sc)) __count_vm_events(PGSTEAL_DIRECT, nr_reclaimed); + count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_DIRECT, + nr_reclaimed); } putback_inactive_pages(lruvec, &page_list); @@ -1927,8 +1939,11 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec, } } - if (!is_active_lru(lru)) + if (!is_active_lru(lru)) { __count_vm_events(PGDEACTIVATE, nr_moved); + count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, + nr_moved); + } return nr_moved; } @@ -1966,6 +1981,7 @@ static void shrink_active_list(unsigned long nr_to_scan, reclaim_stat->recent_scanned[file] += nr_taken; __count_vm_events(PGREFILL, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); spin_unlock_irq(&pgdat->lru_lock); -- 2.25.1