memcg: add per-memcg total kernel memory stat
author Yosry Ahmed <yosryahmed@google.com>
Wed, 16 Feb 2022 04:31:08 +0000 (15:31 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Wed, 16 Feb 2022 04:31:08 +0000 (15:31 +1100)
Currently memcg stats show several types of kernel memory: kernel stack,
page tables, sock, vmalloc, and slab.  However, there are other
allocations with __GFP_ACCOUNT (or supersets such as GFP_KERNEL_ACCOUNT)
that are not accounted in any of those stats.  A few examples (a sketch
of the opt-in pattern follows this list):

- various kvm allocations (e.g. allocated pages to create vcpus)
- io_uring
- tmp_page in pipes during pipe_write()
- bpf ringbuffers
- unix sockets
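
These call sites opt in with nothing more than the gfp flag.  A minimal
sketch of the pattern (struct foo_ctx and the surrounding error path are
hypothetical; kzalloc() and GFP_KERNEL_ACCOUNT are the real interfaces):

  struct foo_ctx *ctx;

  /* Charged to the allocating task's memcg via __GFP_ACCOUNT. */
  ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
  if (!ctx)
          return -ENOMEM;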

Keeping track of the total kernel memory is essential for easing
migration from cgroup v1 to v2, as there are large discrepancies between
v1's kmem.usage_in_bytes and the sum of the available kernel memory
stats in v2.  Adding separate memcg stats for all __GFP_ACCOUNT kernel
allocations would be an impractical maintenance burden, as there are
many such call sites throughout the kernel, with more use cases likely
to show up in the future.

Therefore, add a "kernel" memcg stat that is analogous to the kmem page
counter, with added benefits such as using the rstat infrastructure,
which aggregates stats more efficiently.  Additionally, this provides a
lighter alternative in case the legacy kmem is deprecated in the future.
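
On cgroup v2, the new entry shows up as a "kernel" line in memory.stat.
For illustration, a minimal userspace sketch that reads it (assuming the
v2 hierarchy is mounted at /sys/fs/cgroup; that path is the only
assumption here):

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          /* memory.stat is a flat keyed file: "<stat> <bytes>" per line. */
          FILE *f = fopen("/sys/fs/cgroup/memory.stat", "r");
          char key[64];
          unsigned long long bytes;

          if (!f)
                  return 1;
          while (fscanf(f, "%63s %llu", key, &bytes) == 2)
                  if (!strcmp(key, "kernel"))
                          printf("kernel: %llu bytes\n", bytes);
          fclose(f);
          return 0;
  }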

Link: https://lkml.kernel.org/r/20220201200823.3283171-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Documentation/admin-guide/cgroup-v2.rst
include/linux/memcontrol.h
mm/memcontrol.c

index 5aa368d165dabebd08aec2f57cf31759b6f68382..a0027d570a7f332bcc916f5b5f0a416ffadf65cc 100644
@@ -1317,6 +1317,11 @@ PAGE_SIZE multiple when read back.
          vmalloc (npn)
                Amount of memory used for vmap backed memory.
 
+         kernel (npn)
+               Amount of total kernel memory, including kernel_stack,
+               pagetables, percpu, vmalloc, and slab, in addition to
+               other kernel memory use cases.
+
          shmem
                Amount of cached filesystem data that is swap-backed,
                such as tmpfs, shm segments, shared anonymous mmap()s
index 0abbd685703b9ac9a52df62b13a7b3c7b1f2d49f..8612d7dd0859443b938e882868734ae9ba8d577f 100644
@@ -34,6 +34,7 @@ enum memcg_stat_item {
        MEMCG_SOCK,
        MEMCG_PERCPU_B,
        MEMCG_VMALLOC,
+       MEMCG_KMEM,
        MEMCG_NR_STAT,
 };
 
index 209e66893da6ef171b6426604726170ec9fa127d..afece32e67774177077e47b21fd9128bf346c1c7 100644
@@ -1376,6 +1376,7 @@ static const struct memory_stat memory_stats[] = {
        { "percpu",                     MEMCG_PERCPU_B                  },
        { "sock",                       MEMCG_SOCK                      },
        { "vmalloc",                    MEMCG_VMALLOC                   },
+       { "kernel",                     MEMCG_KMEM                      },
        { "shmem",                      NR_SHMEM                        },
        { "file_mapped",                NR_FILE_MAPPED                  },
        { "file_dirty",                 NR_FILE_DIRTY                   },
@@ -2979,6 +2980,23 @@ static void memcg_free_cache_id(int id)
        ida_simple_remove(&memcg_cache_ida, id);
 }
 
+/*
+ * Record @nr_pages of kernel memory in the MEMCG_KMEM stat.  @nr_pages
+ * may be negative to uncharge.  On cgroup v1, the legacy kmem page
+ * counter is charged/uncharged as well.
+ */
+static void mem_cgroup_kmem_record(struct mem_cgroup *memcg,
+                                  int nr_pages)
+{
+       mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+               if (nr_pages > 0)
+                       page_counter_charge(&memcg->kmem, nr_pages);
+               else
+                       page_counter_uncharge(&memcg->kmem, -nr_pages);
+       }
+}
+
 /*
  * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
  * @objcg: object cgroup to uncharge
@@ -2991,8 +3009,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 
        memcg = get_mem_cgroup_from_objcg(objcg);
 
-       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
-               page_counter_uncharge(&memcg->kmem, nr_pages);
+       mem_cgroup_kmem_record(memcg, -nr_pages);
        refill_stock(memcg, nr_pages);
 
        css_put(&memcg->css);
@@ -3018,8 +3035,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
        if (ret)
                goto out;
 
-       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
-               page_counter_charge(&memcg->kmem, nr_pages);
+       mem_cgroup_kmem_record(memcg, nr_pages);
 out:
        css_put(&memcg->css);
 
@@ -6801,8 +6817,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
                page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
                if (do_memsw_account())
                        page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
-               if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
-                       page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
+               if (ug->nr_kmem)
+                       mem_cgroup_kmem_record(ug->memcg, -ug->nr_kmem);
                memcg_oom_recover(ug->memcg);
        }