bpf: Do not allocate percpu memory at init stage
authorYonghong Song <yonghong.song@linux.dev>
Sat, 11 Nov 2023 01:39:28 +0000 (17:39 -0800)
committerAlexei Starovoitov <ast@kernel.org>
Wed, 15 Nov 2023 15:51:06 +0000 (07:51 -0800)
Kirill Shutemov reported significant percpu memory consumption increase after
booting in 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for
non-fix-size percpu mem allocation"). The percpu memory consumption is
increased from 111MB to 969MB. The number is from /proc/meminfo.

I tried to reproduce the issue with my local VM which at most supports upto
255 cpus. With 252 cpus, without the above commit, the percpu memory
consumption immediately after boot is 57MB while with the above commit the
percpu memory consumption is 231MB.

This is not good since so far percpu memory from bpf memory allocator is not
widely used yet. Let us change pre-allocation in init stage to on-demand
allocation when verifier detects there is a need of percpu memory for bpf
program. With this change, percpu memory consumption after boot can be reduced
signicantly.

  [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@box.shutemov.name/

Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation")
Reported-and-tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231111013928.948838-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf.h
kernel/bpf/core.c
kernel/bpf/verifier.c

index 35bff17396c0b34fcfa41e0b31393456471296e2..6762dac3ef76153fe96bbd05a1050b9a31d1a43d 100644 (file)
@@ -56,7 +56,7 @@ extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
 extern struct kobject *btf_kobj;
 extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+extern bool bpf_global_ma_set;
 
 typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
index 08626b519ce23fce3cefb585e9aa246e9e43025b..cd3afe57ece3cc9a5a52c20243bdafd7fa987f4f 100644 (file)
@@ -64,8 +64,8 @@
 #define OFF    insn->off
 #define IMM    insn->imm
 
-struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+struct bpf_mem_alloc bpf_global_ma;
+bool bpf_global_ma_set;
 
 /* No hurry in this branch
  *
@@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
 
        ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
        bpf_global_ma_set = !ret;
-       ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
-       bpf_global_percpu_ma_set = !ret;
-       return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
+       return ret;
 }
 late_initcall(bpf_global_ma_init);
 #endif
index a2267d5ed14ededd9b48a9f5a6b6c2a194629724..6da370a047feb90c4866310b57cff4c4d9eafe84 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/poison.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
+#include <linux/bpf_mem_alloc.h>
 #include <net/xdp.h>
 
 #include "disasm.h"
@@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 #undef BPF_LINK_TYPE
 };
 
+struct bpf_mem_alloc bpf_global_percpu_ma;
+static bool bpf_global_percpu_ma_set;
+
 /* bpf_check() is a static code analyzer that walks eBPF program
  * instruction by instruction and updates register/stack state.
  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
@@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta {
 struct btf *btf_vmlinux;
 
 static DEFINE_MUTEX(bpf_verifier_lock);
+static DEFINE_MUTEX(bpf_percpu_ma_lock);
 
 static const struct bpf_line_info *
 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
@@ -12091,8 +12096,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                                if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
                                        return -ENOMEM;
 
-                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
-                                       return -ENOMEM;
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                                       if (!bpf_global_percpu_ma_set) {
+                                               mutex_lock(&bpf_percpu_ma_lock);
+                                               if (!bpf_global_percpu_ma_set) {
+                                                       err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+                                                       if (!err)
+                                                               bpf_global_percpu_ma_set = true;
+                                               }
+                                               mutex_unlock(&bpf_percpu_ma_lock);
+                                               if (err)
+                                                       return err;
+                                       }
+                               }
 
                                if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
                                        verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");