bpf: add bpf_jit_limit knob to restrict unpriv allocations

author Daniel Borkmann <daniel@iogearbox.net>

Mon, 22 Oct 2018 23:11:04 +0000 (01:11 +0200)

committer Alexei Starovoitov <ast@kernel.org>

Fri, 26 Oct 2018 00:11:42 +0000 (17:11 -0700)
author Daniel Borkmann <daniel@iogearbox.net>
Mon, 22 Oct 2018 23:11:04 +0000 (01:11 +0200)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 26 Oct 2018 00:11:42 +0000 (17:11 -0700)
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt

index 9ecde517728c317ac7428efd616536a0a90f301e..2793d4eac55fd7e12993b3045657fd4156499d06 100644 (file)
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -92,6 +92,14 @@ Values :
         0 - disable JIT kallsyms export (default value)
         1 - enable JIT kallsyms export for privileged users only
  
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
  dev_weight
  --------------
  
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 91b4c934f02e7ca0f8aa3c8dfcc6f51233d7d05e..de629b706d1d7dda246819ed37109f098c05785e 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
  extern int bpf_jit_enable;
  extern int bpf_jit_harden;
  extern int bpf_jit_kallsyms;
+extern int bpf_jit_limit;
  
  typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
  
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index 7c7eeea8cffcc38124870ff1a8dc350cad393ac2..6377225b208204c1c2d8829a778f50ebaa7d816d 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
  }
  
  #ifdef CONFIG_BPF_JIT
+# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
+
  /* All BPF JIT sysctl knobs here. */
  int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
  int bpf_jit_harden   __read_mostly;
  int bpf_jit_kallsyms __read_mostly;
+int bpf_jit_limit    __read_mostly = BPF_JIT_LIMIT_DEFAULT;
  
  static __always_inline void
  bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
         return ret;
  }
  
+static atomic_long_t bpf_jit_current;
+
+#if defined(MODULES_VADDR)
+static int __init bpf_jit_charge_init(void)
+{
+       /* Only used as heuristic here to derive limit. */
+       bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
+                                           PAGE_SIZE), INT_MAX);
+       return 0;
+}
+pure_initcall(bpf_jit_charge_init);
+#endif
+
+static int bpf_jit_charge_modmem(u32 pages)
+{
+       if (atomic_long_add_return(pages, &bpf_jit_current) >
+           (bpf_jit_limit >> PAGE_SHIFT)) {
+               if (!capable(CAP_SYS_ADMIN)) {
+                       atomic_long_sub(pages, &bpf_jit_current);
+                       return -EPERM;
+               }
+       }
+
+       return 0;
+}
+
+static void bpf_jit_uncharge_modmem(u32 pages)
+{
+       atomic_long_sub(pages, &bpf_jit_current);
+}
+
  struct bpf_binary_header *
  bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
                      unsigned int alignment,
                      bpf_jit_fill_hole_t bpf_fill_ill_insns)
  {
         struct bpf_binary_header *hdr;
-       unsigned int size, hole, start;
+       u32 size, hole, start, pages;
  
         /* Most of BPF filters are really small, but if some of them
          * fill a page, allow at least 128 extra bytes to insert a
          * random section of illegal instructions.
          */
         size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+       pages = size / PAGE_SIZE;
+
+       if (bpf_jit_charge_modmem(pages))
+               return NULL;
         hdr = module_alloc(size);
-       if (hdr == NULL)
+       if (!hdr) {
+               bpf_jit_uncharge_modmem(pages);
                 return NULL;
+       }
  
         /* Fill space with illegal/arch-dep instructions. */
         bpf_fill_ill_insns(hdr, size);
  
-       hdr->pages = size / PAGE_SIZE;
+       hdr->pages = pages;
         hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
                      PAGE_SIZE - sizeof(*hdr));
         start = (get_random_int() % hole) & ~(alignment - 1);
@@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
  
  void bpf_jit_binary_free(struct bpf_binary_header *hdr)
  {
+       u32 pages = hdr->pages;
+
         module_memfree(hdr);
+       bpf_jit_uncharge_modmem(pages);
  }
  
  /* This symbol is only overridden by archs that have different
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

index b1a2c5e38530a72478cab1a874cb7f78909a118b..37b4667128a3808395e23b0c53325a5d937c6b54 100644 (file)
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
         return ret;
  }
  
-# ifdef CONFIG_HAVE_EBPF_JIT
  static int
  proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
                                     void __user *buffer, size_t *lenp,
@@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
  
         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  }
-# endif
  #endif
  
  static struct ctl_table net_core_table[] = {
@@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = {
                 .extra2         = &one,
         },
  # endif
+       {
+               .procname       = "bpf_jit_limit",
+               .data           = &bpf_jit_limit,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+               .proc_handler   = proc_dointvec_minmax_bpf_restricted,
+               .extra1         = &one,
+       },
  #endif
         {
                 .procname       = "netdev_tstamp_prequeue",
author	Daniel Borkmann <daniel@iogearbox.net>
	Mon, 22 Oct 2018 23:11:04 +0000 (01:11 +0200)
committer	Alexei Starovoitov <ast@kernel.org>
	Fri, 26 Oct 2018 00:11:42 +0000 (17:11 -0700)
Documentation/sysctl/net.txt		patch \| blob \| blame \| history
include/linux/filter.h		patch \| blob \| blame \| history
kernel/bpf/core.c		patch \| blob \| blame \| history
net/core/sysctl_net_core.c		patch \| blob \| blame \| history