bpf, cgroup: implement eBPF-based device controller for cgroup v2
author Roman Gushchin <guro@fb.com>
Sun, 5 Nov 2017 13:15:32 +0000 (08:15 -0500)
committer David S. Miller <davem@davemloft.net>
Sun, 5 Nov 2017 14:26:51 +0000 (23:26 +0900)
Cgroup v2 lacks the device controller provided by cgroup v1.
This patch adds a new eBPF program type which, in combination
with the previously added ability to attach multiple eBPF programs
to a cgroup, provides similar functionality, but with some
additional flexibility.

This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type.
A program takes major and minor device numbers, device type
(block/character) and access type (mknod/read/write) as parameters
and returns an integer that determines whether the operation is
allowed or rejected with -EPERM.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/bpf-cgroup.h
include/linux/bpf_types.h
include/linux/device_cgroup.h
include/uapi/linux/bpf.h
kernel/bpf/cgroup.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
tools/include/uapi/linux/bpf.h

index 87a7db9feb382cafebcb34a19af377a0056276f7..a7f16e0f8d68b94bc5743a75c41b538794725724 100644 (file)
@@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     struct bpf_sock_ops_kern *sock_ops,
                                     enum bpf_attach_type type);
 
+int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
+                                     short access, enum bpf_attach_type type);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                            \
 ({                                                                           \
@@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
        }                                                                      \
        __ret;                                                                 \
 })
+/* Wrapper for __cgroup_bpf_check_dev_permission() guarded by cgroup_bpf_enabled (0 if off). */
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access)        \
+({                                                                           \
+       int __ret = 0;                                                        \
+       if (cgroup_bpf_enabled)                                               \
+               __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
+                                                         access,             \
+                                                         BPF_CGROUP_DEVICE); \
+                                                                             \
+       __ret;                                                                \
+})
 #else
 
 struct cgroup_bpf {};
@@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
 #endif /* CONFIG_CGROUP_BPF */
 
index 53c5b9ad72204cb1661807a29f4ec3da9396a67e..978c1d9c9383a55d11c960ea2ce3e331756ef336 100644 (file)
@@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 #endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
index 2d93d7ecd4796aa536c3946cf9fd32c6dfa4816f..8557efe096dc96018525b8ba62790a104a21b888 100644 (file)
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/fs.h>
+#include <linux/bpf-cgroup.h>
 
 #define DEVCG_ACC_MKNOD 1
 #define DEVCG_ACC_READ  2
@@ -19,10 +20,15 @@ static inline int __devcgroup_check_permission(short type, u32 major, u32 minor,
 { return 0; }
 #endif
 
-#ifdef CONFIG_CGROUP_DEVICE
+#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
 static inline int devcgroup_check_permission(short type, u32 major, u32 minor,
                                             short access)
 {
+       int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
+
+       if (rc)         /* non-zero: the cgroup BPF device check rejected it */
+               return -EPERM;
+       /* BPF did not object; __devcgroup_check_permission() has the final say. */
        return __devcgroup_check_permission(type, major, minor, access);
 }
 
index 4455dd1952016a86d5465b520b669a587ef7c22a..e880ae6434eed9eb29db99169c716c94c7cf30aa 100644 (file)
@@ -132,6 +132,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_LWT_XMIT,
        BPF_PROG_TYPE_SOCK_OPS,
        BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_CGROUP_DEVICE,
 };
 
 enum bpf_attach_type {
@@ -141,6 +142,7 @@ enum bpf_attach_type {
        BPF_CGROUP_SOCK_OPS,
        BPF_SK_SKB_STREAM_PARSER,
        BPF_SK_SKB_STREAM_VERDICT,
+       BPF_CGROUP_DEVICE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -991,4 +993,17 @@ struct bpf_perf_event_value {
        __u64 running;
 };
 
+#define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
+#define BPF_DEVCG_ACC_READ     (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE    (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK    (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
+/* Context for BPF_PROG_TYPE_CGROUP_DEVICE programs, filled per device access check. */
+struct bpf_cgroup_dev_ctx {
+       __u32 access_type; /* (access << 16) | type */
+       __u32 major;       /* device major number */
+       __u32 minor;       /* device minor number */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index 3db5a17fcfe8dc76a22c2f23576cc118f8c8ff83..b789ab78d28f0d85d4d2338a49bc3c41f5eeae38 100644 (file)
@@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
        return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
+/* Run current's cgroup device programs; returns 0 to allow, non-zero to deny. */
+int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
+                                     short access, enum bpf_attach_type type)
+{
+       struct cgroup *cgrp;
+       struct bpf_cgroup_dev_ctx ctx = {
+               .access_type = (access << 16) | dev_type, /* access in high half, type in low */
+               .major = major,
+               .minor = minor,
+       };
+       int allow = 1;
+
+       rcu_read_lock();        /* cgroup lookup and program run sit in one RCU read section */
+       cgrp = task_dfl_cgroup(current);
+       allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
+                                  BPF_PROG_RUN);
+       rcu_read_unlock();
+
+       return !allow;  /* inverted: the caller treats non-zero as denial */
+}
+EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
+/* Map a helper id to its prototype for cgroup device programs; NULL if unsupported. */
+static const struct bpf_func_proto *
+cgroup_dev_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {
+       case BPF_FUNC_map_lookup_elem:
+               return &bpf_map_lookup_elem_proto;
+       case BPF_FUNC_map_update_elem:
+               return &bpf_map_update_elem_proto;
+       case BPF_FUNC_map_delete_elem:
+               return &bpf_map_delete_elem_proto;
+       case BPF_FUNC_get_current_uid_gid:
+               return &bpf_get_current_uid_gid_proto;
+       case BPF_FUNC_trace_printk:     /* only offered with CAP_SYS_ADMIN */
+               if (capable(CAP_SYS_ADMIN))
+                       return bpf_get_trace_printk_proto();
+       default:        /* also reached by fall-through when the capability is missing */
+               return NULL;
+       }
+}
+/* Verifier hook: accept only aligned, in-bounds, 4-byte non-write context accesses. */
+static bool cgroup_dev_is_valid_access(int off, int size,
+                                      enum bpf_access_type type,
+                                      struct bpf_insn_access_aux *info)
+{
+       if (type == BPF_WRITE)  /* programs may not write to the context */
+               return false;
+
+       if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
+               return false;   /* access starts before or runs past the struct */
+       /* The verifier guarantees that size > 0. */
+       if (off % size != 0)    /* offset must be a multiple of the access size */
+               return false;
+       if (size != sizeof(__u32))      /* only whole __u32 accesses are accepted */
+               return false;
+
+       return true;
+}
+/* Ops tables for the cg_dev program type (see the BPF_PROG_TYPE(..., cg_dev) entry). */
+const struct bpf_prog_ops cg_dev_prog_ops = {  /* no callbacks set */
+};
+
+const struct bpf_verifier_ops cg_dev_verifier_ops = {
+       .get_func_proto         = cgroup_dev_func_proto,
+       .is_valid_access        = cgroup_dev_is_valid_access,
+};
index 416d70cdfc7618a7cf05e075ffe7c283ef9f2b29..09badc37e86467bdef5923c52b4448b9eccc1d18 100644 (file)
@@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
        case BPF_CGROUP_SOCK_OPS:
                ptype = BPF_PROG_TYPE_SOCK_OPS;
                break;
+       case BPF_CGROUP_DEVICE:
+               ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
+               break;
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
                return sockmap_get_from_fd(attr, true);
@@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
        case BPF_CGROUP_SOCK_OPS:
                ptype = BPF_PROG_TYPE_SOCK_OPS;
                break;
+       case BPF_CGROUP_DEVICE:
+               ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
+               break;
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
                return sockmap_get_from_fd(attr, false);
@@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_CGROUP_INET_EGRESS:
        case BPF_CGROUP_INET_SOCK_CREATE:
        case BPF_CGROUP_SOCK_OPS:
+       case BPF_CGROUP_DEVICE:
                break;
        default:
                return -EINVAL;
index add845fe788a8a481b6823c2191a693adfbad887..4a942e2e753d71904d00b317011b5f6c4c14ca24 100644 (file)
@@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env)
        case BPF_PROG_TYPE_CGROUP_SKB:
        case BPF_PROG_TYPE_CGROUP_SOCK:
        case BPF_PROG_TYPE_SOCK_OPS:
+       case BPF_PROG_TYPE_CGROUP_DEVICE:
                break;
        default:
                return 0;
index e92f62cf933abde6f1311d9e80cba9cacfc2de58..b280f37cd0574f0815c58ecfd719460c06cdec6c 100644 (file)
@@ -131,6 +131,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_LWT_XMIT,
        BPF_PROG_TYPE_SOCK_OPS,
        BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_CGROUP_DEVICE,
 };
 
 enum bpf_attach_type {
@@ -140,6 +141,7 @@ enum bpf_attach_type {
        BPF_CGROUP_SOCK_OPS,
        BPF_SK_SKB_STREAM_PARSER,
        BPF_SK_SKB_STREAM_VERDICT,
+       BPF_CGROUP_DEVICE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -990,4 +992,17 @@ struct bpf_perf_event_value {
        __u64 running;
 };
 
+#define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
+#define BPF_DEVCG_ACC_READ     (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE    (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK    (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
+/* Context for BPF_PROG_TYPE_CGROUP_DEVICE programs, filled per device access check. */
+struct bpf_cgroup_dev_ctx {
+       __u32 access_type; /* (access << 16) | type */
+       __u32 major;       /* device major number */
+       __u32 minor;       /* device minor number */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */