bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH
author Jakub Sitnicki <jakub@cloudflare.com>
Wed, 29 Apr 2020 18:11:52 +0000 (20:11 +0200)
committer Daniel Borkmann <daniel@iogearbox.net>
Wed, 29 Apr 2020 21:30:59 +0000 (23:30 +0200)
White-list map lookup for SOCKMAP/SOCKHASH from BPF. Lookup returns a
pointer to a full socket and acquires a reference if necessary.

To support it we need to extend the verifier to know that:

 (1) the register storing the lookup result holds a pointer to a socket, if
     the lookup was done on SOCKMAP/SOCKHASH, and that

 (2) map lookup on SOCKMAP/SOCKHASH is a reference acquiring operation,
     which needs a corresponding reference release with bpf_sk_release.

On the sock_map side, lookup handlers exposed via bpf_map_ops now bump
sk_refcnt if the socket is reference counted. In turn,
bpf_sk_select_reuseport, the only in-kernel user of SOCKMAP/SOCKHASH
ops->map_lookup_elem, was updated to release the reference.

Sockets fetched from a map can be used in the same way as ones returned by
BPF socket lookup helpers, such as bpf_sk_lookup_tcp. In particular, they
can be used with bpf_sk_assign to direct packets toward a socket on TC
ingress path.

Suggested-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429181154.479310-2-jakub@cloudflare.com
kernel/bpf/verifier.c
net/core/filter.c
net/core/sock_map.c

index 2b337e32aa9482a2be6b130efac582b53137953f..70ad009577f8128dd80935b5ea62512f3ffcbcb0 100644 (file)
@@ -429,11 +429,30 @@ static bool is_release_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_sk_release;
 }
 
-static bool is_acquire_function(enum bpf_func_id func_id)
+static bool may_be_acquire_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_lookup_tcp ||
                func_id == BPF_FUNC_sk_lookup_udp ||
-               func_id == BPF_FUNC_skc_lookup_tcp;
+               func_id == BPF_FUNC_skc_lookup_tcp ||
+               func_id == BPF_FUNC_map_lookup_elem;
+}
+
+static bool is_acquire_function(enum bpf_func_id func_id,
+                               const struct bpf_map *map)
+{
+       enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
+
+       if (func_id == BPF_FUNC_sk_lookup_tcp ||
+           func_id == BPF_FUNC_sk_lookup_udp ||
+           func_id == BPF_FUNC_skc_lookup_tcp)
+               return true;
+
+       if (func_id == BPF_FUNC_map_lookup_elem &&
+           (map_type == BPF_MAP_TYPE_SOCKMAP ||
+            map_type == BPF_MAP_TYPE_SOCKHASH))
+               return true;
+
+       return false;
 }
 
 static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -3934,7 +3953,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                    func_id != BPF_FUNC_sock_map_update &&
                    func_id != BPF_FUNC_map_delete_elem &&
                    func_id != BPF_FUNC_msg_redirect_map &&
-                   func_id != BPF_FUNC_sk_select_reuseport)
+                   func_id != BPF_FUNC_sk_select_reuseport &&
+                   func_id != BPF_FUNC_map_lookup_elem)
                        goto error;
                break;
        case BPF_MAP_TYPE_SOCKHASH:
@@ -3942,7 +3962,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                    func_id != BPF_FUNC_sock_hash_update &&
                    func_id != BPF_FUNC_map_delete_elem &&
                    func_id != BPF_FUNC_msg_redirect_hash &&
-                   func_id != BPF_FUNC_sk_select_reuseport)
+                   func_id != BPF_FUNC_sk_select_reuseport &&
+                   func_id != BPF_FUNC_map_lookup_elem)
                        goto error;
                break;
        case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -4112,7 +4133,7 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
        /* A reference acquiring function cannot acquire
         * another refcounted ptr.
         */
-       if (is_acquire_function(func_id) && count)
+       if (may_be_acquire_function(func_id) && count)
                return false;
 
        /* We only support one arg being unreferenced at the moment,
@@ -4623,7 +4644,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
        if (is_ptr_cast_function(func_id)) {
                /* For release_reference() */
                regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
-       } else if (is_acquire_function(func_id)) {
+       } else if (is_acquire_function(func_id, meta.map_ptr)) {
                int id = acquire_reference_state(env, insn_idx);
 
                if (id < 0)
@@ -6532,12 +6553,16 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
                if (is_null) {
                        reg->type = SCALAR_VALUE;
                } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-                       if (reg->map_ptr->inner_map_meta) {
+                       const struct bpf_map *map = reg->map_ptr;
+
+                       if (map->inner_map_meta) {
                                reg->type = CONST_PTR_TO_MAP;
-                               reg->map_ptr = reg->map_ptr->inner_map_meta;
-                       } else if (reg->map_ptr->map_type ==
-                                  BPF_MAP_TYPE_XSKMAP) {
+                               reg->map_ptr = map->inner_map_meta;
+                       } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
                                reg->type = PTR_TO_XDP_SOCK;
+                       } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+                                  map->map_type == BPF_MAP_TYPE_SOCKHASH) {
+                               reg->type = PTR_TO_SOCKET;
                        } else {
                                reg->type = PTR_TO_MAP_VALUE;
                        }
index da3b7a72c37cc26c518feca893b2a8640b6b5b0e..70b32723e6be7552128660064f1b2a57396ced5d 100644 (file)
@@ -8712,6 +8712,10 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
 
        reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
        if (!reuse) {
+               /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
+               if (sk_is_refcounted(selected_sk))
+                       sock_put(selected_sk);
+
                /* reuseport_array has only sk with non NULL sk_reuseport_cb.
                 * The only (!reuse) case here is - the sk has already been
                 * unhashed (e.g. by close()), so treat it as -ENOENT.
index b08dfae10f88ea0cfa00685e89b3c8ac936836ba..00a26cf2cfe923f4ac36c263d52474c44b93b0e7 100644 (file)
@@ -343,7 +343,14 @@ static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 
 static void *sock_map_lookup(struct bpf_map *map, void *key)
 {
-       return __sock_map_lookup_elem(map, *(u32 *)key);
+       struct sock *sk;
+
+       sk = __sock_map_lookup_elem(map, *(u32 *)key);
+       if (!sk || !sk_fullsock(sk))
+               return NULL;
+       if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
+               return NULL;
+       return sk;
 }
 
 static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
@@ -1051,7 +1058,14 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
 
 static void *sock_hash_lookup(struct bpf_map *map, void *key)
 {
-       return __sock_hash_lookup_elem(map, key);
+       struct sock *sk;
+
+       sk = __sock_hash_lookup_elem(map, key);
+       if (!sk || !sk_fullsock(sk))
+               return NULL;
+       if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
+               return NULL;
+       return sk;
 }
 
 static void sock_hash_release_progs(struct bpf_map *map)