udp: correct reuseport selection with connected sockets
authorWillem de Bruijn <willemb@google.com>
Fri, 13 Sep 2019 01:16:39 +0000 (21:16 -0400)
committerDavid S. Miller <davem@davemloft.net>
Mon, 16 Sep 2019 07:02:18 +0000 (09:02 +0200)
UDP reuseport groups can hold a mix unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sock_reuseport.h
net/core/sock_reuseport.c
net/ipv4/datagram.c
net/ipv4/udp.c
net/ipv6/datagram.c
net/ipv6/udp.c

index d9112de852614ebaa07e4e584e2782c2706458c4..43f4a818d88f19a78a9052c25f5972f1ff44e240 100644 (file)
@@ -21,7 +21,8 @@ struct sock_reuseport {
        unsigned int            synq_overflow_ts;
        /* ID stays the same even after the size of socks[] grows. */
        unsigned int            reuseport_id;
-       bool                    bind_inany;
+       unsigned int            bind_inany:1;
+       unsigned int            has_conns:1;
        struct bpf_prog __rcu   *prog;          /* optional BPF sock selector */
        struct sock             *socks[0];      /* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+       struct sock_reuseport *reuse;
+       bool ret = false;
+
+       rcu_read_lock();
+       reuse = rcu_dereference(sk->sk_reuseport_cb);
+       if (reuse) {
+               if (set)
+                       reuse->has_conns = 1;
+               ret = reuse->has_conns;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
index 9408f9264d052298a4eb85e0fb232c89af533e5c..f3ceec93f3923d864bde1aba9c26d9b6bfe1f97e 100644 (file)
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
-               if (!sk2)
-                       sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+               if (!sk2) {
+                       int i, j;
+
+                       i = j = reciprocal_scale(hash, socks);
+                       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+                               i++;
+                               if (i >= reuse->num_socks)
+                                       i = 0;
+                               if (i == j)
+                                       goto out;
+                       }
+                       sk2 = reuse->socks[i];
+               }
        }
 
 out:
index 7bd29e694603a8a19d7c114c2acae847e3fdd170..9a0fe0c2fa02c9707e6fc8c02529a48e84f7d680 100644 (file)
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        }
        inet->inet_daddr = fl4->daddr;
        inet->inet_dport = usin->sin_port;
+       reuseport_has_conns(sk, true);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
        inet->inet_id = jiffies;
index d88821c794fbe3b2765bb8945fe5afb21c92ead5..16486c8b708b40f04863a443bde6c7c52e947d11 100644 (file)
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
                                result = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (result)
+                               if (result && !reuseport_has_conns(sk, false))
                                        return result;
                        }
                        badness = score;
index 9ab897ded4df52d882cda1414ef0159f3eb1765a..96f939248d2ff5e512c58cfdcb063d7b42af8766 100644 (file)
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ ipv4_connected:
                goto out;
        }
 
+       reuseport_has_conns(sk, true);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
 out:
index 827fe738507887875f92c7dcc674f54e02e2b939..5995fdc99d3f3ef7b4f9cf59914976d248f28349 100644 (file)
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
 
                                result = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (result)
+                               if (result && !reuseport_has_conns(sk, false))
                                        return result;
                        }
                        result = sk;