ipv6: lockless IPV6_MTU_DISCOVER implementation
author: Eric Dumazet <edumazet@google.com>
Tue, 12 Sep 2023 16:02:11 +0000 (16:02 +0000)
committer: David S. Miller <davem@davemloft.net>
Fri, 15 Sep 2023 09:33:48 +0000 (10:33 +0100)
Most np->pmtudisc reads are racy.

Move this 3-bit field to a full byte, add READ_ONCE()/WRITE_ONCE()
annotations, and make IPV6_MTU_DISCOVER setsockopt() lockless.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/ipv6.h
include/net/ip6_route.h
net/ipv6/ip6_output.c
net/ipv6/ipv6_sockglue.c
net/ipv6/raw.c
net/ipv6/udp.c
net/netfilter/ipvs/ip_vs_sync.c

index f288a35f157f73ded445639c30f3365047fd9ddc..10f521a6a9c8a881b4677d53597929622ae95b67 100644 (file)
@@ -243,13 +243,12 @@ struct ipv6_pinfo {
        } rxopt;
 
        /* sockopt flags */
-       __u16                   sndflow:1,
-                               pmtudisc:3,
-                               padding:1,      /* 1 bit hole */
+       __u8                    sndflow:1,
                                srcprefs:3;     /* 001: prefer temporary address
                                                 * 010: prefer public address
                                                 * 100: prefer care-of address
                                                 */
+       __u8                    pmtudisc;
        __u8                    min_hopcount;
        __u8                    tclass;
        __be32                  rcv_flowinfo;
index b32539bb0fb05c67b5849bb219be59fabe5bb51c..b1ea49900b4ae17cb3436f884e26f5ae3a7a761c 100644 (file)
@@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
        const struct dst_entry *dst = skb_dst(skb);
        unsigned int mtu;
 
-       if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+       if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
                mtu = READ_ONCE(dst->dev->mtu);
                mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
        } else {
@@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
 
 static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
 {
-       return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
-              inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+       u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+       return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+              pmtudisc != IPV6_PMTUDISC_OMIT;
 }
 
 static inline bool ip6_sk_ignore_df(const struct sock *sk)
 {
-       return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
-              inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
+       u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+       return pmtudisc < IPV6_PMTUDISC_DO ||
+              pmtudisc == IPV6_PMTUDISC_OMIT;
 }
 
 static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
index f87d8491d7e273f167b7b144a7e134783e1b80f6..7e5d9eeb990fd4549be753fdaaf1e6c6c21d3f8d 100644 (file)
@@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
-               mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+               mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
-               mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+               mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
 
        frag_size = READ_ONCE(np->frag_size);
index c22a492e05360b68ef6868707e363f2ce84a4c35..85ea42644dcbbe3ed8f625e51ffc6d55ada40156 100644 (file)
@@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
                        return -EINVAL;
                inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
                return 0;
+       case IPV6_MTU_DISCOVER:
+               if (optlen < sizeof(int))
+                       return -EINVAL;
+               if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
+                       return -EINVAL;
+               WRITE_ONCE(np->pmtudisc, val);
+               return 0;
        }
        if (needs_rtnl)
                rtnl_lock();
@@ -941,14 +948,6 @@ done:
                        goto e_inval;
                retv = ip6_ra_control(sk, val);
                break;
-       case IPV6_MTU_DISCOVER:
-               if (optlen < sizeof(int))
-                       goto e_inval;
-               if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
-                       goto e_inval;
-               np->pmtudisc = val;
-               retv = 0;
-               break;
        case IPV6_FLOWINFO_SEND:
                if (optlen < sizeof(int))
                        goto e_inval;
@@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
                break;
 
        case IPV6_MTU_DISCOVER:
-               val = np->pmtudisc;
+               val = READ_ONCE(np->pmtudisc);
                break;
 
        case IPV6_RECVERR:
index 71f6bdccfa1f39290e1b573ff8c647d91fd007a4..47372cceb98f6e606346b74230b03e76e303822c 100644 (file)
@@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
        harderr = icmpv6_err_convert(type, code, &err);
        if (type == ICMPV6_PKT_TOOBIG) {
                ip6_sk_update_pmtu(skb, sk, info);
-               harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+               harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
        }
        if (type == NDISC_REDIRECT) {
                ip6_sk_redirect(skb, sk);
index 90e873689b885c13dd4eba4e1b6edd3df6b1daf2..c17e19fece1b8b899e566ca8e060f8e472911bb0 100644 (file)
@@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                if (!ip6_sk_accept_pmtu(sk))
                        goto out;
                ip6_sk_update_pmtu(skb, sk, info);
-               if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+               if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
                        harderr = 1;
        }
        if (type == NDISC_REDIRECT) {
index df1b33b61059eef1e86baefc63e138108a50a081..5820a8156c4701bb163f569d735c389d7a8e3820 100644 (file)
@@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
                struct ipv6_pinfo *np = inet6_sk(sk);
 
                /* IPV6_MTU_DISCOVER */
-               np->pmtudisc = val;
+               WRITE_ONCE(np->pmtudisc, val);
        }
 #endif
        release_sock(sk);