ipv4: reset rt_iif for recirculated mcast/bcast out pkts
author Stephen Suryaputra <ssuryaextr@gmail.com>
Wed, 26 Jun 2019 06:21:16 +0000 (02:21 -0400)
committer David S. Miller <davem@davemloft.net>
Wed, 26 Jun 2019 19:40:10 +0000 (12:40 -0700)
Multicast or broadcast egress packets have rt_iif set to the oif. These
packets might be recirculated back as input, and the lookup of raw
sockets may then fail because those sockets are bound to the incoming
interface (skb_iif). During the lookup, inet_iif() returns rt_iif
instead of skb_iif whenever rt_iif is non-zero, so the lookup fails.

v2: Make it non-VRF-specific (David Ahern). Reword the changelog to
    reflect that.
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/route.h
net/ipv4/ip_output.c
net/ipv4/route.c

index 065b47754f05fd05efa991fe59bdd449c715cd87..55ff71ffb796acaa3e9e57f73b818dba2305e03d 100644 (file)
@@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
 struct rtable *rt_dst_alloc(struct net_device *dev,
                             unsigned int flags, u16 type,
                             bool nopolicy, bool noxfrm, bool will_cache);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
 
 struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
index 16f9159234a2014491fdd7f7371d6cb06b978adc..8c2ec35b6512f1486cf2ea01f4a19444c7422642 100644 (file)
@@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
 static int ip_mc_finish_output(struct net *net, struct sock *sk,
                               struct sk_buff *skb)
 {
+       struct rtable *new_rt;
        int ret;
 
        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
@@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
                return ret;
        }
 
+       /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
+        * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
+        * see ipv4_pktinfo_prepare().
+        */
+       new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
+       if (new_rt) {
+               new_rt->rt_iif = 0;
+               skb_dst_drop(skb);
+               skb_dst_set(skb, &new_rt->dst);
+       }
+
        return dev_loopback_xmit(net, sk, skb);
 }
 
index 6cb7cff22db9ca9a08d00fb2ccab732c145b4e9c..8ea0735a67546b745c2ef5ecd7dd0b5d06a22c77 100644 (file)
@@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 }
 EXPORT_SYMBOL(rt_dst_alloc);
 
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{ /* Allocate a new rtable on @dev and fill it from @rt; the dst_alloc() result is returned as-is. */
+       struct rtable *new_rt;
+
+       new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+                          rt->dst.flags); /* flags argument is taken from @rt */
+
+       if (new_rt) { /* the copy is skipped when dst_alloc() gave nothing back */
+               new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); /* derived from @dev, not copied from @rt */
+               new_rt->rt_flags = rt->rt_flags;
+               new_rt->rt_type = rt->rt_type;
+               new_rt->rt_is_input = rt->rt_is_input;
+               new_rt->rt_iif = rt->rt_iif; /* copied verbatim; ip_mc_finish_output() zeroes it on its clone */
+               new_rt->rt_pmtu = rt->rt_pmtu;
+               new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+               new_rt->rt_gw_family = rt->rt_gw_family;
+               if (rt->rt_gw_family == AF_INET) /* only the gateway member matching the family is copied */
+                       new_rt->rt_gw4 = rt->rt_gw4;
+               else if (rt->rt_gw_family == AF_INET6)
+                       new_rt->rt_gw6 = rt->rt_gw6;
+               INIT_LIST_HEAD(&new_rt->rt_uncached); /* fresh list head; @rt's rt_uncached is not copied */
+
+               new_rt->dst.flags |= DST_HOST; /* NOTE(review): sets DST_HOST regardless of @rt's flags - confirm intended */
+               new_rt->dst.input = rt->dst.input;
+               new_rt->dst.output = rt->dst.output;
+               new_rt->dst.error = rt->dst.error;
+               new_rt->dst.lastuse = jiffies; /* stamped with the current jiffies, not @rt's lastuse */
+               new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); /* presumably takes a reference - confirm in lwtstate_get() */
+       }
+       return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
 /* called in rcu_read_lock() section */
 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                          u8 tos, struct net_device *dev,