ipv4: remove fib_info_devhash[]
author Eric Dumazet <edumazet@google.com>
Fri, 4 Oct 2024 13:47:20 +0000 (13:47 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 7 Oct 2024 23:46:27 +0000 (16:46 -0700)
Upcoming per-netns RTNL conversion needs to get rid
of shared hash tables.

fib_info_devhash[] is one of them.

It is unclear why we used a hash table, because
a single hlist_head per net device is cheaper and more scalable.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20241004134720.579244-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Documentation/networking/net_cachelines/net_device.rst
include/linux/netdevice.h
net/ipv4/fib_semantics.c

index 49f03cb78c6e25109af969654c86ebeb19d38e12..556711c4d3cf0979be4d334623bce9989c482d5f 100644 (file)
@@ -83,6 +83,7 @@ unsigned_int                        allmulti
 bool                                uc_promisc                                                      
 unsigned_char                       nested_level                                                    
 struct_in_device*                   ip_ptr                  read_mostly         read_mostly         __in_dev_get
+struct hlist_head                   fib_nh_head
 struct_inet6_dev*                   ip6_ptr                 read_mostly         read_mostly         __in6_dev_get
 struct_vlan_info*                   vlan_info                                                       
 struct_dsa_port*                    dsa_ptr                                                         
index 49a7e7db0883b4224f354bb81be13e4e9a799210..3baf8e539b6f33caaf83961c4cf619b799e5e41d 100644 (file)
@@ -2211,6 +2211,9 @@ struct net_device {
 
        /* Protocol-specific pointers */
        struct in_device __rcu  *ip_ptr;
+       /** @fib_nh_head: nexthops associated with this netdev */
+       struct hlist_head       fib_nh_head;
+
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
        struct vlan_info __rcu  *vlan_info;
 #endif
index ece779bfb8f6bec67eb7751761df9a4f158020a8..d2cee5c314f5e76530ac564f49b433822bb0a272 100644 (file)
@@ -56,10 +56,6 @@ static unsigned int fib_info_hash_size;
 static unsigned int fib_info_hash_bits;
 static unsigned int fib_info_cnt;
 
-#define DEVINDEX_HASHBITS 8
-#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-
 /* for_nexthops and change_nexthops only used when nexthop object
  * is not set in a fib_info. The logic within can reference fib_nh.
  */
@@ -319,12 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
        return 0;
 }
 
-static struct hlist_head *
-fib_info_devhash_bucket(const struct net_device *dev)
+static struct hlist_head *fib_nh_head(struct net_device *dev)
 {
-       u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
-
-       return &fib_info_devhash[hash_32(val, DEVINDEX_HASHBITS)];
+       return &dev->fib_nh_head;
 }
 
 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -435,11 +428,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
        struct hlist_head *head;
        struct fib_nh *nh;
 
-       head = fib_info_devhash_bucket(dev);
+       head = fib_nh_head(dev);
 
        hlist_for_each_entry_rcu(nh, head, nh_hash) {
-               if (nh->fib_nh_dev == dev &&
-                   nh->fib_nh_gw4 == gw &&
+               DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+               if (nh->fib_nh_gw4 == gw &&
                    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
                        return 0;
                }
@@ -1595,7 +1588,7 @@ link_it:
 
                        if (!nexthop_nh->fib_nh_dev)
                                continue;
-                       head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
+                       head = fib_nh_head(nexthop_nh->fib_nh_dev);
                        hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
                } endfor_nexthops(fi)
        }
@@ -1948,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
-       struct hlist_head *head = fib_info_devhash_bucket(dev);
+       struct hlist_head *head = fib_nh_head(dev);
        struct fib_nh *nh;
 
        hlist_for_each_entry(nh, head, nh_hash) {
-               if (nh->fib_nh_dev == dev)
-                       fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+               DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+               fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
        }
 }
 
@@ -1967,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
-       struct hlist_head *head = fib_info_devhash_bucket(dev);
+       struct hlist_head *head = fib_nh_head(dev);
        struct fib_info *prev_fi = NULL;
        int scope = RT_SCOPE_NOWHERE;
        struct fib_nh *nh;
@@ -1981,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
                int dead;
 
                BUG_ON(!fi->fib_nhs);
-               if (nh->fib_nh_dev != dev || fi == prev_fi)
+               DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+               if (fi == prev_fi)
                        continue;
                prev_fi = fi;
                dead = 0;
@@ -2131,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
        }
 
        prev_fi = NULL;
-       head = fib_info_devhash_bucket(dev);
+       head = fib_nh_head(dev);
        ret = 0;
 
        hlist_for_each_entry(nh, head, nh_hash) {
@@ -2139,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
                int alive;
 
                BUG_ON(!fi->fib_nhs);
-               if (nh->fib_nh_dev != dev || fi == prev_fi)
+               DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+               if (fi == prev_fi)
                        continue;
 
                prev_fi = fi;