tcp: use RCU in __inet{6}_check_established()
author	Eric Dumazet <edumazet@google.com>
	Sun, 2 Mar 2025 12:42:34 +0000 (12:42 +0000)
committer	Jakub Kicinski <kuba@kernel.org>
	Wed, 5 Mar 2025 01:46:26 +0000 (17:46 -0800)
When __inet_hash_connect() has to try many 4-tuples before
finding an available one, we see a high spinlock cost from
__inet_check_established() and/or __inet6_check_established().

This patch adds an RCU lookup to avoid the spinlock
acquisition when the 4-tuple is found in the hash table.

Note that there are still spin_lock_bh() calls in
__inet_hash_connect() to protect inet_bind_hashbucket;
these will be addressed later in this series.
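
In essence, the change puts an optimistic lockless pre-check in front
of the existing locked check. A condensed, annotated sketch of the
IPv4 side (comments added here for illustration only, they are not
part of the patch):

	rcu_read_lock();
	/* Fast path: scan the ehash chain without the bucket lock. */
	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash ||
		    !inet_match(net, sk2, acookie, ports, dif, sdif))
			continue;
		/* A TIME_WAIT socket may be reusable: fall through
		 * to the locked path to decide.
		 */
		if (sk2->sk_state == TCP_TIME_WAIT)
			break;
		rcu_read_unlock();
		return -EADDRNOTAVAIL;	/* 4-tuple already in use */
	}
	rcu_read_unlock();

	/* Slow path: take the bucket lock and redo the check, since
	 * the chain may have changed since the lockless scan.
	 */
	lock = inet_ehash_lockp(hinfo, hash);
	spin_lock(lock);

Only the case where the 4-tuple is found busy avoids the spinlock
entirely; all other outcomes still go through the locked path as
before.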

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Tested-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250302124237.3913746-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/inet_hashtables.c
net/ipv6/inet6_hashtables.c

index 9bfcfd016e18275fb50fea8d77adc8a64fb12494..46d39aa2199ec3a405b50e8e85130e990d2c26b7 100644 (file)
@@ -551,11 +551,24 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        unsigned int hash = inet_ehashfn(net, daddr, lport,
                                         saddr, inet->inet_dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
-       struct sock *sk2;
-       const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
+       const struct hlist_nulls_node *node;
+       struct sock *sk2;
+       spinlock_t *lock;
+
+       rcu_read_lock();
+       sk_nulls_for_each(sk2, node, &head->chain) {
+               if (sk2->sk_hash != hash ||
+                   !inet_match(net, sk2, acookie, ports, dif, sdif))
+                       continue;
+               if (sk2->sk_state == TCP_TIME_WAIT)
+                       break;
+               rcu_read_unlock();
+               return -EADDRNOTAVAIL;
+       }
+       rcu_read_unlock();
 
+       lock = inet_ehash_lockp(hinfo, hash);
        spin_lock(lock);
 
        sk_nulls_for_each(sk2, node, &head->chain) {
index 9ec05e354baa69d14e88da37f5a9fce11e874e35..3604a5cae5d29a25d24f9513308334ff8e64b083 100644 (file)
@@ -276,11 +276,24 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
        const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
                                                inet->inet_dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
-       struct sock *sk2;
-       const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
+       const struct hlist_nulls_node *node;
+       struct sock *sk2;
+       spinlock_t *lock;
+
+       rcu_read_lock();
+       sk_nulls_for_each(sk2, node, &head->chain) {
+               if (sk2->sk_hash != hash ||
+                   !inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))
+                       continue;
+               if (sk2->sk_state == TCP_TIME_WAIT)
+                       break;
+               rcu_read_unlock();
+               return -EADDRNOTAVAIL;
+       }
+       rcu_read_unlock();
 
+       lock = inet_ehash_lockp(hinfo, hash);
        spin_lock(lock);
 
        sk_nulls_for_each(sk2, node, &head->chain) {