tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field
authorEric Dumazet <edumazet@google.com>
Sun, 7 Apr 2024 09:33:22 +0000 (09:33 +0000)
committerPaolo Abeni <pabeni@redhat.com>
Tue, 9 Apr 2024 09:47:40 +0000 (11:47 +0200)
TCP can transform a TIMEWAIT socket into a SYN_RECV one from
a SYN packet, and the ISN of the SYNACK packet is normally
generated using TIMEWAIT tw_snd_nxt :

tcp_timewait_state_process()
...
    u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
    if (isn == 0)
        isn++;
    TCP_SKB_CB(skb)->tcp_tw_isn = isn;
    return TCP_TW_SYN;

This SYN packet also bypasses normal checks against listen queue
being full or not.

tcp_conn_request()
...
       __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
...
        /* TW buckets are converted to open requests without
         * limitations, they conserve resources and peer is
         * evidently real one.
         */
        if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
                if (!want_cookie)
                        goto drop;
        }

This was using TCP_SKB_CB(skb)->tcp_tw_isn field in skb.

Unfortunately this field has been accidentally cleared
after the call to tcp_timewait_state_process() returning
TCP_TW_SYN.

Using a field in TCP_SKB_CB(skb) for a temporary state
is overkill.

Switch instead to a per-cpu variable.

As a bonus, we do not have to clear tcp_tw_isn in TCP receive
fast path.
It is temporarily set then cleared only in the TCP_TW_SYN dance.

Fixes: 4ad19de8774e ("net: tcp6: fix double call of tcp_v6_fill_cb()")
Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv6/tcp_ipv6.c

index fa0ab77acee23654b22e97615de983fc04eee319..ba6c5ae86e228a1633feaf39b0f1e053c3832b08 100644 (file)
@@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo;
 DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
 int tcp_orphan_count_sum(void);
 
+DECLARE_PER_CPU(u32, tcp_tw_isn);
+
 void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -392,7 +394,8 @@ enum tcp_tw_status {
 
 enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
                                              struct sk_buff *skb,
-                                             const struct tcphdr *th);
+                                             const struct tcphdr *th,
+                                             u32 *tw_isn);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                           struct request_sock *req, bool fastopen,
                           bool *lost_race);
@@ -935,13 +938,10 @@ struct tcp_skb_cb {
        __u32           seq;            /* Starting sequence number     */
        __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
        union {
-               /* Note : tcp_tw_isn is used in input path only
-                *        (isn chosen by tcp_timewait_state_process())
-                *
+               /* Note :
                 *        tcp_gso_segs/size are used in write queue only,
                 *        cf tcp_skb_pcount()/tcp_skb_mss()
                 */
-               __u32           tcp_tw_isn;
                struct {
                        u16     tcp_gso_segs;
                        u16     tcp_gso_size;
index 664c8ecb076bc2ae177d56e10f0d60b38d07ff8e..b07aa71b24ec147d2f1f148e288231b3a492fb5a 100644 (file)
@@ -290,6 +290,9 @@ enum {
 DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
 EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
 
+DEFINE_PER_CPU(u32, tcp_tw_isn);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
+
 long sysctl_tcp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_tcp_mem);
 
index 48c275e6ef02bfc5dd98f0878c752841f949c714..5a45a0923a1f058cdc80255be0f76a71fd102d4d 100644 (file)
@@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                     struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_fastopen_cookie foc = { .len = -1 };
-       __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
        struct tcp_options_received tmp_opt;
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
@@ -7107,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        struct dst_entry *dst;
        struct flowi fl;
        u8 syncookies;
+       u32 isn;
 
 #ifdef CONFIG_TCP_AO
        const struct tcp_ao_hdr *aoh;
 #endif
 
-       syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+       isn = __this_cpu_read(tcp_tw_isn);
+       if (isn) {
+               /* TW buckets are converted to open requests without
+                * limitations, they conserve resources and peer is
+                * evidently real one.
+                */
+               __this_cpu_write(tcp_tw_isn, 0);
+       } else {
+               syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
 
-       /* TW buckets are converted to open requests without
-        * limitations, they conserve resources and peer is
-        * evidently real one.
-        */
-       if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-               want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
-               if (!want_cookie)
-                       goto drop;
+               if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
+                       want_cookie = tcp_syn_flood_action(sk,
+                                                          rsk_ops->slab_name);
+                       if (!want_cookie)
+                               goto drop;
+               }
        }
 
        if (sk_acceptq_is_full(sk)) {
index 81e2f05c244d1671980a34bb756f528f3e6debcc..1e650ec71d2fe5198b9dad9e6ea9c5eaf868277f 100644 (file)
@@ -2146,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-       TCP_SKB_CB(skb)->tcp_tw_isn = 0;
        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
        TCP_SKB_CB(skb)->sacked  = 0;
        TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2168,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
        bool refcounted;
        struct sock *sk;
        int ret;
+       u32 isn;
 
        drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
        if (skb->pkt_type != PACKET_HOST)
@@ -2383,7 +2383,7 @@ do_time_wait:
                inet_twsk_put(inet_twsk(sk));
                goto csum_error;
        }
-       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(net,
                                                        net->ipv4.tcp_death_row.hashinfo,
@@ -2397,6 +2397,7 @@ do_time_wait:
                        sk = sk2;
                        tcp_v4_restore_cb(skb);
                        refcounted = false;
+                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }
        }
index 5b21a07ddf9aa5593d21cb856f0e0ea2f45b1eef..f53c7ada2ace4219917e75f806f39a00d5ab0123 100644 (file)
@@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
  */
 enum tcp_tw_status
 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
-                          const struct tcphdr *th)
+                          const struct tcphdr *th, u32 *tw_isn)
 {
        struct tcp_options_received tmp_opt;
        struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -228,7 +228,7 @@ kill:
                u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
                if (isn == 0)
                        isn++;
-               TCP_SKB_CB(skb)->tcp_tw_isn = isn;
+               *tw_isn = isn;
                return TCP_TW_SYN;
        }
 
index 5141f7033abd8bb03bc4e162066ca4befe343bdc..3aa9da5c9a669d2754b421cfb704ad28def5a748 100644 (file)
@@ -1741,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
                                    skb->len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-       TCP_SKB_CB(skb)->tcp_tw_isn = 0;
        TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
        TCP_SKB_CB(skb)->sacked = 0;
        TCP_SKB_CB(skb)->has_rxtstamp =
@@ -1758,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
        bool refcounted;
        struct sock *sk;
        int ret;
+       u32 isn;
        struct net *net = dev_net(skb->dev);
 
        drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -1967,7 +1967,7 @@ do_time_wait:
                goto csum_error;
        }
 
-       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
        case TCP_TW_SYN:
        {
                struct sock *sk2;
@@ -1985,6 +1985,7 @@ do_time_wait:
                        sk = sk2;
                        tcp_v6_restore_cb(skb);
                        refcounted = false;
+                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }
        }