tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt
author Martin KaFai Lau <kafai@fb.com>
Thu, 20 Aug 2020 19:00:21 +0000 (12:00 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Mon, 24 Aug 2020 21:34:59 +0000 (14:34 -0700)
This change is mostly from an internal patch and adapts it from sysctl
config to the bpf_setsockopt setup.

The bpf_prog can set the max delay ack by using
bpf_setsockopt(TCP_BPF_DELACK_MAX).  This max delay ack can be communicated
to its peer through a bpf header option.  The receiving peer can then use
this max delay ack and set a potentially lower rto by using
bpf_setsockopt(TCP_BPF_RTO_MIN), which will be introduced
in the next patch.

A later selftest patch will also use it in the same way to show
how to write and parse a bpf tcp header option.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com
include/net/inet_connection_sock.h
include/uapi/linux/bpf.h
net/core/filter.c
net/ipv4/tcp.c
net/ipv4/tcp_output.c
tools/include/uapi/linux/bpf.h

index aa8893c68c505ad16c0eae751e8cd3b69d0e4bee..da7264a1ebfc96d59fbf29e8d161f375a69b9ce6 100644 (file)
@@ -86,6 +86,7 @@ struct inet_connection_sock {
        struct timer_list         icsk_retransmit_timer;
        struct timer_list         icsk_delack_timer;
        __u32                     icsk_rto;
+       __u32                     icsk_delack_max;
        __u32                     icsk_pmtu_cookie;
        const struct tcp_congestion_ops *icsk_ca_ops;
        const struct inet_connection_sock_af_ops *icsk_af_ops;
index a1bbaff7a0af082ec157e421f535665e7b7486bd..7b905cb0213ee8ecf691cda830bab5f48857ed03 100644 (file)
@@ -4257,6 +4257,7 @@ enum {
 enum {
        TCP_BPF_IW              = 1001, /* Set TCP initial congestion window */
        TCP_BPF_SNDCWND_CLAMP   = 1002, /* Set sndcwnd_clamp */
+       TCP_BPF_DELACK_MAX      = 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
index c847b1285acd493f7abdd34f5c71092125310887..80fe7420f609bd6141cfec594bce6b33365d73bf 100644 (file)
@@ -4459,6 +4459,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                } else {
                        struct inet_connection_sock *icsk = inet_csk(sk);
                        struct tcp_sock *tp = tcp_sk(sk);
+                       unsigned long timeout;
 
                        if (optlen != sizeof(int))
                                return -EINVAL;
@@ -4480,6 +4481,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                                        tp->snd_ssthresh = val;
                                }
                                break;
+                       case TCP_BPF_DELACK_MAX:
+                               timeout = usecs_to_jiffies(val);
+                               if (timeout > TCP_DELACK_MAX ||
+                                   timeout < TCP_TIMEOUT_MIN)
+                                       return -EINVAL;
+                               inet_csk(sk)->icsk_delack_max = timeout;
+                               break;
                        case TCP_SAVE_SYN:
                                if (val < 0 || val > 1)
                                        ret = -EINVAL;
index 87d3036d8bd8f5b51e605350cc453724a4ed6947..44c353a39ad4c389ee499923798ff0b4274113a1 100644 (file)
@@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk)
        INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       icsk->icsk_delack_max = TCP_DELACK_MAX;
        tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
        minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
 
@@ -2685,6 +2686,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        icsk->icsk_backoff = 0;
        icsk->icsk_probes_out = 0;
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       icsk->icsk_delack_max = TCP_DELACK_MAX;
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd = TCP_INIT_CWND;
        tp->snd_cwnd_cnt = 0;
index 85ff417bda7f4738491aa487fc9634442be7022c..44ffa4891beb9daca9ba9f580e762f0855638a7d 100644 (file)
@@ -3741,6 +3741,8 @@ void tcp_send_delayed_ack(struct sock *sk)
                ato = min(ato, max_ato);
        }
 
+       ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+
        /* Stay within the limit we were given */
        timeout = jiffies + ato;
 
index a1bbaff7a0af082ec157e421f535665e7b7486bd..7b905cb0213ee8ecf691cda830bab5f48857ed03 100644 (file)
@@ -4257,6 +4257,7 @@ enum {
 enum {
        TCP_BPF_IW              = 1001, /* Set TCP initial congestion window */
        TCP_BPF_SNDCWND_CLAMP   = 1002, /* Set sndcwnd_clamp */
+       TCP_BPF_DELACK_MAX      = 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {