bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT
author    Stanislav Fomichev <sdf@google.com>
          Tue, 2 Jul 2019 16:13:56 +0000 (09:13 -0700)
committer Daniel Borkmann <daniel@iogearbox.net>
          Wed, 3 Jul 2019 14:52:01 +0000 (16:52 +0200)
Performance impact should be minimal because it's under a new
BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled.
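
As an illustration only (not part of this patch), a sockops program can opt
into the new callback roughly as follows; this is a minimal sketch assuming a
libbpf-style build, the program name is made up, and the per-RTT action is
just a trace print:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  SEC("sockops")
  int rtt_sockops(struct bpf_sock_ops *skops)
  {
          switch (skops->op) {
          case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
          case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
                  /* Opt in: BPF_SOCK_OPS_RTT_CB only fires once this flag is set. */
                  bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTT_CB_FLAG);
                  break;
          case BPF_SOCK_OPS_RTT_CB:
                  /* Invoked from tcp_rtt_estimator() on every RTT sample. */
                  bpf_printk("srtt_us=%u\n", skops->srtt_us);
                  break;
          }
          return 1;
  }

  char _license[] SEC("license") = "GPL";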

Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
include/net/tcp.h
include/uapi/linux/bpf.h
net/ipv4/tcp_input.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d36cc88d043468db278b2ed3bf57599dc9e3251..e16d8a3fd3b4eaba2fe7603f23427a50f08d9aa6 100644
@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
        return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }
 
+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
+               tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cffea1826a1f2ac20b0d878c5a87f240c2f1e1ce..9cdd0aaeba06aa772dc95255c27bf2b3b72287d8 100644
@@ -1770,6 +1770,7 @@ union bpf_attr {
  *             * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  *             * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  *             * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *             * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
  *
  *             Therefore, this function can be used to clear a callback flag by
  *             setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3314,7 +3315,8 @@ struct bpf_sock_ops {
 #define BPF_SOCK_OPS_RTO_CB_FLAG       (1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG   (1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG     (1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7            /* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG       (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF            /* Mask of all currently
                                                         * supported cb flags
                                                         */
 
@@ -3369,6 +3371,8 @@ enum {
        BPF_SOCK_OPS_TCP_LISTEN_CB,     /* Called on listen(2), right after
                                         * socket transition to LISTEN state.
                                         */
+       BPF_SOCK_OPS_RTT_CB,            /* Called on every RTT.
+                                        */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b71efeb0ae5bcade4e35edb4b9941a8b7a23226f..c21e8a22fb3bb39d06eb3ee7eb4cfae5066b6f48 100644
@@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
                                tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
                        tp->rtt_seq = tp->snd_nxt;
                        tp->mdev_max_us = tcp_rto_min_us(sk);
+
+                       tcp_bpf_rtt(sk);
                }
        } else {
                /* no previous measure. */
@@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
                tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
                tp->mdev_max_us = tp->rttvar_us;
                tp->rtt_seq = tp->snd_nxt;
+
+               tcp_bpf_rtt(sk);
        }
        tp->srtt_us = max(1U, srtt);
 }
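
For completeness, a rough userspace sketch (not part of this patch) of how
such a sockops program could be attached to a cgroup as BPF_CGROUP_SOCK_OPS
via libbpf; the object and cgroup paths are placeholders and error handling
is trimmed:

  #include <fcntl.h>
  #include <unistd.h>
  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
          struct bpf_object *obj;
          struct bpf_program *prog;
          int cg_fd, prog_fd;

          /* "rtt_sockops.o" is a placeholder for the compiled sockops program. */
          obj = bpf_object__open("rtt_sockops.o");
          if (libbpf_get_error(obj) || bpf_object__load(obj))
                  return 1;

          prog = bpf_object__find_program_by_title(obj, "sockops");
          if (!prog)
                  return 1;
          prog_fd = bpf_program__fd(prog);

          /* Placeholder cgroup; sockets created in it will hit the program. */
          cg_fd = open("/sys/fs/cgroup/unified/test", O_RDONLY);
          if (cg_fd < 0 || bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0))
                  return 1;

          close(cg_fd);
          return 0;
  }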