Commit | Line | Data |
---|---|---|
09903869 MKL |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef __BPF_TCP_HELPERS_H | |
3 | #define __BPF_TCP_HELPERS_H | |
4 | ||
5 | #include <stdbool.h> | |
6 | #include <linux/types.h> | |
3e689141 THJ |
7 | #include <bpf/bpf_helpers.h> |
8 | #include <bpf/bpf_core_read.h> | |
df8ff353 | 9 | #include <bpf/bpf_tracing.h> |
6de4a9c4 MKL |
10 | |
/* Declare a BPF struct_ops callback: place the program in the
 * "struct_ops/<name>" ELF section and let BPF_PROG() unpack its context
 * into the named arguments.
 */
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
14 | ||
f2a6ee92 PG |
#ifndef SOL_TCP
#define SOL_TCP 6		/* socket-option level for TCP (== IPPROTO_TCP) */
#endif

#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX	16	/* max length of a congestion-control name */
#endif

/* Low-resolution TCP clock in jiffies, derived from bpf_jiffies64(). */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
09903869 MKL |
24 | |
/* Minimal mirror of the kernel's struct sock_common; only the fields used
 * by these tests are declared.  preserve_access_index makes every field
 * access a CO-RE relocation, so offsets need not match the running kernel.
 */
struct sock_common {
	unsigned char	skc_state;
	__u16		skc_num;
} __attribute__((preserve_access_index));
29 | ||
6de4a9c4 MKL |
/* Values stored in sock.sk_pacing_status (mirror of the kernel enum). */
enum sk_pacing {
	SK_PACING_NONE = 0,
	SK_PACING_NEEDED = 1,
	SK_PACING_FQ = 2,
};
35 | ||
09903869 MKL |
/* Minimal CO-RE mirror of the kernel's struct sock.  The sk_state
 * shorthand into __sk_common matches the kernel's own #define.
 */
struct sock {
	struct sock_common	__sk_common;
#define sk_state __sk_common.skc_state
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
42 | ||
/* Minimal CO-RE mirror of the kernel's struct inet_sock (embeds sock). */
struct inet_sock {
	struct sock	sk;
} __attribute__((preserve_access_index));
46 | ||
/* Minimal CO-RE mirror of the kernel's struct inet_connection_sock.
 * icsk_ca_priv is the per-connection scratch area that congestion-control
 * algorithms obtain through inet_csk_ca().
 */
struct inet_connection_sock {
	struct inet_sock	icsk_inet;
	__u8			icsk_ca_state:6,
				icsk_ca_setsockopt:1,
				icsk_ca_dst_locked:1;
	struct {
		/* pending ACK state; presumably ICSK_ACK_* flag bits —
		 * confirm against the kernel definition.
		 */
		__u8	pending;
	} icsk_ack;
	__u64			icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
57 | ||
9a856cae MKL |
/* Minimal CO-RE mirror of the kernel's struct request_sock. */
struct request_sock {
	struct sock_common	__req_common;
} __attribute__((preserve_access_index));
61 | ||
09903869 MKL |
/* Minimal CO-RE mirror of the kernel's struct tcp_sock; only the fields
 * these BPF tests touch are declared.  preserve_access_index relocates
 * each access by name, so layout here need not match the kernel.
 */
struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u32	window_clamp;
	__u8	ecn_flags;	/* TCP_ECN_* bits (defined below) */
	__u32	delivered;	/* packets delivered (see rate_sample) */
	__u32	delivered_ce;
	__u32	snd_cwnd;	/* congestion window, in packets */
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;	/* slow-start threshold (see tcp_in_slow_start) */
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
	bool	is_mptcp;
} __attribute__((preserve_access_index));
90 | ||
/* Downcast a sock pointer to its enclosing inet_connection_sock.  The
 * connection sock embeds the sock as its first member, so a plain
 * pointer cast suffices.
 */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk;

	icsk = (struct inet_connection_sock *)sk;
	return icsk;
}
95 | ||
96 | static __always_inline void *inet_csk_ca(const struct sock *sk) | |
97 | { | |
98 | return (void *)inet_csk(sk)->icsk_ca_priv; | |
99 | } | |
100 | ||
/* Downcast a sock pointer to its enclosing tcp_sock (sock is embedded
 * at the start of tcp_sock, so the cast is direct).
 */
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp;

	tp = (struct tcp_sock *)sk;
	return tp;
}
105 | ||
/* Wrap-safe sequence comparison: true iff seq1 precedes seq2 on the
 * 32-bit wrapping sequence space (signed difference is negative).
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
/* after(a, b): true iff a strictly follows b. */
#define after(seq2, seq1) 	before(seq1, seq2)
111 | ||
/* tcp_sock.ecn_flags bits (mirror of the kernel's TCP_ECN_* values). */
#define TCP_ECN_OK		1
#define TCP_ECN_QUEUE_CWR	2
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8
116 | ||
/* ACK-state flag bits (mirror of the kernel enum); presumably stored in
 * icsk_ack.pending — confirm against the kernel definition.
 */
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED = 1,
	ICSK_ACK_TIMER = 2,
	ICSK_ACK_PUSHED = 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};
124 | ||
/* Congestion events delivered to tcp_congestion_ops.cwnd_event()
 * (mirror of the kernel's enum tcp_ca_event).
 */
enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};
133 | ||
09903869 MKL |
/* Per-ACK accounting sample, passed to tcp_congestion_ops.pkts_acked()
 * (CO-RE mirror of the kernel struct).
 */
struct ack_sample {
	__u32	pkts_acked;
	__s32	rtt_us;		/* negative values indicate no valid RTT — confirm */
	__u32	in_flight;
} __attribute__((preserve_access_index));
139 | ||
/* Delivery-rate sample, passed to tcp_congestion_ops.cong_control()
 * (CO-RE mirror of the kernel struct).
 */
struct rate_sample {
	__u64  prior_mstamp;	/* starting timestamp for interval */
	__u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32  delivered;	/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int  losses;		/* number of packets marked lost upon ACK */
	__u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32  prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
155 | ||
/* TCP_CA_NAME_MAX is already conditionally defined near the top of this
 * header; guard here too instead of redefining unconditionally.
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX	16
#endif
/* tcp_congestion_ops.flags bit (mirror of the kernel's value). */
#define TCP_CONG_NEEDS_ECN	0x2
158 | ||
/* BPF-side mirror of the kernel's struct tcp_congestion_ops.  A BPF
 * congestion-control algorithm fills this with struct_ops programs
 * (see BPF_STRUCT_OPS above); callbacks marked "required" must be set.
 */
struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;		/* e.g. TCP_CONG_NEEDS_ECN */

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32 (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
	void *owner;
};
192 | ||
/* Single-evaluation min()/max(): statement expressions avoid the classic
 * double-evaluation hazard of the naive ternary macros (e.g. min(i++, n)),
 * matching the idiom min_not_zero() already uses.  __typeof__ (rather
 * than typeof) also works in strict ISO modes.
 */
#define min(a, b) ({				\
	__typeof__(a) __a = (a);		\
	__typeof__(b) __b = (b);		\
	__a < __b ? __a : __b; })
#define max(a, b) ({				\
	__typeof__(a) __a = (a);		\
	__typeof__(b) __b = (b);		\
	__a > __b ? __a : __b; })
/* min() that ignores a zero argument; yields zero only if both are zero. */
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
199 | ||
09903869 MKL |
200 | static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) |
201 | { | |
202 | return tp->snd_cwnd < tp->snd_ssthresh; | |
203 | } | |
204 | ||
205 | static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) | |
206 | { | |
207 | const struct tcp_sock *tp = tcp_sk(sk); | |
208 | ||
209 | /* If in slow start, ensure cwnd grows to twice what was ACKed. */ | |
210 | if (tcp_in_slow_start(tp)) | |
211 | return tp->snd_cwnd < 2 * tp->max_packets_out; | |
212 | ||
213 | return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); | |
214 | } | |
215 | ||
f2a6ee92 PG |
216 | static __always_inline bool tcp_cc_eq(const char *a, const char *b) |
217 | { | |
218 | int i; | |
219 | ||
220 | for (i = 0; i < TCP_CA_NAME_MAX; i++) { | |
221 | if (a[i] != b[i]) | |
222 | return false; | |
223 | if (!a[i]) | |
224 | break; | |
225 | } | |
226 | ||
227 | return true; | |
228 | } | |
229 | ||
78e60bbb MKL |
/* Kernel congestion-control helpers exposed to BPF as kfuncs (__ksym). */
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
09903869 | 232 | |
3bc48b56 GT |
/* Minimal CO-RE mirror of the kernel's struct mptcp_sock; only the
 * fields the MPTCP tests read are declared.
 */
struct mptcp_sock {
	struct inet_connection_sock	sk;

	__u32		token;		/* MPTCP connection token */
	char		ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));
239 | ||
09903869 | 240 | #endif |