Commit | Line | Data |
---|---|---|
f870fa0b MM |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2017 - 2019, Intel Corporation. | |
5 | */ | |
6 | ||
7 | #ifndef __MPTCP_PROTOCOL_H | |
8 | #define __MPTCP_PROTOCOL_H | |
9 | ||
79c0949e PK |
10 | #include <linux/random.h> |
11 | #include <net/tcp.h> | |
12 | #include <net/inet_connection_sock.h> | |
13 | ||
/* MPTCP version value supported by this code */
#define MPTCP_SUPPORTED_VERSION	1

/* MPTCP option bits: one distinct bit per option flavour */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
#define OPTION_MPTCP_RM_ADDR	BIT(8)

/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7
36 | ||
/* MPTCP suboption lengths */

/* MP_CAPABLE */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22

/* MP_JOIN */
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24

/* DSS */
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2

/* ADD_ADDR: each non-BASE value is its BASE value plus 8, each _PORT
 * value is its port-less counterpart plus TCPOLEN_MPTCP_PORT_LEN
 */
#define TCPOLEN_MPTCP_ADD_ADDR			16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT		18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE		8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6			28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT		30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE		20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN			2

/* RM_ADDR */
#define TCPOLEN_MPTCP_RM_ADDR_BASE	4

#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
64 | ||
eda7acdd PK |
/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x3F)

/* MPTCP DSS flags; MPTCP_DSS_FLAG_MASK covers bits 0-4, i.e. all of them */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)

/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO		BIT(0)
#define MPTCP_ADDR_HMAC_LEN	20
#define MPTCP_ADDR_IPVERSION_4	4
#define MPTCP_ADDR_IPVERSION_6	6

/* MPTCP socket flags
 * NOTE(review): plain indexes rather than BIT() masks, presumably bit
 * numbers within mptcp_sock::flags - confirm against the users.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
6d0060f6 | 89 | |
3df523ab PK |
90 | static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) |
91 | { | |
92 | return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | | |
93 | ((nib & 0xF) << 8) | field); | |
94 | } | |
95 | ||
1b1c7a0e PK |
96 | #define MPTCP_PM_MAX_ADDR 4 |
97 | ||
98 | struct mptcp_addr_info { | |
99 | sa_family_t family; | |
100 | __be16 port; | |
101 | u8 id; | |
102 | union { | |
103 | struct in_addr addr; | |
104 | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | |
105 | struct in6_addr addr6; | |
106 | #endif | |
107 | }; | |
108 | }; | |
109 | ||
/* Path manager status values; numbering starts at 0 and is contiguous */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED	= 0,
	MPTCP_PM_ESTABLISHED		= 1,
	MPTCP_PM_SUBFLOW_ESTABLISHED	= 2,
};
115 | ||
116 | struct mptcp_pm_data { | |
117 | struct mptcp_addr_info local; | |
118 | struct mptcp_addr_info remote; | |
119 | ||
120 | spinlock_t lock; /*protects the whole PM data */ | |
121 | ||
122 | bool addr_signal; | |
123 | bool server_side; | |
124 | bool work_pending; | |
125 | bool accept_addr; | |
126 | bool accept_subflow; | |
127 | u8 add_addr_signaled; | |
128 | u8 add_addr_accepted; | |
129 | u8 local_addr_used; | |
130 | u8 subflows; | |
131 | u8 add_addr_signal_max; | |
132 | u8 add_addr_accept_max; | |
133 | u8 local_addr_max; | |
134 | u8 subflows_max; | |
135 | u8 status; | |
136 | ||
137 | struct work_struct work; | |
138 | }; | |
139 | ||
f870fa0b MM |
140 | /* MPTCP connection sock */ |
141 | struct mptcp_sock { | |
142 | /* inet_connection_sock must be the first member */ | |
143 | struct inet_connection_sock sk; | |
cec37a6e PK |
144 | u64 local_key; |
145 | u64 remote_key; | |
6d0060f6 MM |
146 | u64 write_seq; |
147 | u64 ack_seq; | |
79c0949e | 148 | u32 token; |
648ef4b8 | 149 | unsigned long flags; |
d22f4988 | 150 | bool can_ack; |
80992017 | 151 | struct work_struct work; |
cec37a6e | 152 | struct list_head conn_list; |
6d0060f6 | 153 | struct skb_ext *cached_ext; /* for the next sendmsg */ |
f870fa0b | 154 | struct socket *subflow; /* outgoing connect/listener/!mp_capable */ |
8ab183de | 155 | struct sock *first; |
1b1c7a0e | 156 | struct mptcp_pm_data pm; |
f870fa0b MM |
157 | }; |
158 | ||
cec37a6e PK |
/* Walk every entry linked on @__msk->conn_list through its 'node' member,
 * using @__subflow as the loop cursor.
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
161 | ||
f870fa0b MM |
/* Reinterpret @sk as the MPTCP socket that contains it.
 *
 * This is a plain pointer cast (which also drops the const qualifier);
 * no check is made that @sk really is an MPTCP socket.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	struct mptcp_sock *msk = (struct mptcp_sock *)sk;

	return msk;
}
166 | ||
cec37a6e PK |
167 | struct mptcp_subflow_request_sock { |
168 | struct tcp_request_sock sk; | |
d22f4988 | 169 | u16 mp_capable : 1, |
cec37a6e | 170 | mp_join : 1, |
d22f4988 CP |
171 | backup : 1, |
172 | remote_key_valid : 1; | |
1b1c7a0e | 173 | u8 local_id; |
f296234c | 174 | u8 remote_id; |
cec37a6e PK |
175 | u64 local_key; |
176 | u64 remote_key; | |
79c0949e PK |
177 | u64 idsn; |
178 | u32 token; | |
648ef4b8 | 179 | u32 ssn_offset; |
f296234c PK |
180 | u64 thmac; |
181 | u32 local_nonce; | |
182 | u32 remote_nonce; | |
cec37a6e PK |
183 | }; |
184 | ||
/* Reinterpret @rsk as the MPTCP subflow request sock that contains it.
 * Plain pointer cast, dropping the const qualifier; no type check is done.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	struct mptcp_subflow_request_sock *subflow_req;

	subflow_req = (struct mptcp_subflow_request_sock *)rsk;
	return subflow_req;
}
190 | ||
2303f994 PK |
191 | /* MPTCP subflow context */ |
192 | struct mptcp_subflow_context { | |
cec37a6e PK |
193 | struct list_head node;/* conn_list of subflows */ |
194 | u64 local_key; | |
195 | u64 remote_key; | |
79c0949e | 196 | u64 idsn; |
648ef4b8 | 197 | u64 map_seq; |
cc7972ea | 198 | u32 snd_isn; |
79c0949e | 199 | u32 token; |
6d0060f6 | 200 | u32 rel_write_seq; |
648ef4b8 MM |
201 | u32 map_subflow_seq; |
202 | u32 ssn_offset; | |
203 | u32 map_data_len; | |
cec37a6e PK |
204 | u32 request_mptcp : 1, /* send MP_CAPABLE */ |
205 | mp_capable : 1, /* remote is MPTCP capable */ | |
f296234c | 206 | mp_join : 1, /* remote is JOINing */ |
0be534f5 | 207 | fully_established : 1, /* path validated */ |
f296234c | 208 | pm_notified : 1, /* PM hook called for established status */ |
648ef4b8 MM |
209 | conn_finished : 1, |
210 | map_valid : 1, | |
d22f4988 | 211 | mpc_map : 1, |
f296234c | 212 | backup : 1, |
648ef4b8 | 213 | data_avail : 1, |
d22f4988 | 214 | rx_eof : 1, |
76c42a29 | 215 | data_fin_tx_enable : 1, |
d22f4988 | 216 | can_ack : 1; /* only after processing the remote a key */ |
76c42a29 | 217 | u64 data_fin_tx_seq; |
f296234c PK |
218 | u32 remote_nonce; |
219 | u64 thmac; | |
220 | u32 local_nonce; | |
221 | u8 local_id; | |
222 | u8 remote_id; | |
648ef4b8 | 223 | |
2303f994 PK |
224 | struct sock *tcp_sock; /* tcp sk backpointer */ |
225 | struct sock *conn; /* parent mptcp_sock */ | |
cec37a6e | 226 | const struct inet_connection_sock_af_ops *icsk_af_ops; |
648ef4b8 MM |
227 | void (*tcp_data_ready)(struct sock *sk); |
228 | void (*tcp_state_change)(struct sock *sk); | |
229 | void (*tcp_write_space)(struct sock *sk); | |
230 | ||
2303f994 PK |
231 | struct rcu_head rcu; |
232 | }; | |
233 | ||
234 | static inline struct mptcp_subflow_context * | |
235 | mptcp_subflow_ctx(const struct sock *sk) | |
236 | { | |
237 | struct inet_connection_sock *icsk = inet_csk(sk); | |
238 | ||
239 | /* Use RCU on icsk_ulp_data only for sock diag code */ | |
240 | return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; | |
241 | } | |
242 | ||
243 | static inline struct sock * | |
244 | mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) | |
245 | { | |
246 | return subflow->tcp_sock; | |
247 | } | |
248 | ||
648ef4b8 MM |
249 | static inline u64 |
250 | mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) | |
251 | { | |
252 | return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq - | |
253 | subflow->ssn_offset - | |
254 | subflow->map_subflow_seq; | |
255 | } | |
256 | ||
257 | static inline u64 | |
258 | mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) | |
259 | { | |
260 | return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); | |
261 | } | |
262 | ||
263 | int mptcp_is_enabled(struct net *net); | |
264 | bool mptcp_subflow_data_available(struct sock *sk); | |
2303f994 PK |
265 | void mptcp_subflow_init(void); |
266 | int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); | |
267 | ||
648ef4b8 MM |
268 | static inline void mptcp_subflow_tcp_fallback(struct sock *sk, |
269 | struct mptcp_subflow_context *ctx) | |
270 | { | |
271 | sk->sk_data_ready = ctx->tcp_data_ready; | |
272 | sk->sk_state_change = ctx->tcp_state_change; | |
273 | sk->sk_write_space = ctx->tcp_write_space; | |
274 | ||
275 | inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; | |
276 | } | |
277 | ||
cec37a6e PK |
278 | extern const struct inet_connection_sock_af_ops ipv4_specific; |
279 | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | |
280 | extern const struct inet_connection_sock_af_ops ipv6_specific; | |
281 | #endif | |
282 | ||
648ef4b8 | 283 | void mptcp_proto_init(void); |
784325e9 MB |
284 | #if IS_ENABLED(CONFIG_MPTCP_IPV6) |
285 | int mptcp_proto_v6_init(void); | |
286 | #endif | |
648ef4b8 | 287 | |
58b09919 | 288 | struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); |
cec37a6e PK |
289 | void mptcp_get_options(const struct sk_buff *skb, |
290 | struct tcp_options_received *opt_rx); | |
291 | ||
292 | void mptcp_finish_connect(struct sock *sk); | |
2e52213c | 293 | void mptcp_data_ready(struct sock *sk, struct sock *ssk); |
f296234c | 294 | bool mptcp_finish_join(struct sock *sk); |
cec37a6e | 295 | |
79c0949e PK |
296 | int mptcp_token_new_request(struct request_sock *req); |
297 | void mptcp_token_destroy_request(u32 token); | |
298 | int mptcp_token_new_connect(struct sock *sk); | |
58b09919 | 299 | int mptcp_token_new_accept(u32 token, struct sock *conn); |
f296234c | 300 | struct mptcp_sock *mptcp_token_get_sock(u32 token); |
79c0949e PK |
301 | void mptcp_token_destroy(u32 token); |
302 | ||
303 | void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); | |
304 | static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) | |
305 | { | |
306 | /* we might consider a faster version that computes the key as a | |
307 | * hash of some information available in the MPTCP socket. Use | |
308 | * random data at the moment, as it's probably the safest option | |
309 | * in case multiple sockets are opened in different namespaces at | |
310 | * the same time. | |
311 | */ | |
312 | get_random_bytes(key, sizeof(u64)); | |
313 | mptcp_crypto_key_sha(*key, token, idsn); | |
314 | } | |
315 | ||
3df523ab | 316 | void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); |
79c0949e | 317 | |
1b1c7a0e PK |
318 | void mptcp_pm_init(void); |
319 | void mptcp_pm_data_init(struct mptcp_sock *msk); | |
320 | void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side); | |
321 | void mptcp_pm_fully_established(struct mptcp_sock *msk); | |
322 | bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); | |
323 | void mptcp_pm_connection_closed(struct mptcp_sock *msk); | |
324 | void mptcp_pm_subflow_established(struct mptcp_sock *msk, | |
325 | struct mptcp_subflow_context *subflow); | |
326 | void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); | |
327 | void mptcp_pm_add_addr_received(struct mptcp_sock *msk, | |
328 | const struct mptcp_addr_info *addr); | |
329 | ||
330 | int mptcp_pm_announce_addr(struct mptcp_sock *msk, | |
331 | const struct mptcp_addr_info *addr); | |
332 | int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); | |
333 | int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id); | |
334 | ||
335 | static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk) | |
336 | { | |
337 | return READ_ONCE(msk->pm.addr_signal); | |
338 | } | |
339 | ||
340 | static inline unsigned int mptcp_add_addr_len(int family) | |
341 | { | |
342 | if (family == AF_INET) | |
343 | return TCPOLEN_MPTCP_ADD_ADDR; | |
344 | return TCPOLEN_MPTCP_ADD_ADDR6; | |
345 | } | |
346 | ||
347 | bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, | |
348 | struct mptcp_addr_info *saddr); | |
349 | int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); | |
350 | ||
6d0060f6 MM |
351 | static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) |
352 | { | |
353 | return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); | |
354 | } | |
355 | ||
648ef4b8 MM |
/* Return true when the difference @seq1 - @seq2, reinterpreted as a
 * signed 64-bit value, is negative. Unlike a plain '<', this compares
 * correctly across a wrap of the 64-bit counter.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	__s64 delta = (__s64)(seq1 - seq2);

	return delta < 0;
}

/* after64(a, b) is before64() with its two arguments swapped */
#define after64(a, b)	before64(b, a)
362 | ||
f870fa0b | 363 | #endif /* __MPTCP_PROTOCOL_H */ |