mptcp: better msk receive window updates
[linux-2.6-block.git] / net / mptcp / options.c
CommitLineData
eda7acdd
PK
1// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
c85adced
GT
7#define pr_fmt(fmt) "MPTCP: " fmt
8
eda7acdd 9#include <linux/kernel.h>
a24d22b2 10#include <crypto/sha2.h>
eda7acdd
PK
11#include <net/tcp.h>
12#include <net/mptcp.h>
13#include "protocol.h"
a877de06 14#include "mib.h"
eda7acdd 15
65492c5a
PA
16static bool mptcp_cap_flag_sha256(u8 flags)
17{
18 return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
19}
20
cfde141e
PA
21static void mptcp_parse_option(const struct sk_buff *skb,
22 const unsigned char *ptr, int opsize,
23 struct mptcp_options_received *mp_opt)
eda7acdd 24{
eda7acdd 25 u8 subtype = *ptr >> 4;
648ef4b8 26 int expected_opsize;
eda7acdd
PK
27 u8 version;
28 u8 flags;
29
30 switch (subtype) {
31 case MPTCPOPT_MP_CAPABLE:
cc7972ea
CP
32 /* strict size checking */
33 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
34 if (skb->len > tcp_hdr(skb)->doff << 2)
35 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
36 else
37 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
38 } else {
39 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
40 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
41 else
42 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
43 }
44 if (opsize != expected_opsize)
eda7acdd
PK
45 break;
46
cc7972ea 47 /* try to be gentle vs future versions on the initial syn */
eda7acdd 48 version = *ptr++ & MPTCP_VERSION_MASK;
cc7972ea
CP
49 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
50 if (version != MPTCP_SUPPORTED_VERSION)
51 break;
52 } else if (version < MPTCP_SUPPORTED_VERSION) {
eda7acdd 53 break;
cc7972ea 54 }
eda7acdd
PK
55
56 flags = *ptr++;
65492c5a 57 if (!mptcp_cap_flag_sha256(flags) ||
eda7acdd
PK
58 (flags & MPTCP_CAP_EXTENSIBILITY))
59 break;
60
61 /* RFC 6824, Section 3.1:
62 * "For the Checksum Required bit (labeled "A"), if either
63 * host requires the use of checksums, checksums MUST be used.
64 * In other words, the only way for checksums not to be used
65 * is if both hosts in their SYNs set A=0."
66 *
67 * Section 3.3.0:
68 * "If a checksum is not present when its use has been
69 * negotiated, the receiver MUST close the subflow with a RST as
70 * it is considered broken."
71 *
72 * We don't implement DSS checksum - fall back to TCP.
73 */
74 if (flags & MPTCP_CAP_CHECKSUM_REQD)
75 break;
76
77 mp_opt->mp_capable = 1;
cc7972ea
CP
78 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
79 mp_opt->sndr_key = get_unaligned_be64(ptr);
80 ptr += 8;
81 }
82 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
eda7acdd
PK
83 mp_opt->rcvr_key = get_unaligned_be64(ptr);
84 ptr += 8;
eda7acdd 85 }
cc7972ea
CP
86 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
87 /* Section 3.1.:
88 * "the data parameters in a MP_CAPABLE are semantically
89 * equivalent to those in a DSS option and can be used
90 * interchangeably."
91 */
92 mp_opt->dss = 1;
93 mp_opt->use_map = 1;
94 mp_opt->mpc_map = 1;
95 mp_opt->data_len = get_unaligned_be16(ptr);
96 ptr += 2;
97 }
98 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
99 version, flags, opsize, mp_opt->sndr_key,
100 mp_opt->rcvr_key, mp_opt->data_len);
eda7acdd
PK
101 break;
102
f296234c
PK
103 case MPTCPOPT_MP_JOIN:
104 mp_opt->mp_join = 1;
105 if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
106 mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
107 mp_opt->join_id = *ptr++;
108 mp_opt->token = get_unaligned_be32(ptr);
109 ptr += 4;
110 mp_opt->nonce = get_unaligned_be32(ptr);
111 ptr += 4;
112 pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
113 mp_opt->backup, mp_opt->join_id,
114 mp_opt->token, mp_opt->nonce);
115 } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
116 mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
117 mp_opt->join_id = *ptr++;
118 mp_opt->thmac = get_unaligned_be64(ptr);
119 ptr += 8;
120 mp_opt->nonce = get_unaligned_be32(ptr);
121 ptr += 4;
122 pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
123 mp_opt->backup, mp_opt->join_id,
124 mp_opt->thmac, mp_opt->nonce);
125 } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
126 ptr += 2;
127 memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
128 pr_debug("MP_JOIN hmac");
129 } else {
130 pr_warn("MP_JOIN bad option size");
131 mp_opt->mp_join = 0;
132 }
133 break;
134
eda7acdd
PK
135 case MPTCPOPT_DSS:
136 pr_debug("DSS");
648ef4b8
MM
137 ptr++;
138
cc7972ea
CP
139 /* we must clear 'mpc_map' be able to detect MP_CAPABLE
140 * map vs DSS map in mptcp_incoming_options(), and reconstruct
141 * map info accordingly
142 */
143 mp_opt->mpc_map = 0;
648ef4b8
MM
144 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
145 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
146 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
147 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
148 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
149 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
150
151 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
152 mp_opt->data_fin, mp_opt->dsn64,
153 mp_opt->use_map, mp_opt->ack64,
154 mp_opt->use_ack);
155
156 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
157
158 if (mp_opt->use_ack) {
159 if (mp_opt->ack64)
160 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
161 else
162 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
163 }
164
165 if (mp_opt->use_map) {
166 if (mp_opt->dsn64)
167 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
168 else
169 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
170 }
171
172 /* RFC 6824, Section 3.3:
173 * If a checksum is present, but its use had
174 * not been negotiated in the MP_CAPABLE handshake,
175 * the checksum field MUST be ignored.
176 */
177 if (opsize != expected_opsize &&
178 opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
179 break;
180
eda7acdd 181 mp_opt->dss = 1;
648ef4b8
MM
182
183 if (mp_opt->use_ack) {
184 if (mp_opt->ack64) {
185 mp_opt->data_ack = get_unaligned_be64(ptr);
186 ptr += 8;
187 } else {
188 mp_opt->data_ack = get_unaligned_be32(ptr);
189 ptr += 4;
190 }
191
192 pr_debug("data_ack=%llu", mp_opt->data_ack);
193 }
194
195 if (mp_opt->use_map) {
196 if (mp_opt->dsn64) {
197 mp_opt->data_seq = get_unaligned_be64(ptr);
198 ptr += 8;
199 } else {
200 mp_opt->data_seq = get_unaligned_be32(ptr);
201 ptr += 4;
202 }
203
204 mp_opt->subflow_seq = get_unaligned_be32(ptr);
205 ptr += 4;
206
207 mp_opt->data_len = get_unaligned_be16(ptr);
208 ptr += 2;
209
210 pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
211 mp_opt->data_seq, mp_opt->subflow_seq,
212 mp_opt->data_len);
213 }
214
eda7acdd
PK
215 break;
216
3df523ab
PK
217 case MPTCPOPT_ADD_ADDR:
218 mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
219 if (!mp_opt->echo) {
220 if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
221 opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
222 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
223#if IS_ENABLED(CONFIG_MPTCP_IPV6)
224 else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
225 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
226 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
227#endif
228 else
229 break;
230 } else {
231 if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
232 opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
233 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
234#if IS_ENABLED(CONFIG_MPTCP_IPV6)
235 else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
236 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
237 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
238#endif
239 else
240 break;
241 }
242
243 mp_opt->add_addr = 1;
3df523ab 244 mp_opt->addr_id = *ptr++;
3df523ab
PK
245 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
246 memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
247 ptr += 4;
248 if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
249 opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
250 mp_opt->port = get_unaligned_be16(ptr);
251 ptr += 2;
252 }
253 }
254#if IS_ENABLED(CONFIG_MPTCP_IPV6)
255 else {
256 memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
257 ptr += 16;
258 if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
259 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
260 mp_opt->port = get_unaligned_be16(ptr);
261 ptr += 2;
262 }
263 }
264#endif
265 if (!mp_opt->echo) {
266 mp_opt->ahmac = get_unaligned_be64(ptr);
267 ptr += 8;
268 }
90a4aea8
GT
269 pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
270 (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
271 mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
3df523ab
PK
272 break;
273
274 case MPTCPOPT_RM_ADDR:
275 if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
276 break;
277
8e60eed6
GT
278 ptr++;
279
3df523ab
PK
280 mp_opt->rm_addr = 1;
281 mp_opt->rm_id = *ptr++;
282 pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
283 break;
284
50c504a2
FW
285 case MPTCPOPT_MP_FASTCLOSE:
286 if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
287 break;
288
289 ptr += 2;
290 mp_opt->rcvr_key = get_unaligned_be64(ptr);
291 ptr += 8;
292 mp_opt->fastclose = 1;
293 break;
294
eda7acdd
PK
295 default:
296 break;
297 }
298}
299
cec37a6e 300void mptcp_get_options(const struct sk_buff *skb,
cfde141e 301 struct mptcp_options_received *mp_opt)
cec37a6e 302{
cec37a6e 303 const struct tcphdr *th = tcp_hdr(skb);
cfde141e
PA
304 const unsigned char *ptr;
305 int length;
cec37a6e 306
cfde141e
PA
307 /* initialize option status */
308 mp_opt->mp_capable = 0;
309 mp_opt->mp_join = 0;
310 mp_opt->add_addr = 0;
fe2d9b1a 311 mp_opt->ahmac = 0;
50c504a2 312 mp_opt->fastclose = 0;
65b8c8a6 313 mp_opt->port = 0;
cfde141e
PA
314 mp_opt->rm_addr = 0;
315 mp_opt->dss = 0;
316
317 length = (th->doff * 4) - sizeof(struct tcphdr);
cec37a6e
PK
318 ptr = (const unsigned char *)(th + 1);
319
320 while (length > 0) {
321 int opcode = *ptr++;
322 int opsize;
323
324 switch (opcode) {
325 case TCPOPT_EOL:
326 return;
327 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
328 length--;
329 continue;
330 default:
331 opsize = *ptr++;
332 if (opsize < 2) /* "silly options" */
333 return;
334 if (opsize > length)
335 return; /* don't parse partial options */
336 if (opcode == TCPOPT_MPTCP)
cfde141e 337 mptcp_parse_option(skb, ptr, opsize, mp_opt);
cec37a6e
PK
338 ptr += opsize - 2;
339 length -= opsize;
340 }
341 }
342}
343
cc7972ea
CP
344bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
345 unsigned int *size, struct mptcp_out_options *opts)
cec37a6e
PK
346{
347 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
348
cc7972ea
CP
349 /* we will use snd_isn to detect first pkt [re]transmission
350 * in mptcp_established_options_mp()
351 */
352 subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
cec37a6e 353 if (subflow->request_mptcp) {
cec37a6e 354 opts->suboptions = OPTION_MPTCP_MPC_SYN;
cec37a6e
PK
355 *size = TCPOLEN_MPTCP_MPC_SYN;
356 return true;
ec3edaa7
PK
357 } else if (subflow->request_join) {
358 pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
359 subflow->local_nonce);
360 opts->suboptions = OPTION_MPTCP_MPJ_SYN;
361 opts->join_id = subflow->local_id;
362 opts->token = subflow->remote_token;
363 opts->nonce = subflow->local_nonce;
364 opts->backup = subflow->request_bkup;
365 *size = TCPOLEN_MPTCP_MPJ_SYN;
366 return true;
cec37a6e
PK
367 }
368 return false;
369}
370
ec3edaa7
PK
371/* MP_JOIN client subflow must wait for 4th ack before sending any data:
372 * TCP can't schedule delack timer before the subflow is fully established.
373 * MPTCP uses the delack timer to do 3rd ack retransmissions
374 */
375static void schedule_3rdack_retransmission(struct sock *sk)
376{
377 struct inet_connection_sock *icsk = inet_csk(sk);
378 struct tcp_sock *tp = tcp_sk(sk);
379 unsigned long timeout;
380
381 /* reschedule with a timeout above RTT, as we must look only for drop */
382 if (tp->srtt_us)
383 timeout = tp->srtt_us << 1;
384 else
385 timeout = TCP_TIMEOUT_INIT;
386
387 WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
388 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
389 icsk->icsk_ack.timeout = timeout;
390 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
391}
392
393static void clear_3rdack_retransmission(struct sock *sk)
394{
395 struct inet_connection_sock *icsk = inet_csk(sk);
396
397 sk_stop_timer(sk, &icsk->icsk_delack_timer);
398 icsk->icsk_ack.timeout = 0;
399 icsk->icsk_ack.ato = 0;
400 icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
401}
402
cc7972ea
CP
403static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
404 unsigned int *size,
6d0060f6
MM
405 unsigned int remaining,
406 struct mptcp_out_options *opts)
cec37a6e
PK
407{
408 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
cc7972ea
CP
409 struct mptcp_ext *mpext;
410 unsigned int data_len;
411
ec3edaa7
PK
412 /* When skb is not available, we better over-estimate the emitted
413 * options len. A full DSS option (28 bytes) is longer than
414 * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
415 * tell the caller to defer the estimate to
416 * mptcp_established_options_dss(), which will reserve enough space.
417 */
418 if (!skb)
419 return false;
cc7972ea 420
ec3edaa7
PK
421 /* MPC/MPJ needed only on 3rd ack packet */
422 if (subflow->fully_established ||
423 subflow->snd_isn != TCP_SKB_CB(skb)->seq)
424 return false;
425
426 if (subflow->mp_capable) {
cc7972ea
CP
427 mpext = mptcp_get_ext(skb);
428 data_len = mpext ? mpext->data_len : 0;
cec37a6e 429
cc7972ea
CP
430 /* we will check ext_copy.data_len in mptcp_write_options() to
431 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
432 * TCPOLEN_MPTCP_MPC_ACK
433 */
434 opts->ext_copy.data_len = data_len;
cec37a6e
PK
435 opts->suboptions = OPTION_MPTCP_MPC_ACK;
436 opts->sndr_key = subflow->local_key;
437 opts->rcvr_key = subflow->remote_key;
cc7972ea
CP
438
439 /* Section 3.1.
440 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
441 * packets that start the first subflow of an MPTCP connection,
442 * as well as the first packet that carries data
443 */
444 if (data_len > 0)
445 *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
446 else
447 *size = TCPOLEN_MPTCP_MPC_ACK;
448
449 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
450 subflow, subflow->local_key, subflow->remote_key,
451 data_len);
452
cec37a6e 453 return true;
ec3edaa7
PK
454 } else if (subflow->mp_join) {
455 opts->suboptions = OPTION_MPTCP_MPJ_ACK;
456 memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
457 *size = TCPOLEN_MPTCP_MPJ_ACK;
458 pr_debug("subflow=%p", subflow);
459
460 schedule_3rdack_retransmission(sk);
461 return true;
cec37a6e
PK
462 }
463 return false;
464}
465
6d0060f6 466static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
9c29e361 467 struct sk_buff *skb, struct mptcp_ext *ext)
6d0060f6 468{
017512a0
PA
469 /* The write_seq value has already been incremented, so the actual
470 * sequence number for the DATA_FIN is one less.
471 */
472 u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1;
7279da61 473
9c29e361 474 if (!ext->use_map || !skb->len) {
6d0060f6
MM
475 /* RFC6824 requires a DSS mapping with specific values
476 * if DATA_FIN is set but no data payload is mapped
477 */
6d37a0b8 478 ext->data_fin = 1;
6d0060f6
MM
479 ext->use_map = 1;
480 ext->dsn64 = 1;
017512a0 481 ext->data_seq = data_fin_tx_seq;
6d0060f6
MM
482 ext->subflow_seq = 0;
483 ext->data_len = 1;
7279da61 484 } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
6d37a0b8
MM
485 /* If there's an existing DSS mapping and it is the
486 * final mapping, DATA_FIN consumes 1 additional byte of
487 * mapping space.
6d0060f6 488 */
6d37a0b8 489 ext->data_fin = 1;
6d0060f6
MM
490 ext->data_len++;
491 }
492}
493
494static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
495 unsigned int *size,
496 unsigned int remaining,
497 struct mptcp_out_options *opts)
498{
499 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
7279da61 500 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
e3859603 501 u64 snd_data_fin_enable, ack_seq;
6d0060f6
MM
502 unsigned int dss_size = 0;
503 struct mptcp_ext *mpext;
6d0060f6 504 unsigned int ack_size;
d22f4988 505 bool ret = false;
6d0060f6 506
0bac966a 507 mpext = skb ? mptcp_get_ext(skb) : NULL;
e16163b6 508 snd_data_fin_enable = mptcp_data_fin_enabled(msk);
6d0060f6 509
7279da61 510 if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
6d0060f6
MM
511 unsigned int map_size;
512
513 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
514
515 remaining -= map_size;
516 dss_size = map_size;
517 if (mpext)
518 opts->ext_copy = *mpext;
519
7279da61 520 if (skb && snd_data_fin_enable)
9c29e361 521 mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
d22f4988
CP
522 ret = true;
523 }
524
2398e399
PA
525 /* passive sockets msk will set the 'can_ack' after accept(), even
526 * if the first subflow may have the already the remote key handy
527 */
d22f4988 528 opts->ext_copy.use_ack = 0;
dc093db5 529 if (!READ_ONCE(msk->can_ack)) {
d22f4988
CP
530 *size = ALIGN(dss_size, 4);
531 return ret;
6d0060f6
MM
532 }
533
e3859603 534 ack_seq = READ_ONCE(msk->ack_seq);
37198e93 535 if (READ_ONCE(msk->use_64bit_ack)) {
a0c1d0ea 536 ack_size = TCPOLEN_MPTCP_DSS_ACK64;
e3859603 537 opts->ext_copy.data_ack = ack_seq;
a0c1d0ea
CP
538 opts->ext_copy.ack64 = 1;
539 } else {
540 ack_size = TCPOLEN_MPTCP_DSS_ACK32;
e3859603 541 opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
a0c1d0ea
CP
542 opts->ext_copy.ack64 = 0;
543 }
544 opts->ext_copy.use_ack = 1;
ea4ca586 545 WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
6d0060f6
MM
546
547 /* Add kind/length/subtype/flag overhead if mapping is not populated */
548 if (dss_size == 0)
549 ack_size += TCPOLEN_MPTCP_DSS_BASE;
550
551 dss_size += ack_size;
552
6d0060f6
MM
553 *size = ALIGN(dss_size, 4);
554 return true;
555}
556
3df523ab
PK
557static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
558 struct in_addr *addr)
559{
bd697222 560 u8 hmac[SHA256_DIGEST_SIZE];
3df523ab
PK
561 u8 msg[7];
562
563 msg[0] = addr_id;
564 memcpy(&msg[1], &addr->s_addr, 4);
565 msg[5] = 0;
566 msg[6] = 0;
567
568 mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
569
bd697222 570 return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
3df523ab
PK
571}
572
573#if IS_ENABLED(CONFIG_MPTCP_IPV6)
574static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
575 struct in6_addr *addr)
576{
bd697222 577 u8 hmac[SHA256_DIGEST_SIZE];
3df523ab
PK
578 u8 msg[19];
579
580 msg[0] = addr_id;
581 memcpy(&msg[1], &addr->s6_addr, 16);
582 msg[17] = 0;
583 msg[18] = 0;
584
585 mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
586
bd697222 587 return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
3df523ab
PK
588}
589#endif
590
84dfe367 591static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
f643b803
GT
592 unsigned int *size,
593 unsigned int remaining,
594 struct mptcp_out_options *opts)
3df523ab
PK
595{
596 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
597 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
84dfe367
GT
598 bool drop_other_suboptions = false;
599 unsigned int opt_size = *size;
1b1c7a0e 600 struct mptcp_addr_info saddr;
6a6c05a8 601 bool echo;
4a2777a8 602 bool port;
1b1c7a0e 603 int len;
3df523ab 604
fbe0f87a
GT
605 if ((mptcp_pm_should_add_signal_ipv6(msk) ||
606 mptcp_pm_should_add_signal_port(msk)) &&
84dfe367
GT
607 skb && skb_is_tcp_pure_ack(skb)) {
608 pr_debug("drop other suboptions");
609 opts->suboptions = 0;
3ae32c07
GT
610 opts->ext_copy.use_ack = 0;
611 opts->ext_copy.use_map = 0;
84dfe367
GT
612 remaining += opt_size;
613 drop_other_suboptions = true;
614 }
615
f643b803 616 if (!mptcp_pm_should_add_signal(msk) ||
4a2777a8 617 !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
1b1c7a0e
PK
618 return false;
619
4a2777a8 620 len = mptcp_add_addr_len(saddr.family, echo, port);
1b1c7a0e
PK
621 if (remaining < len)
622 return false;
3df523ab 623
1b1c7a0e 624 *size = len;
84dfe367
GT
625 if (drop_other_suboptions)
626 *size -= opt_size;
1b1c7a0e 627 opts->addr_id = saddr.id;
4a2777a8
GT
628 if (port)
629 opts->port = ntohs(saddr.port);
1b1c7a0e 630 if (saddr.family == AF_INET) {
3df523ab 631 opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
1b1c7a0e 632 opts->addr = saddr.addr;
6a6c05a8
GT
633 if (!echo) {
634 opts->ahmac = add_addr_generate_hmac(msk->local_key,
635 msk->remote_key,
636 opts->addr_id,
637 &opts->addr);
638 }
3df523ab
PK
639 }
640#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1b1c7a0e 641 else if (saddr.family == AF_INET6) {
3df523ab 642 opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
1b1c7a0e 643 opts->addr6 = saddr.addr6;
6a6c05a8
GT
644 if (!echo) {
645 opts->ahmac = add_addr6_generate_hmac(msk->local_key,
646 msk->remote_key,
647 opts->addr_id,
648 &opts->addr6);
649 }
3df523ab
PK
650 }
651#endif
4a2777a8
GT
652 pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
653 opts->addr_id, opts->ahmac, echo, opts->port);
3df523ab
PK
654
655 return true;
656}
657
5cb104ae
GT
658static bool mptcp_established_options_rm_addr(struct sock *sk,
659 unsigned int *size,
660 unsigned int remaining,
661 struct mptcp_out_options *opts)
662{
663 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
664 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
665 u8 rm_id;
666
667 if (!mptcp_pm_should_rm_signal(msk) ||
668 !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id)))
669 return false;
670
671 if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
672 return false;
673
674 *size = TCPOLEN_MPTCP_RM_ADDR_BASE;
675 opts->suboptions |= OPTION_MPTCP_RM_ADDR;
676 opts->rm_id = rm_id;
677
678 pr_debug("rm_id=%d", opts->rm_id);
679
680 return true;
681}
682
6d0060f6
MM
683bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
684 unsigned int *size, unsigned int remaining,
685 struct mptcp_out_options *opts)
686{
687 unsigned int opt_size = 0;
688 bool ret = false;
689
3df523ab
PK
690 opts->suboptions = 0;
691
e1ff9e82
DC
692 if (unlikely(mptcp_check_fallback(sk)))
693 return false;
694
d5824847
PA
695 /* prevent adding of any MPTCP related options on reset packet
696 * until we support MP_TCPRST/MP_FASTCLOSE
697 */
698 if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
699 return false;
700
cc7972ea 701 if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
6d0060f6
MM
702 ret = true;
703 else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
704 opts))
705 ret = true;
706
707 /* we reserved enough space for the above options, and exceeding the
708 * TCP option space would be fatal
709 */
710 if (WARN_ON_ONCE(opt_size > remaining))
711 return false;
712
713 *size += opt_size;
714 remaining -= opt_size;
84dfe367 715 if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
3df523ab
PK
716 *size += opt_size;
717 remaining -= opt_size;
718 ret = true;
5cb104ae
GT
719 } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) {
720 *size += opt_size;
721 remaining -= opt_size;
722 ret = true;
3df523ab 723 }
6d0060f6
MM
724
725 return ret;
726}
727
cec37a6e
PK
728bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
729 struct mptcp_out_options *opts)
730{
731 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
732
733 if (subflow_req->mp_capable) {
734 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
735 opts->sndr_key = subflow_req->local_key;
736 *size = TCPOLEN_MPTCP_MPC_SYNACK;
737 pr_debug("subflow_req=%p, local_key=%llu",
738 subflow_req, subflow_req->local_key);
739 return true;
f296234c
PK
740 } else if (subflow_req->mp_join) {
741 opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
742 opts->backup = subflow_req->backup;
743 opts->join_id = subflow_req->local_id;
744 opts->thmac = subflow_req->thmac;
745 opts->nonce = subflow_req->local_nonce;
746 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
747 subflow_req, opts->backup, opts->join_id,
748 opts->thmac, opts->nonce);
749 *size = TCPOLEN_MPTCP_MPJ_SYNACK;
750 return true;
cec37a6e
PK
751 }
752 return false;
753}
754
d5824847 755static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
f296234c 756 struct mptcp_subflow_context *subflow,
0be534f5
PA
757 struct sk_buff *skb,
758 struct mptcp_options_received *mp_opt)
d22f4988
CP
759{
760 /* here we can process OoO, in-window pkts, only in-sequence 4th ack
f296234c 761 * will make the subflow fully established
d22f4988 762 */
f296234c
PK
763 if (likely(subflow->fully_established)) {
764 /* on passive sockets, check for 3rd ack retransmission
765 * note that msk is always set by subflow_syn_recv_sock()
766 * for mp_join subflows
767 */
768 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
769 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
770 subflow->mp_join && mp_opt->mp_join &&
771 READ_ONCE(msk->pm.server_side))
d5824847 772 tcp_send_ack(ssk);
f296234c
PK
773 goto fully_established;
774 }
775
d5824847
PA
776 /* we must process OoO packets before the first subflow is fully
777 * established. OoO packets are instead a protocol violation
778 * for MP_JOIN subflows as the peer must not send any data
779 * before receiving the forth ack - cfr. RFC 8684 section 3.2.
f296234c 780 */
d5824847
PA
781 if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
782 if (subflow->mp_join)
783 goto reset;
f296234c 784 return subflow->mp_capable;
d5824847 785 }
d22f4988 786
5a91e32b 787 if (mp_opt->dss && mp_opt->use_ack) {
f296234c
PK
788 /* subflows are fully established as soon as we get any
789 * additional ack.
790 */
0be534f5 791 subflow->fully_established = 1;
b93df08c 792 WRITE_ONCE(msk->fully_established, true);
f296234c
PK
793 goto fully_established;
794 }
d22f4988 795
84dfe367
GT
796 if (mp_opt->add_addr) {
797 WRITE_ONCE(msk->fully_established, true);
798 return true;
799 }
800
d22f4988 801 /* If the first established packet does not contain MP_CAPABLE + data
d5824847
PA
802 * then fallback to TCP. Fallback scenarios requires a reset for
803 * MP_JOIN subflows.
d22f4988
CP
804 */
805 if (!mp_opt->mp_capable) {
d5824847
PA
806 if (subflow->mp_join)
807 goto reset;
d22f4988 808 subflow->mp_capable = 0;
e1ff9e82
DC
809 pr_fallback(msk);
810 __mptcp_do_fallback(msk);
d22f4988
CP
811 return false;
812 }
f296234c 813
d6085fe1
PA
814 if (unlikely(!READ_ONCE(msk->pm.server_side)))
815 pr_warn_once("bogus mpc option on established client sk");
b93df08c 816 mptcp_subflow_fully_established(subflow, mp_opt);
f296234c
PK
817
818fully_established:
5b950ff4
PA
819 /* if the subflow is not already linked into the conn_list, we can't
820 * notify the PM: this subflow is still on the listener queue
821 * and the PM possibly acquiring the subflow lock could race with
822 * the listener close
823 */
824 if (likely(subflow->pm_notified) || list_empty(&subflow->node))
f296234c
PK
825 return true;
826
827 subflow->pm_notified = 1;
ec3edaa7 828 if (subflow->mp_join) {
d5824847 829 clear_3rdack_retransmission(ssk);
f296234c 830 mptcp_pm_subflow_established(msk, subflow);
ec3edaa7 831 } else {
f296234c 832 mptcp_pm_fully_established(msk);
ec3edaa7 833 }
d22f4988 834 return true;
d5824847
PA
835
836reset:
837 mptcp_subflow_reset(ssk);
838 return false;
d22f4988
CP
839}
840
cc9d2566
PA
841static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
842{
843 u32 old_ack32, cur_ack32;
844
845 if (use_64bit)
846 return cur_ack;
847
848 old_ack32 = (u32)old_ack;
849 cur_ack32 = (u32)cur_ack;
850 cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
851 if (unlikely(before(cur_ack32, old_ack32)))
852 return cur_ack + (1LL << 32);
853 return cur_ack;
854}
855
6f8a612a 856static void ack_update_msk(struct mptcp_sock *msk,
6e628cd3 857 struct sock *ssk,
6f8a612a 858 struct mptcp_options_received *mp_opt)
cc9d2566 859{
7439d687 860 u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
6f8a612a 861 struct sock *sk = (struct sock *)msk;
7439d687
PA
862 u64 old_snd_una;
863
864 mptcp_data_lock(sk);
cc9d2566
PA
865
866 /* avoid ack expansion on update conflict, to reduce the risk of
867 * wrongly expanding to a future ack sequence number, which is way
868 * more dangerous than missing an ack
869 */
7439d687 870 old_snd_una = msk->snd_una;
cc9d2566
PA
871 new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
872
873 /* ACK for data not even sent yet? Ignore. */
eaa2ffab 874 if (after64(new_snd_una, snd_nxt))
cc9d2566
PA
875 new_snd_una = old_snd_una;
876
6f8a612a
FW
877 new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
878
219d0499 879 if (after64(new_wnd_end, msk->wnd_end))
7439d687 880 msk->wnd_end = new_wnd_end;
219d0499
PA
881
882 /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
883 if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
884 sk_stream_memory_free(ssk))
885 __mptcp_check_push(sk, ssk);
6f8a612a 886
7439d687
PA
887 if (after64(new_snd_una, old_snd_una)) {
888 msk->snd_una = new_snd_una;
889 __mptcp_data_acked(sk);
cc9d2566 890 }
7439d687 891 mptcp_data_unlock(sk);
cc9d2566
PA
892}
893
1a49b2c2 894bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
3721b9b6
MM
895{
896 /* Skip if DATA_FIN was already received.
897 * If updating simultaneously with the recvmsg loop, values
898 * should match. If they mismatch, the peer is misbehaving and
899 * we will prefer the most recent information.
900 */
901 if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
902 return false;
903
1a49b2c2
MM
904 WRITE_ONCE(msk->rcv_data_fin_seq,
905 expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
3721b9b6
MM
906 WRITE_ONCE(msk->rcv_data_fin, 1);
907
908 return true;
909}
910
1b1c7a0e
PK
911static bool add_addr_hmac_valid(struct mptcp_sock *msk,
912 struct mptcp_options_received *mp_opt)
913{
914 u64 hmac = 0;
915
916 if (mp_opt->echo)
917 return true;
918
919 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
920 hmac = add_addr_generate_hmac(msk->remote_key,
921 msk->local_key,
922 mp_opt->addr_id, &mp_opt->addr);
923#if IS_ENABLED(CONFIG_MPTCP_IPV6)
924 else
925 hmac = add_addr6_generate_hmac(msk->remote_key,
926 msk->local_key,
927 mp_opt->addr_id, &mp_opt->addr6);
928#endif
929
930 pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
931 msk, (unsigned long long)hmac,
932 (unsigned long long)mp_opt->ahmac);
933
934 return hmac == mp_opt->ahmac;
935}
936
77d0cab9 937void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
648ef4b8 938{
d22f4988 939 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1b1c7a0e 940 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
cfde141e 941 struct mptcp_options_received mp_opt;
648ef4b8
MM
942 struct mptcp_ext *mpext;
943
6e628cd3
PA
944 if (__mptcp_check_fallback(msk)) {
945 /* Keep it simple and unconditionally trigger send data cleanup and
946 * pending queue spooling. We will need to acquire the data lock
947 * for more accurate checks, and once the lock is acquired, such
948 * helpers are cheap.
949 */
950 mptcp_data_lock(subflow->conn);
219d0499
PA
951 if (sk_stream_memory_free(sk))
952 __mptcp_check_push(subflow->conn, sk);
6e628cd3
PA
953 __mptcp_data_acked(subflow->conn);
954 mptcp_data_unlock(subflow->conn);
e1ff9e82 955 return;
6e628cd3 956 }
e1ff9e82 957
cfde141e
PA
958 mptcp_get_options(skb, &mp_opt);
959 if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
d22f4988 960 return;
648ef4b8 961
50c504a2
FW
962 if (mp_opt.fastclose &&
963 msk->local_key == mp_opt.rcvr_key) {
964 WRITE_ONCE(msk->rcv_fastclose, true);
965 mptcp_schedule_work((struct sock *)msk);
966 }
967
cfde141e 968 if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
1b1c7a0e
PK
969 struct mptcp_addr_info addr;
970
cfde141e
PA
971 addr.port = htons(mp_opt.port);
972 addr.id = mp_opt.addr_id;
973 if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
1b1c7a0e 974 addr.family = AF_INET;
cfde141e 975 addr.addr = mp_opt.addr;
1b1c7a0e
PK
976 }
977#if IS_ENABLED(CONFIG_MPTCP_IPV6)
cfde141e 978 else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
1b1c7a0e 979 addr.family = AF_INET6;
cfde141e 980 addr.addr6 = mp_opt.addr6;
1b1c7a0e
PK
981 }
982#endif
a877de06 983 if (!mp_opt.echo) {
1b1c7a0e 984 mptcp_pm_add_addr_received(msk, &addr);
a877de06
GT
985 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
986 } else {
00cfd77b 987 mptcp_pm_del_add_timer(msk, &addr);
a877de06
GT
988 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
989 }
cfde141e 990 mp_opt.add_addr = 0;
1b1c7a0e
PK
991 }
992
d0876b22
GT
993 if (mp_opt.rm_addr) {
994 mptcp_pm_rm_addr_received(msk, mp_opt.rm_id);
995 mp_opt.rm_addr = 0;
996 }
997
cfde141e 998 if (!mp_opt.dss)
648ef4b8
MM
999 return;
1000
cc9d2566
PA
1001 /* we can't wait for recvmsg() to update the ack_seq, otherwise
1002 * monodirectional flows will stuck
1003 */
cfde141e 1004 if (mp_opt.use_ack)
6f8a612a 1005 ack_update_msk(msk, sk, &mp_opt);
cc9d2566 1006
06827b34
MM
1007 /* Zero-data-length packets are dropped by the caller and not
1008 * propagated to the MPTCP layer, so the skb extension does not
1009 * need to be allocated or populated. DATA_FIN information, if
1010 * present, needs to be updated here before the skb is freed.
43b54c6e
MM
1011 */
1012 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
1013 if (mp_opt.data_fin && mp_opt.data_len == 1 &&
1a49b2c2 1014 mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
43b54c6e
MM
1015 schedule_work(&msk->work))
1016 sock_hold(subflow->conn);
06827b34
MM
1017
1018 return;
43b54c6e
MM
1019 }
1020
648ef4b8
MM
1021 mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
1022 if (!mpext)
1023 return;
1024
1025 memset(mpext, 0, sizeof(*mpext));
1026
cfde141e
PA
1027 if (mp_opt.use_map) {
1028 if (mp_opt.mpc_map) {
cc7972ea
CP
1029 /* this is an MP_CAPABLE carrying MPTCP data
1030 * we know this map the first chunk of data
1031 */
1032 mptcp_crypto_key_sha(subflow->remote_key, NULL,
1033 &mpext->data_seq);
1034 mpext->data_seq++;
1035 mpext->subflow_seq = 1;
1036 mpext->dsn64 = 1;
1037 mpext->mpc_map = 1;
a77895db 1038 mpext->data_fin = 0;
cc7972ea 1039 } else {
cfde141e
PA
1040 mpext->data_seq = mp_opt.data_seq;
1041 mpext->subflow_seq = mp_opt.subflow_seq;
1042 mpext->dsn64 = mp_opt.dsn64;
1043 mpext->data_fin = mp_opt.data_fin;
cc7972ea 1044 }
cfde141e 1045 mpext->data_len = mp_opt.data_len;
648ef4b8 1046 mpext->use_map = 1;
648ef4b8 1047 }
648ef4b8
MM
1048}
1049
fa3fe2b1
FW
1050static void mptcp_set_rwin(const struct tcp_sock *tp)
1051{
1052 const struct sock *ssk = (const struct sock *)tp;
1053 const struct mptcp_subflow_context *subflow;
1054 struct mptcp_sock *msk;
1055 u64 ack_seq;
1056
1057 subflow = mptcp_subflow_ctx(ssk);
1058 msk = mptcp_sk(subflow->conn);
1059
1060 ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
1061
1062 if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
1063 WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
1064}
1065
1066void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
1067 struct mptcp_out_options *opts)
eda7acdd 1068{
cc7972ea 1069 if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
eda7acdd
PK
1070 OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
1071 u8 len;
1072
1073 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
1074 len = TCPOLEN_MPTCP_MPC_SYN;
cec37a6e
PK
1075 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
1076 len = TCPOLEN_MPTCP_MPC_SYNACK;
cc7972ea
CP
1077 else if (opts->ext_copy.data_len)
1078 len = TCPOLEN_MPTCP_MPC_ACK_DATA;
eda7acdd
PK
1079 else
1080 len = TCPOLEN_MPTCP_MPC_ACK;
1081
3df523ab
PK
1082 *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
1083 MPTCP_SUPPORTED_VERSION,
1084 MPTCP_CAP_HMAC_SHA256);
cc7972ea
CP
1085
1086 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
1087 opts->suboptions))
1088 goto mp_capable_done;
1089
eda7acdd
PK
1090 put_unaligned_be64(opts->sndr_key, ptr);
1091 ptr += 2;
cc7972ea
CP
1092 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
1093 goto mp_capable_done;
1094
1095 put_unaligned_be64(opts->rcvr_key, ptr);
1096 ptr += 2;
1097 if (!opts->ext_copy.data_len)
1098 goto mp_capable_done;
1099
1100 put_unaligned_be32(opts->ext_copy.data_len << 16 |
1101 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1102 ptr += 1;
eda7acdd 1103 }
6d0060f6 1104
cc7972ea 1105mp_capable_done:
e1ef6832
GT
1106 if ((OPTION_MPTCP_ADD_ADDR
1107#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1108 | OPTION_MPTCP_ADD_ADDR6
1109#endif
1110 ) & opts->suboptions) {
6eb3d1e3
GT
1111 u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
1112 u8 echo = MPTCP_ADDR_ECHO;
1113
e1ef6832
GT
1114#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1115 if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
1116 len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
1117#endif
1118
22fb85ff
GT
1119 if (opts->port)
1120 len += TCPOLEN_MPTCP_PORT_LEN;
1121
3df523ab 1122 if (opts->ahmac) {
6eb3d1e3
GT
1123 len += sizeof(opts->ahmac);
1124 echo = 0;
3df523ab 1125 }
3df523ab 1126
6eb3d1e3
GT
1127 *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
1128 len, echo, opts->addr_id);
e1ef6832
GT
1129 if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
1130 memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
1131 ptr += 1;
3df523ab 1132 }
3df523ab 1133#if IS_ENABLED(CONFIG_MPTCP_IPV6)
e1ef6832
GT
1134 else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
1135 memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
1136 ptr += 4;
3df523ab 1137 }
3df523ab
PK
1138#endif
1139
22fb85ff
GT
1140 if (!opts->port) {
1141 if (opts->ahmac) {
1142 put_unaligned_be64(opts->ahmac, ptr);
1143 ptr += 2;
1144 }
1145 } else {
1146 if (opts->ahmac) {
1147 u8 *bptr = (u8 *)ptr;
1148
1149 put_unaligned_be16(opts->port, bptr);
1150 bptr += 2;
1151 put_unaligned_be64(opts->ahmac, bptr);
1152 bptr += 8;
1153 put_unaligned_be16(TCPOPT_NOP << 8 |
1154 TCPOPT_NOP, bptr);
1155
1156 ptr += 3;
1157 } else {
1158 put_unaligned_be32(opts->port << 16 |
1159 TCPOPT_NOP << 8 |
1160 TCPOPT_NOP, ptr);
1161 ptr += 1;
1162 }
3df523ab
PK
1163 }
1164 }
3df523ab
PK
1165
1166 if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
1167 *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
1168 TCPOLEN_MPTCP_RM_ADDR_BASE,
1169 0, opts->rm_id);
1170 }
1171
ec3edaa7
PK
1172 if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
1173 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1174 TCPOLEN_MPTCP_MPJ_SYN,
1175 opts->backup, opts->join_id);
1176 put_unaligned_be32(opts->token, ptr);
1177 ptr += 1;
1178 put_unaligned_be32(opts->nonce, ptr);
1179 ptr += 1;
1180 }
1181
f296234c
PK
1182 if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
1183 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1184 TCPOLEN_MPTCP_MPJ_SYNACK,
1185 opts->backup, opts->join_id);
1186 put_unaligned_be64(opts->thmac, ptr);
1187 ptr += 2;
1188 put_unaligned_be32(opts->nonce, ptr);
1189 ptr += 1;
1190 }
1191
ec3edaa7
PK
1192 if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
1193 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1194 TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
1195 memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
1196 ptr += 5;
1197 }
1198
6d0060f6
MM
1199 if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
1200 struct mptcp_ext *mpext = &opts->ext_copy;
1201 u8 len = TCPOLEN_MPTCP_DSS_BASE;
1202 u8 flags = 0;
1203
1204 if (mpext->use_ack) {
a0c1d0ea
CP
1205 flags = MPTCP_DSS_HAS_ACK;
1206 if (mpext->ack64) {
1207 len += TCPOLEN_MPTCP_DSS_ACK64;
1208 flags |= MPTCP_DSS_ACK64;
1209 } else {
1210 len += TCPOLEN_MPTCP_DSS_ACK32;
1211 }
6d0060f6
MM
1212 }
1213
1214 if (mpext->use_map) {
1215 len += TCPOLEN_MPTCP_DSS_MAP64;
1216
1217 /* Use only 64-bit mapping flags for now, add
1218 * support for optional 32-bit mappings later.
1219 */
1220 flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
1221 if (mpext->data_fin)
1222 flags |= MPTCP_DSS_DATA_FIN;
1223 }
1224
3df523ab 1225 *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
6d0060f6
MM
1226
1227 if (mpext->use_ack) {
a0c1d0ea
CP
1228 if (mpext->ack64) {
1229 put_unaligned_be64(mpext->data_ack, ptr);
1230 ptr += 2;
1231 } else {
1232 put_unaligned_be32(mpext->data_ack32, ptr);
1233 ptr += 1;
1234 }
6d0060f6
MM
1235 }
1236
1237 if (mpext->use_map) {
1238 put_unaligned_be64(mpext->data_seq, ptr);
1239 ptr += 2;
1240 put_unaligned_be32(mpext->subflow_seq, ptr);
1241 ptr += 1;
1242 put_unaligned_be32(mpext->data_len << 16 |
1243 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1244 }
1245 }
fa3fe2b1
FW
1246
1247 if (tp)
1248 mptcp_set_rwin(tp);
eda7acdd 1249}