mptcp: drop unused subflow in mptcp_pm_subflow_established
[linux-block.git] / net / mptcp / options.c
CommitLineData
eda7acdd
PK
1// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
c85adced
GT
7#define pr_fmt(fmt) "MPTCP: " fmt
8
eda7acdd 9#include <linux/kernel.h>
a24d22b2 10#include <crypto/sha2.h>
eda7acdd
PK
11#include <net/tcp.h>
12#include <net/mptcp.h>
13#include "protocol.h"
a877de06 14#include "mib.h"
eda7acdd 15
65492c5a
PA
16static bool mptcp_cap_flag_sha256(u8 flags)
17{
18 return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
19}
20
cfde141e
PA
21static void mptcp_parse_option(const struct sk_buff *skb,
22 const unsigned char *ptr, int opsize,
23 struct mptcp_options_received *mp_opt)
eda7acdd 24{
eda7acdd 25 u8 subtype = *ptr >> 4;
648ef4b8 26 int expected_opsize;
eda7acdd
PK
27 u8 version;
28 u8 flags;
5c4a824d 29 u8 i;
eda7acdd
PK
30
31 switch (subtype) {
32 case MPTCPOPT_MP_CAPABLE:
cc7972ea
CP
33 /* strict size checking */
34 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
35 if (skb->len > tcp_hdr(skb)->doff << 2)
36 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
37 else
38 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
39 } else {
40 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
41 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
42 else
43 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
44 }
45 if (opsize != expected_opsize)
eda7acdd
PK
46 break;
47
cc7972ea 48 /* try to be gentle vs future versions on the initial syn */
eda7acdd 49 version = *ptr++ & MPTCP_VERSION_MASK;
cc7972ea
CP
50 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
51 if (version != MPTCP_SUPPORTED_VERSION)
52 break;
53 } else if (version < MPTCP_SUPPORTED_VERSION) {
eda7acdd 54 break;
cc7972ea 55 }
eda7acdd
PK
56
57 flags = *ptr++;
65492c5a 58 if (!mptcp_cap_flag_sha256(flags) ||
eda7acdd
PK
59 (flags & MPTCP_CAP_EXTENSIBILITY))
60 break;
61
62 /* RFC 6824, Section 3.1:
63 * "For the Checksum Required bit (labeled "A"), if either
64 * host requires the use of checksums, checksums MUST be used.
65 * In other words, the only way for checksums not to be used
66 * is if both hosts in their SYNs set A=0."
67 *
68 * Section 3.3.0:
69 * "If a checksum is not present when its use has been
70 * negotiated, the receiver MUST close the subflow with a RST as
71 * it is considered broken."
72 *
73 * We don't implement DSS checksum - fall back to TCP.
74 */
75 if (flags & MPTCP_CAP_CHECKSUM_REQD)
76 break;
77
78 mp_opt->mp_capable = 1;
cc7972ea
CP
79 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
80 mp_opt->sndr_key = get_unaligned_be64(ptr);
81 ptr += 8;
82 }
83 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
eda7acdd
PK
84 mp_opt->rcvr_key = get_unaligned_be64(ptr);
85 ptr += 8;
eda7acdd 86 }
cc7972ea
CP
87 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
88 /* Section 3.1.:
89 * "the data parameters in a MP_CAPABLE are semantically
90 * equivalent to those in a DSS option and can be used
91 * interchangeably."
92 */
93 mp_opt->dss = 1;
94 mp_opt->use_map = 1;
95 mp_opt->mpc_map = 1;
96 mp_opt->data_len = get_unaligned_be16(ptr);
97 ptr += 2;
98 }
99 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
100 version, flags, opsize, mp_opt->sndr_key,
101 mp_opt->rcvr_key, mp_opt->data_len);
eda7acdd
PK
102 break;
103
f296234c
PK
104 case MPTCPOPT_MP_JOIN:
105 mp_opt->mp_join = 1;
106 if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
107 mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
108 mp_opt->join_id = *ptr++;
109 mp_opt->token = get_unaligned_be32(ptr);
110 ptr += 4;
111 mp_opt->nonce = get_unaligned_be32(ptr);
112 ptr += 4;
113 pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
114 mp_opt->backup, mp_opt->join_id,
115 mp_opt->token, mp_opt->nonce);
116 } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
117 mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
118 mp_opt->join_id = *ptr++;
119 mp_opt->thmac = get_unaligned_be64(ptr);
120 ptr += 8;
121 mp_opt->nonce = get_unaligned_be32(ptr);
122 ptr += 4;
123 pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
124 mp_opt->backup, mp_opt->join_id,
125 mp_opt->thmac, mp_opt->nonce);
126 } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
127 ptr += 2;
128 memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
129 pr_debug("MP_JOIN hmac");
130 } else {
131 pr_warn("MP_JOIN bad option size");
132 mp_opt->mp_join = 0;
133 }
134 break;
135
eda7acdd
PK
136 case MPTCPOPT_DSS:
137 pr_debug("DSS");
648ef4b8
MM
138 ptr++;
139
cc7972ea
CP
140 /* we must clear 'mpc_map' be able to detect MP_CAPABLE
141 * map vs DSS map in mptcp_incoming_options(), and reconstruct
142 * map info accordingly
143 */
144 mp_opt->mpc_map = 0;
648ef4b8
MM
145 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
146 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
147 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
148 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
149 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
150 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
151
152 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
153 mp_opt->data_fin, mp_opt->dsn64,
154 mp_opt->use_map, mp_opt->ack64,
155 mp_opt->use_ack);
156
157 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
158
159 if (mp_opt->use_ack) {
160 if (mp_opt->ack64)
161 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
162 else
163 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
164 }
165
166 if (mp_opt->use_map) {
167 if (mp_opt->dsn64)
168 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
169 else
170 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
171 }
172
173 /* RFC 6824, Section 3.3:
174 * If a checksum is present, but its use had
175 * not been negotiated in the MP_CAPABLE handshake,
176 * the checksum field MUST be ignored.
177 */
178 if (opsize != expected_opsize &&
179 opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
180 break;
181
eda7acdd 182 mp_opt->dss = 1;
648ef4b8
MM
183
184 if (mp_opt->use_ack) {
185 if (mp_opt->ack64) {
186 mp_opt->data_ack = get_unaligned_be64(ptr);
187 ptr += 8;
188 } else {
189 mp_opt->data_ack = get_unaligned_be32(ptr);
190 ptr += 4;
191 }
192
193 pr_debug("data_ack=%llu", mp_opt->data_ack);
194 }
195
196 if (mp_opt->use_map) {
197 if (mp_opt->dsn64) {
198 mp_opt->data_seq = get_unaligned_be64(ptr);
199 ptr += 8;
200 } else {
201 mp_opt->data_seq = get_unaligned_be32(ptr);
202 ptr += 4;
203 }
204
205 mp_opt->subflow_seq = get_unaligned_be32(ptr);
206 ptr += 4;
207
208 mp_opt->data_len = get_unaligned_be16(ptr);
209 ptr += 2;
210
211 pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
212 mp_opt->data_seq, mp_opt->subflow_seq,
213 mp_opt->data_len);
214 }
215
eda7acdd
PK
216 break;
217
3df523ab
PK
218 case MPTCPOPT_ADD_ADDR:
219 mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
220 if (!mp_opt->echo) {
221 if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
222 opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
223 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
224#if IS_ENABLED(CONFIG_MPTCP_IPV6)
225 else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
226 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
227 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
228#endif
229 else
230 break;
231 } else {
232 if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
233 opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
234 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
235#if IS_ENABLED(CONFIG_MPTCP_IPV6)
236 else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
237 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
238 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
239#endif
240 else
241 break;
242 }
243
244 mp_opt->add_addr = 1;
3df523ab 245 mp_opt->addr_id = *ptr++;
3df523ab
PK
246 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
247 memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
248 ptr += 4;
249 if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
250 opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
251 mp_opt->port = get_unaligned_be16(ptr);
252 ptr += 2;
253 }
254 }
255#if IS_ENABLED(CONFIG_MPTCP_IPV6)
256 else {
257 memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
258 ptr += 16;
259 if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
260 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
261 mp_opt->port = get_unaligned_be16(ptr);
262 ptr += 2;
263 }
264 }
265#endif
266 if (!mp_opt->echo) {
267 mp_opt->ahmac = get_unaligned_be64(ptr);
268 ptr += 8;
269 }
90a4aea8
GT
270 pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
271 (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
272 mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
3df523ab
PK
273 break;
274
275 case MPTCPOPT_RM_ADDR:
5c4a824d
GT
276 if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
277 opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
3df523ab
PK
278 break;
279
8e60eed6
GT
280 ptr++;
281
3df523ab 282 mp_opt->rm_addr = 1;
5c4a824d
GT
283 mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
284 for (i = 0; i < mp_opt->rm_list.nr; i++)
285 mp_opt->rm_list.ids[i] = *ptr++;
286 pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
3df523ab
PK
287 break;
288
40453a5c
GT
289 case MPTCPOPT_MP_PRIO:
290 if (opsize != TCPOLEN_MPTCP_PRIO)
291 break;
292
293 mp_opt->mp_prio = 1;
294 mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
295 pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
296 break;
297
50c504a2
FW
298 case MPTCPOPT_MP_FASTCLOSE:
299 if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
300 break;
301
302 ptr += 2;
303 mp_opt->rcvr_key = get_unaligned_be64(ptr);
304 ptr += 8;
305 mp_opt->fastclose = 1;
306 break;
307
eda7acdd
PK
308 default:
309 break;
310 }
311}
312
cec37a6e 313void mptcp_get_options(const struct sk_buff *skb,
cfde141e 314 struct mptcp_options_received *mp_opt)
cec37a6e 315{
cec37a6e 316 const struct tcphdr *th = tcp_hdr(skb);
cfde141e
PA
317 const unsigned char *ptr;
318 int length;
cec37a6e 319
cfde141e
PA
320 /* initialize option status */
321 mp_opt->mp_capable = 0;
322 mp_opt->mp_join = 0;
323 mp_opt->add_addr = 0;
fe2d9b1a 324 mp_opt->ahmac = 0;
50c504a2 325 mp_opt->fastclose = 0;
65b8c8a6 326 mp_opt->port = 0;
cfde141e
PA
327 mp_opt->rm_addr = 0;
328 mp_opt->dss = 0;
40453a5c 329 mp_opt->mp_prio = 0;
cfde141e
PA
330
331 length = (th->doff * 4) - sizeof(struct tcphdr);
cec37a6e
PK
332 ptr = (const unsigned char *)(th + 1);
333
334 while (length > 0) {
335 int opcode = *ptr++;
336 int opsize;
337
338 switch (opcode) {
339 case TCPOPT_EOL:
340 return;
341 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
342 length--;
343 continue;
344 default:
345 opsize = *ptr++;
346 if (opsize < 2) /* "silly options" */
347 return;
348 if (opsize > length)
349 return; /* don't parse partial options */
350 if (opcode == TCPOPT_MPTCP)
cfde141e 351 mptcp_parse_option(skb, ptr, opsize, mp_opt);
cec37a6e
PK
352 ptr += opsize - 2;
353 length -= opsize;
354 }
355 }
356}
357
cc7972ea
CP
358bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
359 unsigned int *size, struct mptcp_out_options *opts)
cec37a6e
PK
360{
361 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
362
cc7972ea
CP
363 /* we will use snd_isn to detect first pkt [re]transmission
364 * in mptcp_established_options_mp()
365 */
366 subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
cec37a6e 367 if (subflow->request_mptcp) {
cec37a6e 368 opts->suboptions = OPTION_MPTCP_MPC_SYN;
cec37a6e
PK
369 *size = TCPOLEN_MPTCP_MPC_SYN;
370 return true;
ec3edaa7
PK
371 } else if (subflow->request_join) {
372 pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
373 subflow->local_nonce);
374 opts->suboptions = OPTION_MPTCP_MPJ_SYN;
375 opts->join_id = subflow->local_id;
376 opts->token = subflow->remote_token;
377 opts->nonce = subflow->local_nonce;
378 opts->backup = subflow->request_bkup;
379 *size = TCPOLEN_MPTCP_MPJ_SYN;
380 return true;
cec37a6e
PK
381 }
382 return false;
383}
384
ec3edaa7
PK
385/* MP_JOIN client subflow must wait for 4th ack before sending any data:
386 * TCP can't schedule delack timer before the subflow is fully established.
387 * MPTCP uses the delack timer to do 3rd ack retransmissions
388 */
389static void schedule_3rdack_retransmission(struct sock *sk)
390{
391 struct inet_connection_sock *icsk = inet_csk(sk);
392 struct tcp_sock *tp = tcp_sk(sk);
393 unsigned long timeout;
394
395 /* reschedule with a timeout above RTT, as we must look only for drop */
396 if (tp->srtt_us)
397 timeout = tp->srtt_us << 1;
398 else
399 timeout = TCP_TIMEOUT_INIT;
400
401 WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
402 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
403 icsk->icsk_ack.timeout = timeout;
404 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
405}
406
407static void clear_3rdack_retransmission(struct sock *sk)
408{
409 struct inet_connection_sock *icsk = inet_csk(sk);
410
411 sk_stop_timer(sk, &icsk->icsk_delack_timer);
412 icsk->icsk_ack.timeout = 0;
413 icsk->icsk_ack.ato = 0;
414 icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
415}
416
cc7972ea 417static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
d87903b6 418 bool snd_data_fin_enable,
cc7972ea 419 unsigned int *size,
6d0060f6
MM
420 unsigned int remaining,
421 struct mptcp_out_options *opts)
cec37a6e
PK
422{
423 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
cc7972ea
CP
424 struct mptcp_ext *mpext;
425 unsigned int data_len;
426
ec3edaa7
PK
427 /* When skb is not available, we better over-estimate the emitted
428 * options len. A full DSS option (28 bytes) is longer than
429 * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
430 * tell the caller to defer the estimate to
431 * mptcp_established_options_dss(), which will reserve enough space.
432 */
433 if (!skb)
434 return false;
cc7972ea 435
d87903b6
PA
436 /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
437 if (subflow->fully_established || snd_data_fin_enable ||
438 subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
439 sk->sk_state != TCP_ESTABLISHED)
ec3edaa7
PK
440 return false;
441
442 if (subflow->mp_capable) {
cc7972ea
CP
443 mpext = mptcp_get_ext(skb);
444 data_len = mpext ? mpext->data_len : 0;
cec37a6e 445
cc7972ea
CP
446 /* we will check ext_copy.data_len in mptcp_write_options() to
447 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
448 * TCPOLEN_MPTCP_MPC_ACK
449 */
450 opts->ext_copy.data_len = data_len;
cec37a6e
PK
451 opts->suboptions = OPTION_MPTCP_MPC_ACK;
452 opts->sndr_key = subflow->local_key;
453 opts->rcvr_key = subflow->remote_key;
cc7972ea
CP
454
455 /* Section 3.1.
456 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
457 * packets that start the first subflow of an MPTCP connection,
458 * as well as the first packet that carries data
459 */
460 if (data_len > 0)
461 *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
462 else
463 *size = TCPOLEN_MPTCP_MPC_ACK;
464
465 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
466 subflow, subflow->local_key, subflow->remote_key,
467 data_len);
468
cec37a6e 469 return true;
ec3edaa7
PK
470 } else if (subflow->mp_join) {
471 opts->suboptions = OPTION_MPTCP_MPJ_ACK;
472 memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
473 *size = TCPOLEN_MPTCP_MPJ_ACK;
474 pr_debug("subflow=%p", subflow);
475
476 schedule_3rdack_retransmission(sk);
477 return true;
cec37a6e
PK
478 }
479 return false;
480}
481
6d0060f6 482static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
9c29e361 483 struct sk_buff *skb, struct mptcp_ext *ext)
6d0060f6 484{
017512a0
PA
485 /* The write_seq value has already been incremented, so the actual
486 * sequence number for the DATA_FIN is one less.
487 */
488 u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1;
7279da61 489
9c29e361 490 if (!ext->use_map || !skb->len) {
6d0060f6
MM
491 /* RFC6824 requires a DSS mapping with specific values
492 * if DATA_FIN is set but no data payload is mapped
493 */
6d37a0b8 494 ext->data_fin = 1;
6d0060f6
MM
495 ext->use_map = 1;
496 ext->dsn64 = 1;
017512a0 497 ext->data_seq = data_fin_tx_seq;
6d0060f6
MM
498 ext->subflow_seq = 0;
499 ext->data_len = 1;
7279da61 500 } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
6d37a0b8
MM
501 /* If there's an existing DSS mapping and it is the
502 * final mapping, DATA_FIN consumes 1 additional byte of
503 * mapping space.
6d0060f6 504 */
6d37a0b8 505 ext->data_fin = 1;
6d0060f6
MM
506 ext->data_len++;
507 }
508}
509
510static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
d87903b6 511 bool snd_data_fin_enable,
6d0060f6
MM
512 unsigned int *size,
513 unsigned int remaining,
514 struct mptcp_out_options *opts)
515{
516 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
7279da61 517 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
6d0060f6
MM
518 unsigned int dss_size = 0;
519 struct mptcp_ext *mpext;
6d0060f6 520 unsigned int ack_size;
d22f4988 521 bool ret = false;
d87903b6 522 u64 ack_seq;
6d0060f6 523
0bac966a 524 mpext = skb ? mptcp_get_ext(skb) : NULL;
6d0060f6 525
7279da61 526 if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
6d0060f6
MM
527 unsigned int map_size;
528
529 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
530
531 remaining -= map_size;
532 dss_size = map_size;
533 if (mpext)
534 opts->ext_copy = *mpext;
535
7279da61 536 if (skb && snd_data_fin_enable)
9c29e361 537 mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
d22f4988
CP
538 ret = true;
539 }
540
2398e399
PA
541 /* passive sockets msk will set the 'can_ack' after accept(), even
542 * if the first subflow may have the already the remote key handy
543 */
d22f4988 544 opts->ext_copy.use_ack = 0;
dc093db5 545 if (!READ_ONCE(msk->can_ack)) {
d22f4988
CP
546 *size = ALIGN(dss_size, 4);
547 return ret;
6d0060f6
MM
548 }
549
e3859603 550 ack_seq = READ_ONCE(msk->ack_seq);
37198e93 551 if (READ_ONCE(msk->use_64bit_ack)) {
a0c1d0ea 552 ack_size = TCPOLEN_MPTCP_DSS_ACK64;
e3859603 553 opts->ext_copy.data_ack = ack_seq;
a0c1d0ea
CP
554 opts->ext_copy.ack64 = 1;
555 } else {
556 ack_size = TCPOLEN_MPTCP_DSS_ACK32;
e3859603 557 opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
a0c1d0ea
CP
558 opts->ext_copy.ack64 = 0;
559 }
560 opts->ext_copy.use_ack = 1;
ea4ca586 561 WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
6d0060f6
MM
562
563 /* Add kind/length/subtype/flag overhead if mapping is not populated */
564 if (dss_size == 0)
565 ack_size += TCPOLEN_MPTCP_DSS_BASE;
566
567 dss_size += ack_size;
568
6d0060f6
MM
569 *size = ALIGN(dss_size, 4);
570 return true;
571}
572
3df523ab 573static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
13832ae2 574 struct in_addr *addr, u16 port)
3df523ab 575{
bd697222 576 u8 hmac[SHA256_DIGEST_SIZE];
3df523ab
PK
577 u8 msg[7];
578
579 msg[0] = addr_id;
580 memcpy(&msg[1], &addr->s_addr, 4);
13832ae2
DC
581 msg[5] = port >> 8;
582 msg[6] = port & 0xFF;
3df523ab
PK
583
584 mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
585
bd697222 586 return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
3df523ab
PK
587}
588
589#if IS_ENABLED(CONFIG_MPTCP_IPV6)
590static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
13832ae2 591 struct in6_addr *addr, u16 port)
3df523ab 592{
bd697222 593 u8 hmac[SHA256_DIGEST_SIZE];
3df523ab
PK
594 u8 msg[19];
595
596 msg[0] = addr_id;
597 memcpy(&msg[1], &addr->s6_addr, 16);
13832ae2
DC
598 msg[17] = port >> 8;
599 msg[18] = port & 0xFF;
3df523ab
PK
600
601 mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
602
bd697222 603 return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
3df523ab
PK
604}
605#endif
606
84dfe367 607static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
f643b803
GT
608 unsigned int *size,
609 unsigned int remaining,
610 struct mptcp_out_options *opts)
3df523ab
PK
611{
612 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
613 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
84dfe367
GT
614 bool drop_other_suboptions = false;
615 unsigned int opt_size = *size;
1b1c7a0e 616 struct mptcp_addr_info saddr;
6a6c05a8 617 bool echo;
4a2777a8 618 bool port;
1b1c7a0e 619 int len;
3df523ab 620
fbe0f87a
GT
621 if ((mptcp_pm_should_add_signal_ipv6(msk) ||
622 mptcp_pm_should_add_signal_port(msk)) &&
84dfe367
GT
623 skb && skb_is_tcp_pure_ack(skb)) {
624 pr_debug("drop other suboptions");
625 opts->suboptions = 0;
3ae32c07
GT
626 opts->ext_copy.use_ack = 0;
627 opts->ext_copy.use_map = 0;
84dfe367
GT
628 remaining += opt_size;
629 drop_other_suboptions = true;
630 }
631
f643b803 632 if (!mptcp_pm_should_add_signal(msk) ||
4a2777a8 633 !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
1b1c7a0e
PK
634 return false;
635
4a2777a8 636 len = mptcp_add_addr_len(saddr.family, echo, port);
1b1c7a0e
PK
637 if (remaining < len)
638 return false;
3df523ab 639
1b1c7a0e 640 *size = len;
84dfe367
GT
641 if (drop_other_suboptions)
642 *size -= opt_size;
1b1c7a0e 643 opts->addr_id = saddr.id;
4a2777a8
GT
644 if (port)
645 opts->port = ntohs(saddr.port);
1b1c7a0e 646 if (saddr.family == AF_INET) {
3df523ab 647 opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
1b1c7a0e 648 opts->addr = saddr.addr;
6a6c05a8
GT
649 if (!echo) {
650 opts->ahmac = add_addr_generate_hmac(msk->local_key,
651 msk->remote_key,
652 opts->addr_id,
13832ae2
DC
653 &opts->addr,
654 opts->port);
6a6c05a8 655 }
3df523ab
PK
656 }
657#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1b1c7a0e 658 else if (saddr.family == AF_INET6) {
3df523ab 659 opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
1b1c7a0e 660 opts->addr6 = saddr.addr6;
6a6c05a8
GT
661 if (!echo) {
662 opts->ahmac = add_addr6_generate_hmac(msk->local_key,
663 msk->remote_key,
664 opts->addr_id,
13832ae2
DC
665 &opts->addr6,
666 opts->port);
6a6c05a8 667 }
3df523ab
PK
668 }
669#endif
4a2777a8
GT
670 pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
671 opts->addr_id, opts->ahmac, echo, opts->port);
3df523ab
PK
672
673 return true;
674}
675
5cb104ae
GT
676static bool mptcp_established_options_rm_addr(struct sock *sk,
677 unsigned int *size,
678 unsigned int remaining,
679 struct mptcp_out_options *opts)
680{
681 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
682 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
6445e17a
GT
683 struct mptcp_rm_list rm_list;
684 int i, len;
5cb104ae
GT
685
686 if (!mptcp_pm_should_rm_signal(msk) ||
6445e17a 687 !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list)))
5cb104ae
GT
688 return false;
689
6445e17a
GT
690 len = mptcp_rm_addr_len(&rm_list);
691 if (len < 0)
692 return false;
693 if (remaining < len)
5cb104ae
GT
694 return false;
695
6445e17a 696 *size = len;
5cb104ae 697 opts->suboptions |= OPTION_MPTCP_RM_ADDR;
6445e17a 698 opts->rm_list = rm_list;
5cb104ae 699
6445e17a
GT
700 for (i = 0; i < opts->rm_list.nr; i++)
701 pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
5cb104ae
GT
702
703 return true;
704}
705
06706542
GT
706static bool mptcp_established_options_mp_prio(struct sock *sk,
707 unsigned int *size,
708 unsigned int remaining,
709 struct mptcp_out_options *opts)
710{
711 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
712
713 if (!subflow->send_mp_prio)
714 return false;
715
ec99a470
DC
716 /* account for the trailing 'nop' option */
717 if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN)
06706542
GT
718 return false;
719
ec99a470 720 *size = TCPOLEN_MPTCP_PRIO_ALIGN;
06706542
GT
721 opts->suboptions |= OPTION_MPTCP_PRIO;
722 opts->backup = subflow->request_bkup;
723
724 pr_debug("prio=%d", opts->backup);
725
726 return true;
727}
728
6d0060f6
MM
729bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
730 unsigned int *size, unsigned int remaining,
731 struct mptcp_out_options *opts)
732{
d87903b6
PA
733 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
734 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
6d0060f6 735 unsigned int opt_size = 0;
d87903b6 736 bool snd_data_fin;
6d0060f6
MM
737 bool ret = false;
738
3df523ab
PK
739 opts->suboptions = 0;
740
d87903b6 741 if (unlikely(__mptcp_check_fallback(msk)))
e1ff9e82
DC
742 return false;
743
d5824847
PA
744 /* prevent adding of any MPTCP related options on reset packet
745 * until we support MP_TCPRST/MP_FASTCLOSE
746 */
747 if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
748 return false;
749
d87903b6
PA
750 snd_data_fin = mptcp_data_fin_enabled(msk);
751 if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
6d0060f6 752 ret = true;
d87903b6 753 else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
6d0060f6
MM
754 ret = true;
755
756 /* we reserved enough space for the above options, and exceeding the
757 * TCP option space would be fatal
758 */
759 if (WARN_ON_ONCE(opt_size > remaining))
760 return false;
761
762 *size += opt_size;
763 remaining -= opt_size;
84dfe367 764 if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
3df523ab
PK
765 *size += opt_size;
766 remaining -= opt_size;
767 ret = true;
5cb104ae
GT
768 } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) {
769 *size += opt_size;
770 remaining -= opt_size;
771 ret = true;
3df523ab 772 }
6d0060f6 773
06706542
GT
774 if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
775 *size += opt_size;
776 remaining -= opt_size;
777 ret = true;
778 }
779
6d0060f6
MM
780 return ret;
781}
782
cec37a6e
PK
783bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
784 struct mptcp_out_options *opts)
785{
786 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
787
788 if (subflow_req->mp_capable) {
789 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
790 opts->sndr_key = subflow_req->local_key;
791 *size = TCPOLEN_MPTCP_MPC_SYNACK;
792 pr_debug("subflow_req=%p, local_key=%llu",
793 subflow_req, subflow_req->local_key);
794 return true;
f296234c
PK
795 } else if (subflow_req->mp_join) {
796 opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
797 opts->backup = subflow_req->backup;
798 opts->join_id = subflow_req->local_id;
799 opts->thmac = subflow_req->thmac;
800 opts->nonce = subflow_req->local_nonce;
801 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
802 subflow_req, opts->backup, opts->join_id,
803 opts->thmac, opts->nonce);
804 *size = TCPOLEN_MPTCP_MPJ_SYNACK;
805 return true;
cec37a6e
PK
806 }
807 return false;
808}
809
d5824847 810static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
f296234c 811 struct mptcp_subflow_context *subflow,
0be534f5
PA
812 struct sk_buff *skb,
813 struct mptcp_options_received *mp_opt)
d22f4988
CP
814{
815 /* here we can process OoO, in-window pkts, only in-sequence 4th ack
f296234c 816 * will make the subflow fully established
d22f4988 817 */
f296234c
PK
818 if (likely(subflow->fully_established)) {
819 /* on passive sockets, check for 3rd ack retransmission
820 * note that msk is always set by subflow_syn_recv_sock()
821 * for mp_join subflows
822 */
823 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
824 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
825 subflow->mp_join && mp_opt->mp_join &&
826 READ_ONCE(msk->pm.server_side))
d5824847 827 tcp_send_ack(ssk);
f296234c
PK
828 goto fully_established;
829 }
830
d5824847
PA
831 /* we must process OoO packets before the first subflow is fully
832 * established. OoO packets are instead a protocol violation
833 * for MP_JOIN subflows as the peer must not send any data
834 * before receiving the forth ack - cfr. RFC 8684 section 3.2.
f296234c 835 */
d5824847
PA
836 if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
837 if (subflow->mp_join)
838 goto reset;
f296234c 839 return subflow->mp_capable;
d5824847 840 }
d22f4988 841
5a91e32b 842 if (mp_opt->dss && mp_opt->use_ack) {
f296234c
PK
843 /* subflows are fully established as soon as we get any
844 * additional ack.
845 */
0be534f5 846 subflow->fully_established = 1;
b93df08c 847 WRITE_ONCE(msk->fully_established, true);
f296234c
PK
848 goto fully_established;
849 }
d22f4988 850
84dfe367
GT
851 if (mp_opt->add_addr) {
852 WRITE_ONCE(msk->fully_established, true);
853 return true;
854 }
855
d22f4988 856 /* If the first established packet does not contain MP_CAPABLE + data
d5824847
PA
857 * then fallback to TCP. Fallback scenarios requires a reset for
858 * MP_JOIN subflows.
d22f4988
CP
859 */
860 if (!mp_opt->mp_capable) {
d5824847
PA
861 if (subflow->mp_join)
862 goto reset;
d22f4988 863 subflow->mp_capable = 0;
e1ff9e82
DC
864 pr_fallback(msk);
865 __mptcp_do_fallback(msk);
d22f4988
CP
866 return false;
867 }
f296234c 868
d6085fe1
PA
869 if (unlikely(!READ_ONCE(msk->pm.server_side)))
870 pr_warn_once("bogus mpc option on established client sk");
b93df08c 871 mptcp_subflow_fully_established(subflow, mp_opt);
f296234c
PK
872
873fully_established:
5b950ff4
PA
874 /* if the subflow is not already linked into the conn_list, we can't
875 * notify the PM: this subflow is still on the listener queue
876 * and the PM possibly acquiring the subflow lock could race with
877 * the listener close
878 */
879 if (likely(subflow->pm_notified) || list_empty(&subflow->node))
f296234c
PK
880 return true;
881
882 subflow->pm_notified = 1;
ec3edaa7 883 if (subflow->mp_join) {
d5824847 884 clear_3rdack_retransmission(ssk);
62535200 885 mptcp_pm_subflow_established(msk);
ec3edaa7 886 } else {
6c714f1b 887 mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
ec3edaa7 888 }
d22f4988 889 return true;
d5824847
PA
890
891reset:
892 mptcp_subflow_reset(ssk);
893 return false;
d22f4988
CP
894}
895
cc9d2566
PA
896static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
897{
898 u32 old_ack32, cur_ack32;
899
900 if (use_64bit)
901 return cur_ack;
902
903 old_ack32 = (u32)old_ack;
904 cur_ack32 = (u32)cur_ack;
905 cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
906 if (unlikely(before(cur_ack32, old_ack32)))
907 return cur_ack + (1LL << 32);
908 return cur_ack;
909}
910
6f8a612a 911static void ack_update_msk(struct mptcp_sock *msk,
6e628cd3 912 struct sock *ssk,
6f8a612a 913 struct mptcp_options_received *mp_opt)
cc9d2566 914{
7439d687 915 u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
6f8a612a 916 struct sock *sk = (struct sock *)msk;
7439d687
PA
917 u64 old_snd_una;
918
919 mptcp_data_lock(sk);
cc9d2566
PA
920
921 /* avoid ack expansion on update conflict, to reduce the risk of
922 * wrongly expanding to a future ack sequence number, which is way
923 * more dangerous than missing an ack
924 */
7439d687 925 old_snd_una = msk->snd_una;
cc9d2566
PA
926 new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
927
928 /* ACK for data not even sent yet? Ignore. */
eaa2ffab 929 if (after64(new_snd_una, snd_nxt))
cc9d2566
PA
930 new_snd_una = old_snd_una;
931
6f8a612a
FW
932 new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
933
219d0499 934 if (after64(new_wnd_end, msk->wnd_end))
7439d687 935 msk->wnd_end = new_wnd_end;
219d0499
PA
936
937 /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
d09d818e 938 if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
219d0499 939 __mptcp_check_push(sk, ssk);
6f8a612a 940
7439d687
PA
941 if (after64(new_snd_una, old_snd_una)) {
942 msk->snd_una = new_snd_una;
943 __mptcp_data_acked(sk);
cc9d2566 944 }
7439d687 945 mptcp_data_unlock(sk);
cc9d2566
PA
946}
947
1a49b2c2 948bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
3721b9b6
MM
949{
950 /* Skip if DATA_FIN was already received.
951 * If updating simultaneously with the recvmsg loop, values
952 * should match. If they mismatch, the peer is misbehaving and
953 * we will prefer the most recent information.
954 */
955 if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
956 return false;
957
1a49b2c2
MM
958 WRITE_ONCE(msk->rcv_data_fin_seq,
959 expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
3721b9b6
MM
960 WRITE_ONCE(msk->rcv_data_fin, 1);
961
962 return true;
963}
964
1b1c7a0e
PK
965static bool add_addr_hmac_valid(struct mptcp_sock *msk,
966 struct mptcp_options_received *mp_opt)
967{
968 u64 hmac = 0;
969
970 if (mp_opt->echo)
971 return true;
972
973 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
974 hmac = add_addr_generate_hmac(msk->remote_key,
975 msk->local_key,
13832ae2
DC
976 mp_opt->addr_id, &mp_opt->addr,
977 mp_opt->port);
1b1c7a0e
PK
978#if IS_ENABLED(CONFIG_MPTCP_IPV6)
979 else
980 hmac = add_addr6_generate_hmac(msk->remote_key,
981 msk->local_key,
13832ae2
DC
982 mp_opt->addr_id, &mp_opt->addr6,
983 mp_opt->port);
1b1c7a0e
PK
984#endif
985
986 pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
987 msk, (unsigned long long)hmac,
988 (unsigned long long)mp_opt->ahmac);
989
990 return hmac == mp_opt->ahmac;
991}
992
77d0cab9 993void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
648ef4b8 994{
d22f4988 995 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1b1c7a0e 996 struct mptcp_sock *msk = mptcp_sk(subflow->conn);
cfde141e 997 struct mptcp_options_received mp_opt;
648ef4b8
MM
998 struct mptcp_ext *mpext;
999
6e628cd3
PA
1000 if (__mptcp_check_fallback(msk)) {
1001 /* Keep it simple and unconditionally trigger send data cleanup and
1002 * pending queue spooling. We will need to acquire the data lock
1003 * for more accurate checks, and once the lock is acquired, such
1004 * helpers are cheap.
1005 */
1006 mptcp_data_lock(subflow->conn);
219d0499
PA
1007 if (sk_stream_memory_free(sk))
1008 __mptcp_check_push(subflow->conn, sk);
6e628cd3
PA
1009 __mptcp_data_acked(subflow->conn);
1010 mptcp_data_unlock(subflow->conn);
e1ff9e82 1011 return;
6e628cd3 1012 }
e1ff9e82 1013
cfde141e
PA
1014 mptcp_get_options(skb, &mp_opt);
1015 if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
d22f4988 1016 return;
648ef4b8 1017
50c504a2
FW
1018 if (mp_opt.fastclose &&
1019 msk->local_key == mp_opt.rcvr_key) {
1020 WRITE_ONCE(msk->rcv_fastclose, true);
1021 mptcp_schedule_work((struct sock *)msk);
1022 }
1023
cfde141e 1024 if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
1b1c7a0e
PK
1025 struct mptcp_addr_info addr;
1026
cfde141e
PA
1027 addr.port = htons(mp_opt.port);
1028 addr.id = mp_opt.addr_id;
1029 if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
1b1c7a0e 1030 addr.family = AF_INET;
cfde141e 1031 addr.addr = mp_opt.addr;
1b1c7a0e
PK
1032 }
1033#if IS_ENABLED(CONFIG_MPTCP_IPV6)
cfde141e 1034 else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
1b1c7a0e 1035 addr.family = AF_INET6;
cfde141e 1036 addr.addr6 = mp_opt.addr6;
1b1c7a0e
PK
1037 }
1038#endif
a877de06 1039 if (!mp_opt.echo) {
1b1c7a0e 1040 mptcp_pm_add_addr_received(msk, &addr);
a877de06
GT
1041 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
1042 } else {
00cfd77b 1043 mptcp_pm_del_add_timer(msk, &addr);
a877de06
GT
1044 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
1045 }
2fbdd9ea
GT
1046
1047 if (mp_opt.port)
1048 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
1049
cfde141e 1050 mp_opt.add_addr = 0;
1b1c7a0e
PK
1051 }
1052
d0876b22 1053 if (mp_opt.rm_addr) {
5c4a824d 1054 mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
d0876b22
GT
1055 mp_opt.rm_addr = 0;
1056 }
1057
40453a5c
GT
1058 if (mp_opt.mp_prio) {
1059 mptcp_pm_mp_prio_received(sk, mp_opt.backup);
0be2ac28 1060 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
40453a5c
GT
1061 mp_opt.mp_prio = 0;
1062 }
1063
cfde141e 1064 if (!mp_opt.dss)
648ef4b8
MM
1065 return;
1066
cc9d2566
PA
1067 /* we can't wait for recvmsg() to update the ack_seq, otherwise
1068 * monodirectional flows will stuck
1069 */
cfde141e 1070 if (mp_opt.use_ack)
6f8a612a 1071 ack_update_msk(msk, sk, &mp_opt);
cc9d2566 1072
06827b34
MM
1073 /* Zero-data-length packets are dropped by the caller and not
1074 * propagated to the MPTCP layer, so the skb extension does not
1075 * need to be allocated or populated. DATA_FIN information, if
1076 * present, needs to be updated here before the skb is freed.
43b54c6e
MM
1077 */
1078 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
1079 if (mp_opt.data_fin && mp_opt.data_len == 1 &&
1a49b2c2 1080 mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
43b54c6e
MM
1081 schedule_work(&msk->work))
1082 sock_hold(subflow->conn);
06827b34
MM
1083
1084 return;
43b54c6e
MM
1085 }
1086
648ef4b8
MM
1087 mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
1088 if (!mpext)
1089 return;
1090
1091 memset(mpext, 0, sizeof(*mpext));
1092
cfde141e
PA
1093 if (mp_opt.use_map) {
1094 if (mp_opt.mpc_map) {
cc7972ea
CP
1095 /* this is an MP_CAPABLE carrying MPTCP data
1096 * we know this map the first chunk of data
1097 */
1098 mptcp_crypto_key_sha(subflow->remote_key, NULL,
1099 &mpext->data_seq);
1100 mpext->data_seq++;
1101 mpext->subflow_seq = 1;
1102 mpext->dsn64 = 1;
1103 mpext->mpc_map = 1;
a77895db 1104 mpext->data_fin = 0;
cc7972ea 1105 } else {
cfde141e
PA
1106 mpext->data_seq = mp_opt.data_seq;
1107 mpext->subflow_seq = mp_opt.subflow_seq;
1108 mpext->dsn64 = mp_opt.dsn64;
1109 mpext->data_fin = mp_opt.data_fin;
cc7972ea 1110 }
cfde141e 1111 mpext->data_len = mp_opt.data_len;
648ef4b8 1112 mpext->use_map = 1;
648ef4b8 1113 }
648ef4b8
MM
1114}
1115
fa3fe2b1
FW
1116static void mptcp_set_rwin(const struct tcp_sock *tp)
1117{
1118 const struct sock *ssk = (const struct sock *)tp;
1119 const struct mptcp_subflow_context *subflow;
1120 struct mptcp_sock *msk;
1121 u64 ack_seq;
1122
1123 subflow = mptcp_subflow_ctx(ssk);
1124 msk = mptcp_sk(subflow->conn);
1125
1126 ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
1127
1128 if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
1129 WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
1130}
1131
1132void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
1133 struct mptcp_out_options *opts)
eda7acdd 1134{
cc7972ea 1135 if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
eda7acdd
PK
1136 OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
1137 u8 len;
1138
1139 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
1140 len = TCPOLEN_MPTCP_MPC_SYN;
cec37a6e
PK
1141 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
1142 len = TCPOLEN_MPTCP_MPC_SYNACK;
cc7972ea
CP
1143 else if (opts->ext_copy.data_len)
1144 len = TCPOLEN_MPTCP_MPC_ACK_DATA;
eda7acdd
PK
1145 else
1146 len = TCPOLEN_MPTCP_MPC_ACK;
1147
3df523ab
PK
1148 *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
1149 MPTCP_SUPPORTED_VERSION,
1150 MPTCP_CAP_HMAC_SHA256);
cc7972ea
CP
1151
1152 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
1153 opts->suboptions))
1154 goto mp_capable_done;
1155
eda7acdd
PK
1156 put_unaligned_be64(opts->sndr_key, ptr);
1157 ptr += 2;
cc7972ea
CP
1158 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
1159 goto mp_capable_done;
1160
1161 put_unaligned_be64(opts->rcvr_key, ptr);
1162 ptr += 2;
1163 if (!opts->ext_copy.data_len)
1164 goto mp_capable_done;
1165
1166 put_unaligned_be32(opts->ext_copy.data_len << 16 |
1167 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1168 ptr += 1;
eda7acdd 1169 }
6d0060f6 1170
cc7972ea 1171mp_capable_done:
e1ef6832
GT
1172 if ((OPTION_MPTCP_ADD_ADDR
1173#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1174 | OPTION_MPTCP_ADD_ADDR6
1175#endif
1176 ) & opts->suboptions) {
6eb3d1e3
GT
1177 u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
1178 u8 echo = MPTCP_ADDR_ECHO;
1179
e1ef6832
GT
1180#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1181 if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
1182 len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
1183#endif
1184
22fb85ff
GT
1185 if (opts->port)
1186 len += TCPOLEN_MPTCP_PORT_LEN;
1187
3df523ab 1188 if (opts->ahmac) {
6eb3d1e3
GT
1189 len += sizeof(opts->ahmac);
1190 echo = 0;
3df523ab 1191 }
3df523ab 1192
6eb3d1e3
GT
1193 *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
1194 len, echo, opts->addr_id);
e1ef6832
GT
1195 if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
1196 memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
1197 ptr += 1;
3df523ab 1198 }
3df523ab 1199#if IS_ENABLED(CONFIG_MPTCP_IPV6)
e1ef6832
GT
1200 else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
1201 memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
1202 ptr += 4;
3df523ab 1203 }
3df523ab
PK
1204#endif
1205
22fb85ff
GT
1206 if (!opts->port) {
1207 if (opts->ahmac) {
1208 put_unaligned_be64(opts->ahmac, ptr);
1209 ptr += 2;
1210 }
1211 } else {
1212 if (opts->ahmac) {
1213 u8 *bptr = (u8 *)ptr;
1214
1215 put_unaligned_be16(opts->port, bptr);
1216 bptr += 2;
1217 put_unaligned_be64(opts->ahmac, bptr);
1218 bptr += 8;
1219 put_unaligned_be16(TCPOPT_NOP << 8 |
1220 TCPOPT_NOP, bptr);
1221
1222 ptr += 3;
1223 } else {
1224 put_unaligned_be32(opts->port << 16 |
1225 TCPOPT_NOP << 8 |
1226 TCPOPT_NOP, ptr);
1227 ptr += 1;
1228 }
3df523ab
PK
1229 }
1230 }
3df523ab
PK
1231
1232 if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
6445e17a
GT
1233 u8 i = 1;
1234
3df523ab 1235 *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
6445e17a
GT
1236 TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
1237 0, opts->rm_list.ids[0]);
1238
1239 while (i < opts->rm_list.nr) {
1240 u8 id1, id2, id3, id4;
1241
1242 id1 = opts->rm_list.ids[i];
1243 id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
1244 id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
1245 id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
1246 put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
1247 ptr += 1;
1248 i += 4;
1249 }
3df523ab
PK
1250 }
1251
06706542
GT
1252 if (OPTION_MPTCP_PRIO & opts->suboptions) {
1253 const struct sock *ssk = (const struct sock *)tp;
1254 struct mptcp_subflow_context *subflow;
1255
1256 subflow = mptcp_subflow_ctx(ssk);
1257 subflow->send_mp_prio = 0;
1258
1259 *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
1260 TCPOLEN_MPTCP_PRIO,
1261 opts->backup, TCPOPT_NOP);
1262 }
1263
ec3edaa7
PK
1264 if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
1265 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1266 TCPOLEN_MPTCP_MPJ_SYN,
1267 opts->backup, opts->join_id);
1268 put_unaligned_be32(opts->token, ptr);
1269 ptr += 1;
1270 put_unaligned_be32(opts->nonce, ptr);
1271 ptr += 1;
1272 }
1273
f296234c
PK
1274 if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
1275 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1276 TCPOLEN_MPTCP_MPJ_SYNACK,
1277 opts->backup, opts->join_id);
1278 put_unaligned_be64(opts->thmac, ptr);
1279 ptr += 2;
1280 put_unaligned_be32(opts->nonce, ptr);
1281 ptr += 1;
1282 }
1283
ec3edaa7
PK
1284 if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
1285 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1286 TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
1287 memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
1288 ptr += 5;
1289 }
1290
6d0060f6
MM
1291 if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
1292 struct mptcp_ext *mpext = &opts->ext_copy;
1293 u8 len = TCPOLEN_MPTCP_DSS_BASE;
1294 u8 flags = 0;
1295
1296 if (mpext->use_ack) {
a0c1d0ea
CP
1297 flags = MPTCP_DSS_HAS_ACK;
1298 if (mpext->ack64) {
1299 len += TCPOLEN_MPTCP_DSS_ACK64;
1300 flags |= MPTCP_DSS_ACK64;
1301 } else {
1302 len += TCPOLEN_MPTCP_DSS_ACK32;
1303 }
6d0060f6
MM
1304 }
1305
1306 if (mpext->use_map) {
1307 len += TCPOLEN_MPTCP_DSS_MAP64;
1308
1309 /* Use only 64-bit mapping flags for now, add
1310 * support for optional 32-bit mappings later.
1311 */
1312 flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
1313 if (mpext->data_fin)
1314 flags |= MPTCP_DSS_DATA_FIN;
1315 }
1316
3df523ab 1317 *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
6d0060f6
MM
1318
1319 if (mpext->use_ack) {
a0c1d0ea
CP
1320 if (mpext->ack64) {
1321 put_unaligned_be64(mpext->data_ack, ptr);
1322 ptr += 2;
1323 } else {
1324 put_unaligned_be32(mpext->data_ack32, ptr);
1325 ptr += 1;
1326 }
6d0060f6
MM
1327 }
1328
1329 if (mpext->use_map) {
1330 put_unaligned_be64(mpext->data_seq, ptr);
1331 ptr += 2;
1332 put_unaligned_be32(mpext->subflow_seq, ptr);
1333 ptr += 1;
1334 put_unaligned_be32(mpext->data_len << 16 |
1335 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1336 }
1337 }
fa3fe2b1
FW
1338
1339 if (tp)
1340 mptcp_set_rwin(tp);
eda7acdd 1341}