mptcp: parse and emit MP_CAPABLE option according to v1 spec
[linux-2.6-block.git] / net / mptcp / options.c
CommitLineData
eda7acdd
PK
1// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
7#include <linux/kernel.h>
8#include <net/tcp.h>
9#include <net/mptcp.h>
10#include "protocol.h"
11
65492c5a
PA
12static bool mptcp_cap_flag_sha256(u8 flags)
13{
14 return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
15}
16
cc7972ea
CP
17void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
18 int opsize, struct tcp_options_received *opt_rx)
eda7acdd
PK
19{
20 struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
21 u8 subtype = *ptr >> 4;
648ef4b8 22 int expected_opsize;
eda7acdd
PK
23 u8 version;
24 u8 flags;
25
26 switch (subtype) {
27 case MPTCPOPT_MP_CAPABLE:
cc7972ea
CP
28 /* strict size checking */
29 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
30 if (skb->len > tcp_hdr(skb)->doff << 2)
31 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
32 else
33 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
34 } else {
35 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
36 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
37 else
38 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
39 }
40 if (opsize != expected_opsize)
eda7acdd
PK
41 break;
42
cc7972ea 43 /* try to be gentle vs future versions on the initial syn */
eda7acdd 44 version = *ptr++ & MPTCP_VERSION_MASK;
cc7972ea
CP
45 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
46 if (version != MPTCP_SUPPORTED_VERSION)
47 break;
48 } else if (version < MPTCP_SUPPORTED_VERSION) {
eda7acdd 49 break;
cc7972ea 50 }
eda7acdd
PK
51
52 flags = *ptr++;
65492c5a 53 if (!mptcp_cap_flag_sha256(flags) ||
eda7acdd
PK
54 (flags & MPTCP_CAP_EXTENSIBILITY))
55 break;
56
57 /* RFC 6824, Section 3.1:
58 * "For the Checksum Required bit (labeled "A"), if either
59 * host requires the use of checksums, checksums MUST be used.
60 * In other words, the only way for checksums not to be used
61 * is if both hosts in their SYNs set A=0."
62 *
63 * Section 3.3.0:
64 * "If a checksum is not present when its use has been
65 * negotiated, the receiver MUST close the subflow with a RST as
66 * it is considered broken."
67 *
68 * We don't implement DSS checksum - fall back to TCP.
69 */
70 if (flags & MPTCP_CAP_CHECKSUM_REQD)
71 break;
72
73 mp_opt->mp_capable = 1;
cc7972ea
CP
74 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
75 mp_opt->sndr_key = get_unaligned_be64(ptr);
76 ptr += 8;
77 }
78 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
eda7acdd
PK
79 mp_opt->rcvr_key = get_unaligned_be64(ptr);
80 ptr += 8;
eda7acdd 81 }
cc7972ea
CP
82 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
83 /* Section 3.1.:
84 * "the data parameters in a MP_CAPABLE are semantically
85 * equivalent to those in a DSS option and can be used
86 * interchangeably."
87 */
88 mp_opt->dss = 1;
89 mp_opt->use_map = 1;
90 mp_opt->mpc_map = 1;
91 mp_opt->data_len = get_unaligned_be16(ptr);
92 ptr += 2;
93 }
94 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
95 version, flags, opsize, mp_opt->sndr_key,
96 mp_opt->rcvr_key, mp_opt->data_len);
eda7acdd
PK
97 break;
98
99 case MPTCPOPT_DSS:
100 pr_debug("DSS");
648ef4b8
MM
101 ptr++;
102
cc7972ea
CP
103 /* we must clear 'mpc_map' be able to detect MP_CAPABLE
104 * map vs DSS map in mptcp_incoming_options(), and reconstruct
105 * map info accordingly
106 */
107 mp_opt->mpc_map = 0;
648ef4b8
MM
108 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
109 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
110 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
111 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
112 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
113 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
114
115 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
116 mp_opt->data_fin, mp_opt->dsn64,
117 mp_opt->use_map, mp_opt->ack64,
118 mp_opt->use_ack);
119
120 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
121
122 if (mp_opt->use_ack) {
123 if (mp_opt->ack64)
124 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
125 else
126 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
127 }
128
129 if (mp_opt->use_map) {
130 if (mp_opt->dsn64)
131 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
132 else
133 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
134 }
135
136 /* RFC 6824, Section 3.3:
137 * If a checksum is present, but its use had
138 * not been negotiated in the MP_CAPABLE handshake,
139 * the checksum field MUST be ignored.
140 */
141 if (opsize != expected_opsize &&
142 opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
143 break;
144
eda7acdd 145 mp_opt->dss = 1;
648ef4b8
MM
146
147 if (mp_opt->use_ack) {
148 if (mp_opt->ack64) {
149 mp_opt->data_ack = get_unaligned_be64(ptr);
150 ptr += 8;
151 } else {
152 mp_opt->data_ack = get_unaligned_be32(ptr);
153 ptr += 4;
154 }
155
156 pr_debug("data_ack=%llu", mp_opt->data_ack);
157 }
158
159 if (mp_opt->use_map) {
160 if (mp_opt->dsn64) {
161 mp_opt->data_seq = get_unaligned_be64(ptr);
162 ptr += 8;
163 } else {
164 mp_opt->data_seq = get_unaligned_be32(ptr);
165 ptr += 4;
166 }
167
168 mp_opt->subflow_seq = get_unaligned_be32(ptr);
169 ptr += 4;
170
171 mp_opt->data_len = get_unaligned_be16(ptr);
172 ptr += 2;
173
174 pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
175 mp_opt->data_seq, mp_opt->subflow_seq,
176 mp_opt->data_len);
177 }
178
eda7acdd
PK
179 break;
180
181 default:
182 break;
183 }
184}
185
cec37a6e
PK
186void mptcp_get_options(const struct sk_buff *skb,
187 struct tcp_options_received *opt_rx)
188{
189 const unsigned char *ptr;
190 const struct tcphdr *th = tcp_hdr(skb);
191 int length = (th->doff * 4) - sizeof(struct tcphdr);
192
193 ptr = (const unsigned char *)(th + 1);
194
195 while (length > 0) {
196 int opcode = *ptr++;
197 int opsize;
198
199 switch (opcode) {
200 case TCPOPT_EOL:
201 return;
202 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
203 length--;
204 continue;
205 default:
206 opsize = *ptr++;
207 if (opsize < 2) /* "silly options" */
208 return;
209 if (opsize > length)
210 return; /* don't parse partial options */
211 if (opcode == TCPOPT_MPTCP)
cc7972ea 212 mptcp_parse_option(skb, ptr, opsize, opt_rx);
cec37a6e
PK
213 ptr += opsize - 2;
214 length -= opsize;
215 }
216 }
217}
218
cc7972ea
CP
219bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
220 unsigned int *size, struct mptcp_out_options *opts)
cec37a6e
PK
221{
222 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
223
cc7972ea
CP
224 /* we will use snd_isn to detect first pkt [re]transmission
225 * in mptcp_established_options_mp()
226 */
227 subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
cec37a6e
PK
228 if (subflow->request_mptcp) {
229 pr_debug("local_key=%llu", subflow->local_key);
230 opts->suboptions = OPTION_MPTCP_MPC_SYN;
231 opts->sndr_key = subflow->local_key;
232 *size = TCPOLEN_MPTCP_MPC_SYN;
233 return true;
234 }
235 return false;
236}
237
238void mptcp_rcv_synsent(struct sock *sk)
239{
240 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
241 struct tcp_sock *tp = tcp_sk(sk);
242
243 pr_debug("subflow=%p", subflow);
244 if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
245 subflow->mp_capable = 1;
246 subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
247 } else {
248 tcp_sk(sk)->is_mptcp = 0;
249 }
250}
251
cc7972ea
CP
252static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
253 unsigned int *size,
6d0060f6
MM
254 unsigned int remaining,
255 struct mptcp_out_options *opts)
cec37a6e
PK
256{
257 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
cc7972ea
CP
258 struct mptcp_ext *mpext;
259 unsigned int data_len;
260
261 pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow,
262 subflow->fourth_ack, subflow->snd_isn,
263 skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
264
265 if (subflow->mp_capable && !subflow->fourth_ack && skb &&
266 subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
267 /* When skb is not available, we better over-estimate the
268 * emitted options len. A full DSS option is longer than
269 * TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit
270 * that.
271 */
272 mpext = mptcp_get_ext(skb);
273 data_len = mpext ? mpext->data_len : 0;
cec37a6e 274
cc7972ea
CP
275 /* we will check ext_copy.data_len in mptcp_write_options() to
276 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
277 * TCPOLEN_MPTCP_MPC_ACK
278 */
279 opts->ext_copy.data_len = data_len;
cec37a6e
PK
280 opts->suboptions = OPTION_MPTCP_MPC_ACK;
281 opts->sndr_key = subflow->local_key;
282 opts->rcvr_key = subflow->remote_key;
cc7972ea
CP
283
284 /* Section 3.1.
285 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
286 * packets that start the first subflow of an MPTCP connection,
287 * as well as the first packet that carries data
288 */
289 if (data_len > 0)
290 *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
291 else
292 *size = TCPOLEN_MPTCP_MPC_ACK;
293
294 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
295 subflow, subflow->local_key, subflow->remote_key,
296 data_len);
297
cec37a6e
PK
298 return true;
299 }
300 return false;
301}
302
6d0060f6
MM
303static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
304 struct mptcp_ext *ext)
305{
306 ext->data_fin = 1;
307
308 if (!ext->use_map) {
309 /* RFC6824 requires a DSS mapping with specific values
310 * if DATA_FIN is set but no data payload is mapped
311 */
312 ext->use_map = 1;
313 ext->dsn64 = 1;
314 ext->data_seq = mptcp_sk(subflow->conn)->write_seq;
315 ext->subflow_seq = 0;
316 ext->data_len = 1;
317 } else {
318 /* If there's an existing DSS mapping, DATA_FIN consumes
319 * 1 additional byte of mapping space.
320 */
321 ext->data_len++;
322 }
323}
324
325static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
326 unsigned int *size,
327 unsigned int remaining,
328 struct mptcp_out_options *opts)
329{
330 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
331 unsigned int dss_size = 0;
332 struct mptcp_ext *mpext;
333 struct mptcp_sock *msk;
334 unsigned int ack_size;
335 u8 tcp_fin;
336
337 if (skb) {
338 mpext = mptcp_get_ext(skb);
339 tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
340 } else {
341 mpext = NULL;
342 tcp_fin = 0;
343 }
344
345 if (!skb || (mpext && mpext->use_map) || tcp_fin) {
346 unsigned int map_size;
347
348 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
349
350 remaining -= map_size;
351 dss_size = map_size;
352 if (mpext)
353 opts->ext_copy = *mpext;
354
355 if (skb && tcp_fin &&
356 subflow->conn->sk_state != TCP_ESTABLISHED)
357 mptcp_write_data_fin(subflow, &opts->ext_copy);
358 }
359
360 ack_size = TCPOLEN_MPTCP_DSS_ACK64;
361
362 /* Add kind/length/subtype/flag overhead if mapping is not populated */
363 if (dss_size == 0)
364 ack_size += TCPOLEN_MPTCP_DSS_BASE;
365
366 dss_size += ack_size;
367
368 msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn);
369 if (msk) {
370 opts->ext_copy.data_ack = msk->ack_seq;
371 } else {
372 mptcp_crypto_key_sha(mptcp_subflow_ctx(sk)->remote_key,
373 NULL, &opts->ext_copy.data_ack);
374 opts->ext_copy.data_ack++;
375 }
376
377 opts->ext_copy.ack64 = 1;
378 opts->ext_copy.use_ack = 1;
379
380 *size = ALIGN(dss_size, 4);
381 return true;
382}
383
384bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
385 unsigned int *size, unsigned int remaining,
386 struct mptcp_out_options *opts)
387{
388 unsigned int opt_size = 0;
389 bool ret = false;
390
cc7972ea 391 if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
6d0060f6
MM
392 ret = true;
393 else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
394 opts))
395 ret = true;
396
397 /* we reserved enough space for the above options, and exceeding the
398 * TCP option space would be fatal
399 */
400 if (WARN_ON_ONCE(opt_size > remaining))
401 return false;
402
403 *size += opt_size;
404 remaining -= opt_size;
405
406 return ret;
407}
408
cec37a6e
PK
409bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
410 struct mptcp_out_options *opts)
411{
412 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
413
414 if (subflow_req->mp_capable) {
415 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
416 opts->sndr_key = subflow_req->local_key;
417 *size = TCPOLEN_MPTCP_MPC_SYNACK;
418 pr_debug("subflow_req=%p, local_key=%llu",
419 subflow_req, subflow_req->local_key);
420 return true;
421 }
422 return false;
423}
424
648ef4b8
MM
425void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
426 struct tcp_options_received *opt_rx)
427{
428 struct mptcp_options_received *mp_opt;
429 struct mptcp_ext *mpext;
430
431 mp_opt = &opt_rx->mptcp;
432
433 if (!mp_opt->dss)
434 return;
435
436 mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
437 if (!mpext)
438 return;
439
440 memset(mpext, 0, sizeof(*mpext));
441
442 if (mp_opt->use_map) {
cc7972ea
CP
443 if (mp_opt->mpc_map) {
444 struct mptcp_subflow_context *subflow =
445 mptcp_subflow_ctx(sk);
446
447 /* this is an MP_CAPABLE carrying MPTCP data
448 * we know this map the first chunk of data
449 */
450 mptcp_crypto_key_sha(subflow->remote_key, NULL,
451 &mpext->data_seq);
452 mpext->data_seq++;
453 mpext->subflow_seq = 1;
454 mpext->dsn64 = 1;
455 mpext->mpc_map = 1;
456 } else {
457 mpext->data_seq = mp_opt->data_seq;
458 mpext->subflow_seq = mp_opt->subflow_seq;
459 mpext->dsn64 = mp_opt->dsn64;
460 }
648ef4b8
MM
461 mpext->data_len = mp_opt->data_len;
462 mpext->use_map = 1;
648ef4b8
MM
463 }
464
465 if (mp_opt->use_ack) {
466 mpext->data_ack = mp_opt->data_ack;
467 mpext->use_ack = 1;
468 mpext->ack64 = mp_opt->ack64;
469 }
470
471 mpext->data_fin = mp_opt->data_fin;
472}
473
eda7acdd
PK
474void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
475{
cc7972ea 476 if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
eda7acdd
PK
477 OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
478 u8 len;
479
480 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
481 len = TCPOLEN_MPTCP_MPC_SYN;
cec37a6e
PK
482 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
483 len = TCPOLEN_MPTCP_MPC_SYNACK;
cc7972ea
CP
484 else if (opts->ext_copy.data_len)
485 len = TCPOLEN_MPTCP_MPC_ACK_DATA;
eda7acdd
PK
486 else
487 len = TCPOLEN_MPTCP_MPC_ACK;
488
489 *ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) |
490 (MPTCPOPT_MP_CAPABLE << 12) |
491 (MPTCP_SUPPORTED_VERSION << 8) |
65492c5a 492 MPTCP_CAP_HMAC_SHA256);
cc7972ea
CP
493
494 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
495 opts->suboptions))
496 goto mp_capable_done;
497
eda7acdd
PK
498 put_unaligned_be64(opts->sndr_key, ptr);
499 ptr += 2;
cc7972ea
CP
500 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
501 goto mp_capable_done;
502
503 put_unaligned_be64(opts->rcvr_key, ptr);
504 ptr += 2;
505 if (!opts->ext_copy.data_len)
506 goto mp_capable_done;
507
508 put_unaligned_be32(opts->ext_copy.data_len << 16 |
509 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
510 ptr += 1;
eda7acdd 511 }
6d0060f6 512
cc7972ea 513mp_capable_done:
6d0060f6
MM
514 if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
515 struct mptcp_ext *mpext = &opts->ext_copy;
516 u8 len = TCPOLEN_MPTCP_DSS_BASE;
517 u8 flags = 0;
518
519 if (mpext->use_ack) {
520 len += TCPOLEN_MPTCP_DSS_ACK64;
521 flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64;
522 }
523
524 if (mpext->use_map) {
525 len += TCPOLEN_MPTCP_DSS_MAP64;
526
527 /* Use only 64-bit mapping flags for now, add
528 * support for optional 32-bit mappings later.
529 */
530 flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
531 if (mpext->data_fin)
532 flags |= MPTCP_DSS_DATA_FIN;
533 }
534
535 *ptr++ = htonl((TCPOPT_MPTCP << 24) |
536 (len << 16) |
537 (MPTCPOPT_DSS << 12) |
538 (flags));
539
540 if (mpext->use_ack) {
541 put_unaligned_be64(mpext->data_ack, ptr);
542 ptr += 2;
543 }
544
545 if (mpext->use_map) {
546 put_unaligned_be64(mpext->data_seq, ptr);
547 ptr += 2;
548 put_unaligned_be32(mpext->subflow_seq, ptr);
549 ptr += 1;
550 put_unaligned_be32(mpext->data_len << 16 |
551 TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
552 }
553 }
eda7acdd 554}