mptcp: Add handling of incoming MP_JOIN requests
[linux-block.git] / net / mptcp / options.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Multipath TCP
3  *
4  * Copyright (c) 2017 - 2019, Intel Corporation.
5  */
6
7 #include <linux/kernel.h>
8 #include <net/tcp.h>
9 #include <net/mptcp.h>
10 #include "protocol.h"
11
12 static bool mptcp_cap_flag_sha256(u8 flags)
13 {
14         return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
15 }
16
/* Decode a single MPTCP TCP option into opt_rx->mptcp.
 *
 * @skb:    segment carrying the option; its TCP flags and length select the
 *          MP_CAPABLE size that is accepted
 * @ptr:    first byte after the option's kind and length bytes (the caller
 *          in this file, mptcp_get_options(), has already consumed both)
 * @opsize: option length as found on the wire, kind and length bytes
 *          included
 * @opt_rx: receives the decoded fields
 *
 * A subtype whose size is not one of the accepted values is dropped without
 * setting its presence flag (mp_capable, dss, add_addr, rm_addr). MP_JOIN
 * sets mp_join first and clears it again on a bad size. Unknown subtypes
 * are ignored.
 */
17 void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
18                         int opsize, struct tcp_options_received *opt_rx)
19 {
20         struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
           /* the subtype is the high nibble of the first payload byte */
21         u8 subtype = *ptr >> 4;
22         int expected_opsize;
23         u8 version;
24         u8 flags;
25
26         switch (subtype) {
27         case MPTCPOPT_MP_CAPABLE:
28                 /* strict size checking */
                   /* non-SYN: size depends on whether the segment has payload;
                    * SYN: size depends on whether ACK is also set
                    */
29                 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
30                         if (skb->len > tcp_hdr(skb)->doff << 2)
31                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
32                         else
33                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
34                 } else {
35                         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
36                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
37                         else
38                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
39                 }
40                 if (opsize != expected_opsize)
41                         break;
42
43                 /* try to be gentle vs future versions on the initial syn */
                   /* i.e. a SYN-sized option may carry a newer version, any
                    * other size must carry exactly the supported one
                    */
44                 version = *ptr++ & MPTCP_VERSION_MASK;
45                 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
46                         if (version != MPTCP_SUPPORTED_VERSION)
47                                 break;
48                 } else if (version < MPTCP_SUPPORTED_VERSION) {
49                         break;
50                 }
51
52                 flags = *ptr++;
53                 if (!mptcp_cap_flag_sha256(flags) ||
54                     (flags & MPTCP_CAP_EXTENSIBILITY))
55                         break;
56
57                 /* RFC 6824, Section 3.1:
58                  * "For the Checksum Required bit (labeled "A"), if either
59                  * host requires the use of checksums, checksums MUST be used.
60                  * In other words, the only way for checksums not to be used
61                  * is if both hosts in their SYNs set A=0."
62                  *
63                  * Section 3.3.0:
64                  * "If a checksum is not present when its use has been
65                  * negotiated, the receiver MUST close the subflow with a RST as
66                  * it is considered broken."
67                  *
68                  * We don't implement DSS checksum - fall back to TCP.
69                  */
70                 if (flags & MPTCP_CAP_CHECKSUM_REQD)
71                         break;
72
73                 mp_opt->mp_capable = 1;
                   /* the keys and the data length are present only in the
                    * larger variants of the option, hence the size tests
                    */
74                 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
75                         mp_opt->sndr_key = get_unaligned_be64(ptr);
76                         ptr += 8;
77                 }
78                 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
79                         mp_opt->rcvr_key = get_unaligned_be64(ptr);
80                         ptr += 8;
81                 }
82                 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
83                         /* Section 3.1.:
84                          * "the data parameters in a MP_CAPABLE are semantically
85                          * equivalent to those in a DSS option and can be used
86                          * interchangeably."
87                          */
88                         mp_opt->dss = 1;
89                         mp_opt->use_map = 1;
90                         mp_opt->mpc_map = 1;
91                         mp_opt->data_len = get_unaligned_be16(ptr);
92                         ptr += 2;
93                 }
94                 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
95                          version, flags, opsize, mp_opt->sndr_key,
96                          mp_opt->rcvr_key, mp_opt->data_len);
97                 break;
98
99         case MPTCPOPT_MP_JOIN:
                    /* set optimistically, cleared again below on a bad size */
100                 mp_opt->mp_join = 1;
101                 if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
102                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
103                         mp_opt->join_id = *ptr++;
104                         mp_opt->token = get_unaligned_be32(ptr);
105                         ptr += 4;
106                         mp_opt->nonce = get_unaligned_be32(ptr);
107                         ptr += 4;
108                         pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
109                                  mp_opt->backup, mp_opt->join_id,
110                                  mp_opt->token, mp_opt->nonce);
111                 } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
112                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
113                         mp_opt->join_id = *ptr++;
114                         mp_opt->thmac = get_unaligned_be64(ptr);
115                         ptr += 8;
116                         mp_opt->nonce = get_unaligned_be32(ptr);
117                         ptr += 4;
118                         pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
119                                  mp_opt->backup, mp_opt->join_id,
120                                  mp_opt->thmac, mp_opt->nonce);
121                 } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
                            /* skip the subtype byte and the byte after it,
                             * the HMAC follows
                             */
122                         ptr += 2;
123                         memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
124                         pr_debug("MP_JOIN hmac");
125                 } else {
126                         pr_warn("MP_JOIN bad option size");
127                         mp_opt->mp_join = 0;
128                 }
129                 break;
130
131         case MPTCPOPT_DSS:
132                 pr_debug("DSS");
                    /* skip the subtype byte; the DSS flags are in the next one */
133                 ptr++;
134
135                 /* we must clear 'mpc_map' to be able to detect MP_CAPABLE
136                  * map vs DSS map in mptcp_incoming_options(), and reconstruct
137                  * map info accordingly
138                  */
139                 mp_opt->mpc_map = 0;
                    /* NOTE(review): the flag bits below are stored before opsize
                     * is validated; on a size mismatch they stay set while
                     * mp_opt->dss is left untouched.
                     */
140                 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
141                 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
142                 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
143                 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
144                 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
145                 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
146
147                 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
148                          mp_opt->data_fin, mp_opt->dsn64,
149                          mp_opt->use_map, mp_opt->ack64,
150                          mp_opt->use_ack);
151
                    /* derive the expected length from the flags just read */
152                 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
153
154                 if (mp_opt->use_ack) {
155                         if (mp_opt->ack64)
156                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
157                         else
158                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
159                 }
160
161                 if (mp_opt->use_map) {
162                         if (mp_opt->dsn64)
163                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
164                         else
165                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
166                 }
167
168                 /* RFC 6824, Section 3.3:
169                  * If a checksum is present, but its use had
170                  * not been negotiated in the MP_CAPABLE handshake,
171                  * the checksum field MUST be ignored.
172                  */
                    /* so a trailing checksum is tolerated in the size check and
                     * never read
                     */
173                 if (opsize != expected_opsize &&
174                     opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
175                         break;
176
177                 mp_opt->dss = 1;
178
179                 if (mp_opt->use_ack) {
180                         if (mp_opt->ack64) {
181                                 mp_opt->data_ack = get_unaligned_be64(ptr);
182                                 ptr += 8;
183                         } else {
184                                 mp_opt->data_ack = get_unaligned_be32(ptr);
185                                 ptr += 4;
186                         }
187
188                         pr_debug("data_ack=%llu", mp_opt->data_ack);
189                 }
190
191                 if (mp_opt->use_map) {
192                         if (mp_opt->dsn64) {
193                                 mp_opt->data_seq = get_unaligned_be64(ptr);
194                                 ptr += 8;
195                         } else {
196                                 mp_opt->data_seq = get_unaligned_be32(ptr);
197                                 ptr += 4;
198                         }
199
200                         mp_opt->subflow_seq = get_unaligned_be32(ptr);
201                         ptr += 4;
202
203                         mp_opt->data_len = get_unaligned_be16(ptr);
204                         ptr += 2;
205
206                         pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
207                                  mp_opt->data_seq, mp_opt->subflow_seq,
208                                  mp_opt->data_len);
209                 }
210
211                 break;
212
213         case MPTCPOPT_ADD_ADDR:
                    /* the echo bit shares the first byte with the subtype; the
                     * address family is inferred from the option size, checked
                     * against a different set of sizes for echo and non-echo
                     */
214                 mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
215                 if (!mp_opt->echo) {
216                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
217                             opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
218                                 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
219 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
220                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
221                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
222                                 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
223 #endif
224                         else
225                                 break;
226                 } else {
227                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
228                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
229                                 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
230 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
231                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
232                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
233                                 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
234 #endif
235                         else
236                                 break;
237                 }
238
239                 mp_opt->add_addr = 1;
                    /* port stays 0 unless one of the *_PORT sizes matched */
240                 mp_opt->port = 0;
241                 mp_opt->addr_id = *ptr++;
242                 pr_debug("ADD_ADDR: id=%d", mp_opt->addr_id);
243                 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
244                         memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
245                         ptr += 4;
246                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
247                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
248                                 mp_opt->port = get_unaligned_be16(ptr);
249                                 ptr += 2;
250                         }
251                 }
252 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
253                 else {
254                         memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
255                         ptr += 16;
256                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
257                             opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
258                                 mp_opt->port = get_unaligned_be16(ptr);
259                                 ptr += 2;
260                         }
261                 }
262 #endif
                    /* the trailing HMAC is read only for non-echo options */
263                 if (!mp_opt->echo) {
264                         mp_opt->ahmac = get_unaligned_be64(ptr);
265                         ptr += 8;
266                 }
267                 break;
268
269         case MPTCPOPT_RM_ADDR:
270                 if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
271                         break;
272
273                 mp_opt->rm_addr = 1;
274                 mp_opt->rm_id = *ptr++;
275                 pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
276                 break;
277
278         default:
279                 break;
280         }
281 }
282
283 void mptcp_get_options(const struct sk_buff *skb,
284                        struct tcp_options_received *opt_rx)
285 {
286         const unsigned char *ptr;
287         const struct tcphdr *th = tcp_hdr(skb);
288         int length = (th->doff * 4) - sizeof(struct tcphdr);
289
290         ptr = (const unsigned char *)(th + 1);
291
292         while (length > 0) {
293                 int opcode = *ptr++;
294                 int opsize;
295
296                 switch (opcode) {
297                 case TCPOPT_EOL:
298                         return;
299                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
300                         length--;
301                         continue;
302                 default:
303                         opsize = *ptr++;
304                         if (opsize < 2) /* "silly options" */
305                                 return;
306                         if (opsize > length)
307                                 return; /* don't parse partial options */
308                         if (opcode == TCPOPT_MPTCP)
309                                 mptcp_parse_option(skb, ptr, opsize, opt_rx);
310                         ptr += opsize - 2;
311                         length -= opsize;
312                 }
313         }
314 }
315
316 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
317                        unsigned int *size, struct mptcp_out_options *opts)
318 {
319         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
320
321         /* we will use snd_isn to detect first pkt [re]transmission
322          * in mptcp_established_options_mp()
323          */
324         subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
325         if (subflow->request_mptcp) {
326                 pr_debug("local_key=%llu", subflow->local_key);
327                 opts->suboptions = OPTION_MPTCP_MPC_SYN;
328                 opts->sndr_key = subflow->local_key;
329                 *size = TCPOLEN_MPTCP_MPC_SYN;
330                 return true;
331         }
332         return false;
333 }
334
335 void mptcp_rcv_synsent(struct sock *sk)
336 {
337         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
338         struct tcp_sock *tp = tcp_sk(sk);
339
340         pr_debug("subflow=%p", subflow);
341         if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
342                 subflow->mp_capable = 1;
343                 subflow->can_ack = 1;
344                 subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
345         } else {
346                 tcp_sk(sk)->is_mptcp = 0;
347         }
348 }
349
350 static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
351                                          unsigned int *size,
352                                          unsigned int remaining,
353                                          struct mptcp_out_options *opts)
354 {
355         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
356         struct mptcp_ext *mpext;
357         unsigned int data_len;
358
359         pr_debug("subflow=%p fully established=%d seq=%x:%x remaining=%d",
360                  subflow, subflow->fully_established, subflow->snd_isn,
361                  skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
362
363         if (subflow->mp_capable && !subflow->fully_established && skb &&
364             subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
365                 /* When skb is not available, we better over-estimate the
366                  * emitted options len. A full DSS option is longer than
367                  * TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit
368                  * that.
369                  */
370                 mpext = mptcp_get_ext(skb);
371                 data_len = mpext ? mpext->data_len : 0;
372
373                 /* we will check ext_copy.data_len in mptcp_write_options() to
374                  * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
375                  * TCPOLEN_MPTCP_MPC_ACK
376                  */
377                 opts->ext_copy.data_len = data_len;
378                 opts->suboptions = OPTION_MPTCP_MPC_ACK;
379                 opts->sndr_key = subflow->local_key;
380                 opts->rcvr_key = subflow->remote_key;
381
382                 /* Section 3.1.
383                  * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
384                  * packets that start the first subflow of an MPTCP connection,
385                  * as well as the first packet that carries data
386                  */
387                 if (data_len > 0)
388                         *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
389                 else
390                         *size = TCPOLEN_MPTCP_MPC_ACK;
391
392                 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
393                          subflow, subflow->local_key, subflow->remote_key,
394                          data_len);
395
396                 return true;
397         }
398         return false;
399 }
400
401 static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
402                                  struct mptcp_ext *ext)
403 {
404         if (!ext->use_map) {
405                 /* RFC6824 requires a DSS mapping with specific values
406                  * if DATA_FIN is set but no data payload is mapped
407                  */
408                 ext->data_fin = 1;
409                 ext->use_map = 1;
410                 ext->dsn64 = 1;
411                 ext->data_seq = subflow->data_fin_tx_seq;
412                 ext->subflow_seq = 0;
413                 ext->data_len = 1;
414         } else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) {
415                 /* If there's an existing DSS mapping and it is the
416                  * final mapping, DATA_FIN consumes 1 additional byte of
417                  * mapping space.
418                  */
419                 ext->data_fin = 1;
420                 ext->data_len++;
421         }
422 }
423
424 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
425                                           unsigned int *size,
426                                           unsigned int remaining,
427                                           struct mptcp_out_options *opts)
428 {
429         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
430         unsigned int dss_size = 0;
431         struct mptcp_ext *mpext;
432         struct mptcp_sock *msk;
433         unsigned int ack_size;
434         bool ret = false;
435         u8 tcp_fin;
436
437         if (skb) {
438                 mpext = mptcp_get_ext(skb);
439                 tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
440         } else {
441                 mpext = NULL;
442                 tcp_fin = 0;
443         }
444
445         if (!skb || (mpext && mpext->use_map) || tcp_fin) {
446                 unsigned int map_size;
447
448                 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
449
450                 remaining -= map_size;
451                 dss_size = map_size;
452                 if (mpext)
453                         opts->ext_copy = *mpext;
454
455                 if (skb && tcp_fin && subflow->data_fin_tx_enable)
456                         mptcp_write_data_fin(subflow, &opts->ext_copy);
457                 ret = true;
458         }
459
460         /* passive sockets msk will set the 'can_ack' after accept(), even
461          * if the first subflow may have the already the remote key handy
462          */
463         opts->ext_copy.use_ack = 0;
464         msk = mptcp_sk(subflow->conn);
465         if (!READ_ONCE(msk->can_ack)) {
466                 *size = ALIGN(dss_size, 4);
467                 return ret;
468         }
469
470         ack_size = TCPOLEN_MPTCP_DSS_ACK64;
471
472         /* Add kind/length/subtype/flag overhead if mapping is not populated */
473         if (dss_size == 0)
474                 ack_size += TCPOLEN_MPTCP_DSS_BASE;
475
476         dss_size += ack_size;
477
478         opts->ext_copy.data_ack = msk->ack_seq;
479         opts->ext_copy.ack64 = 1;
480         opts->ext_copy.use_ack = 1;
481
482         *size = ALIGN(dss_size, 4);
483         return true;
484 }
485
486 static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
487                                   struct in_addr *addr)
488 {
489         u8 hmac[MPTCP_ADDR_HMAC_LEN];
490         u8 msg[7];
491
492         msg[0] = addr_id;
493         memcpy(&msg[1], &addr->s_addr, 4);
494         msg[5] = 0;
495         msg[6] = 0;
496
497         mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
498
499         return get_unaligned_be64(hmac);
500 }
501
502 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
503 static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
504                                    struct in6_addr *addr)
505 {
506         u8 hmac[MPTCP_ADDR_HMAC_LEN];
507         u8 msg[19];
508
509         msg[0] = addr_id;
510         memcpy(&msg[1], &addr->s6_addr, 16);
511         msg[17] = 0;
512         msg[18] = 0;
513
514         mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
515
516         return get_unaligned_be64(hmac);
517 }
518 #endif
519
520 static bool mptcp_established_options_addr(struct sock *sk,
521                                            unsigned int *size,
522                                            unsigned int remaining,
523                                            struct mptcp_out_options *opts)
524 {
525         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
526         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
527         struct mptcp_addr_info saddr;
528         int len;
529
530         if (!mptcp_pm_should_signal(msk) ||
531             !(mptcp_pm_addr_signal(msk, remaining, &saddr)))
532                 return false;
533
534         len = mptcp_add_addr_len(saddr.family);
535         if (remaining < len)
536                 return false;
537
538         *size = len;
539         opts->addr_id = saddr.id;
540         if (saddr.family == AF_INET) {
541                 opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
542                 opts->addr = saddr.addr;
543                 opts->ahmac = add_addr_generate_hmac(msk->local_key,
544                                                      msk->remote_key,
545                                                      opts->addr_id,
546                                                      &opts->addr);
547         }
548 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
549         else if (saddr.family == AF_INET6) {
550                 opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
551                 opts->addr6 = saddr.addr6;
552                 opts->ahmac = add_addr6_generate_hmac(msk->local_key,
553                                                       msk->remote_key,
554                                                       opts->addr_id,
555                                                       &opts->addr6);
556         }
557 #endif
558         pr_debug("addr_id=%d, ahmac=%llu", opts->addr_id, opts->ahmac);
559
560         return true;
561 }
562
563 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
564                                unsigned int *size, unsigned int remaining,
565                                struct mptcp_out_options *opts)
566 {
567         unsigned int opt_size = 0;
568         bool ret = false;
569
570         opts->suboptions = 0;
571
572         if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
573                 ret = true;
574         else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
575                                                opts))
576                 ret = true;
577
578         /* we reserved enough space for the above options, and exceeding the
579          * TCP option space would be fatal
580          */
581         if (WARN_ON_ONCE(opt_size > remaining))
582                 return false;
583
584         *size += opt_size;
585         remaining -= opt_size;
586         if (mptcp_established_options_addr(sk, &opt_size, remaining, opts)) {
587                 *size += opt_size;
588                 remaining -= opt_size;
589                 ret = true;
590         }
591
592         return ret;
593 }
594
595 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
596                           struct mptcp_out_options *opts)
597 {
598         struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
599
600         if (subflow_req->mp_capable) {
601                 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
602                 opts->sndr_key = subflow_req->local_key;
603                 *size = TCPOLEN_MPTCP_MPC_SYNACK;
604                 pr_debug("subflow_req=%p, local_key=%llu",
605                          subflow_req, subflow_req->local_key);
606                 return true;
607         } else if (subflow_req->mp_join) {
608                 opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
609                 opts->backup = subflow_req->backup;
610                 opts->join_id = subflow_req->local_id;
611                 opts->thmac = subflow_req->thmac;
612                 opts->nonce = subflow_req->local_nonce;
613                 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
614                          subflow_req, opts->backup, opts->join_id,
615                          opts->thmac, opts->nonce);
616                 *size = TCPOLEN_MPTCP_MPJ_SYNACK;
617                 return true;
618         }
619         return false;
620 }
621
622 static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
623                                     struct mptcp_subflow_context *subflow,
624                                     struct sk_buff *skb,
625                                     struct mptcp_options_received *mp_opt)
626 {
627         /* here we can process OoO, in-window pkts, only in-sequence 4th ack
628          * will make the subflow fully established
629          */
630         if (likely(subflow->fully_established)) {
631                 /* on passive sockets, check for 3rd ack retransmission
632                  * note that msk is always set by subflow_syn_recv_sock()
633                  * for mp_join subflows
634                  */
635                 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
636                     TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
637                     subflow->mp_join && mp_opt->mp_join &&
638                     READ_ONCE(msk->pm.server_side))
639                         tcp_send_ack(sk);
640                 goto fully_established;
641         }
642
643         /* we should process OoO packets before the first subflow is fully
644          * established, but not expected for MP_JOIN subflows
645          */
646         if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
647                 return subflow->mp_capable;
648
649         if (mp_opt->use_ack) {
650                 /* subflows are fully established as soon as we get any
651                  * additional ack.
652                  */
653                 subflow->fully_established = 1;
654                 goto fully_established;
655         }
656
657         WARN_ON_ONCE(subflow->can_ack);
658
659         /* If the first established packet does not contain MP_CAPABLE + data
660          * then fallback to TCP
661          */
662         if (!mp_opt->mp_capable) {
663                 subflow->mp_capable = 0;
664                 tcp_sk(sk)->is_mptcp = 0;
665                 return false;
666         }
667
668         subflow->fully_established = 1;
669         subflow->remote_key = mp_opt->sndr_key;
670         subflow->can_ack = 1;
671
672 fully_established:
673         if (likely(subflow->pm_notified))
674                 return true;
675
676         subflow->pm_notified = 1;
677         if (subflow->mp_join)
678                 mptcp_pm_subflow_established(msk, subflow);
679         else
680                 mptcp_pm_fully_established(msk);
681         return true;
682 }
683
684 static bool add_addr_hmac_valid(struct mptcp_sock *msk,
685                                 struct mptcp_options_received *mp_opt)
686 {
687         u64 hmac = 0;
688
689         if (mp_opt->echo)
690                 return true;
691
692         if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
693                 hmac = add_addr_generate_hmac(msk->remote_key,
694                                               msk->local_key,
695                                               mp_opt->addr_id, &mp_opt->addr);
696 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
697         else
698                 hmac = add_addr6_generate_hmac(msk->remote_key,
699                                                msk->local_key,
700                                                mp_opt->addr_id, &mp_opt->addr6);
701 #endif
702
703         pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
704                  msk, (unsigned long long)hmac,
705                  (unsigned long long)mp_opt->ahmac);
706
707         return hmac == mp_opt->ahmac;
708 }
709
710 void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
711                             struct tcp_options_received *opt_rx)
712 {
713         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
714         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
715         struct mptcp_options_received *mp_opt;
716         struct mptcp_ext *mpext;
717
718         mp_opt = &opt_rx->mptcp;
719         if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
720                 return;
721
722         if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
723                 struct mptcp_addr_info addr;
724
725                 addr.port = htons(mp_opt->port);
726                 addr.id = mp_opt->addr_id;
727                 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
728                         addr.family = AF_INET;
729                         addr.addr = mp_opt->addr;
730                 }
731 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
732                 else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
733                         addr.family = AF_INET6;
734                         addr.addr6 = mp_opt->addr6;
735                 }
736 #endif
737                 if (!mp_opt->echo)
738                         mptcp_pm_add_addr_received(msk, &addr);
739                 mp_opt->add_addr = 0;
740         }
741
742         if (!mp_opt->dss)
743                 return;
744
745         mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
746         if (!mpext)
747                 return;
748
749         memset(mpext, 0, sizeof(*mpext));
750
751         if (mp_opt->use_map) {
752                 if (mp_opt->mpc_map) {
753                         /* this is an MP_CAPABLE carrying MPTCP data
754                          * we know this map the first chunk of data
755                          */
756                         mptcp_crypto_key_sha(subflow->remote_key, NULL,
757                                              &mpext->data_seq);
758                         mpext->data_seq++;
759                         mpext->subflow_seq = 1;
760                         mpext->dsn64 = 1;
761                         mpext->mpc_map = 1;
762                 } else {
763                         mpext->data_seq = mp_opt->data_seq;
764                         mpext->subflow_seq = mp_opt->subflow_seq;
765                         mpext->dsn64 = mp_opt->dsn64;
766                 }
767                 mpext->data_len = mp_opt->data_len;
768                 mpext->use_map = 1;
769         }
770
771         if (mp_opt->use_ack) {
772                 mpext->data_ack = mp_opt->data_ack;
773                 mpext->use_ack = 1;
774                 mpext->ack64 = mp_opt->ack64;
775         }
776
777         mpext->data_fin = mp_opt->data_fin;
778 }
779
780 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
781 {
782         if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
783              OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
784                 u8 len;
785
786                 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
787                         len = TCPOLEN_MPTCP_MPC_SYN;
788                 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
789                         len = TCPOLEN_MPTCP_MPC_SYNACK;
790                 else if (opts->ext_copy.data_len)
791                         len = TCPOLEN_MPTCP_MPC_ACK_DATA;
792                 else
793                         len = TCPOLEN_MPTCP_MPC_ACK;
794
795                 *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
796                                       MPTCP_SUPPORTED_VERSION,
797                                       MPTCP_CAP_HMAC_SHA256);
798
799                 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
800                     opts->suboptions))
801                         goto mp_capable_done;
802
803                 put_unaligned_be64(opts->sndr_key, ptr);
804                 ptr += 2;
805                 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
806                         goto mp_capable_done;
807
808                 put_unaligned_be64(opts->rcvr_key, ptr);
809                 ptr += 2;
810                 if (!opts->ext_copy.data_len)
811                         goto mp_capable_done;
812
813                 put_unaligned_be32(opts->ext_copy.data_len << 16 |
814                                    TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
815                 ptr += 1;
816         }
817
818 mp_capable_done:
819         if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
820                 if (opts->ahmac)
821                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
822                                               TCPOLEN_MPTCP_ADD_ADDR, 0,
823                                               opts->addr_id);
824                 else
825                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
826                                               TCPOLEN_MPTCP_ADD_ADDR_BASE,
827                                               MPTCP_ADDR_ECHO,
828                                               opts->addr_id);
829                 memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
830                 ptr += 1;
831                 if (opts->ahmac) {
832                         put_unaligned_be64(opts->ahmac, ptr);
833                         ptr += 2;
834                 }
835         }
836
837 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
838         if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
839                 if (opts->ahmac)
840                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
841                                               TCPOLEN_MPTCP_ADD_ADDR6, 0,
842                                               opts->addr_id);
843                 else
844                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
845                                               TCPOLEN_MPTCP_ADD_ADDR6_BASE,
846                                               MPTCP_ADDR_ECHO,
847                                               opts->addr_id);
848                 memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
849                 ptr += 4;
850                 if (opts->ahmac) {
851                         put_unaligned_be64(opts->ahmac, ptr);
852                         ptr += 2;
853                 }
854         }
855 #endif
856
857         if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
858                 *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
859                                       TCPOLEN_MPTCP_RM_ADDR_BASE,
860                                       0, opts->rm_id);
861         }
862
863         if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
864                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
865                                       TCPOLEN_MPTCP_MPJ_SYNACK,
866                                       opts->backup, opts->join_id);
867                 put_unaligned_be64(opts->thmac, ptr);
868                 ptr += 2;
869                 put_unaligned_be32(opts->nonce, ptr);
870                 ptr += 1;
871         }
872
873         if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
874                 struct mptcp_ext *mpext = &opts->ext_copy;
875                 u8 len = TCPOLEN_MPTCP_DSS_BASE;
876                 u8 flags = 0;
877
878                 if (mpext->use_ack) {
879                         len += TCPOLEN_MPTCP_DSS_ACK64;
880                         flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64;
881                 }
882
883                 if (mpext->use_map) {
884                         len += TCPOLEN_MPTCP_DSS_MAP64;
885
886                         /* Use only 64-bit mapping flags for now, add
887                          * support for optional 32-bit mappings later.
888                          */
889                         flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
890                         if (mpext->data_fin)
891                                 flags |= MPTCP_DSS_DATA_FIN;
892                 }
893
894                 *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
895
896                 if (mpext->use_ack) {
897                         put_unaligned_be64(mpext->data_ack, ptr);
898                         ptr += 2;
899                 }
900
901                 if (mpext->use_map) {
902                         put_unaligned_be64(mpext->data_seq, ptr);
903                         ptr += 2;
904                         put_unaligned_be32(mpext->subflow_seq, ptr);
905                         ptr += 1;
906                         put_unaligned_be32(mpext->data_len << 16 |
907                                            TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
908                 }
909         }
910 }