mptcp: always parse mptcp options for MPC reqsk
[linux-2.6-block.git] net/mptcp/subflow.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha2.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

#include <trace/events/mptcp.h>

static void mptcp_subflow_ops_undo_override(struct sock *ssk);

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

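/* RFC 8684 join handshake HMAC: HMAC-SHA256 keyed with key1 || key2 over
 * the two 32-bit nonces. Callers truncate the 32-byte digest as needed:
 * 64 bits for the SYN-ACK 'thmac', MPTCPOPT_HMAC_LEN bytes for the HMAC
 * carried by the third ACK.
 */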
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* create the truncated hmac and nonce for the SYN-ACK; the received token
 * is validated by subflow_token_join_request()
 */
static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req)
{
	struct mptcp_sock *msk = subflow_req->msk;
	u8 hmac[SHA256_DIGEST_SIZE];

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
}

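/* MP_JOIN SYN: look up the msk owning the received token and reserve a
 * local address id for the new subflow. On success, the acquired msk
 * reference is stored in the request sock; it is released by
 * subflow_req_destructor() or handed over to the subflow at third-ACK
 * time in subflow_syn_recv_sock().
 */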
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	return msk;
}

static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);
}

static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
{
	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
}

static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
{
	struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP);

	if (mpext) {
		memset(mpext, 0, sizeof(*mpext));
		mpext->reset_reason = reason;
	}
}

/* Init mptcp request socket.
 *
 * Returns an error code if a JOIN has failed and a TCP reset
 * should be sent.
 */
static int subflow_check_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return 0;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
			} else {
				subflow_req->mp_capable = 1;
			}
			return 0;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;
		else
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req);

		/* Can't fall back to TCP in this case. */
		if (!subflow_req->msk) {
			subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
			return -EPERM;
		}

		if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
			pr_debug("syn inet_sport=%d %d",
				 ntohs(inet_sk(sk_listener)->inet_sport),
				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
				sock_put((struct sock *)subflow_req->msk);
				mptcp_token_destroy_request(req);
				tcp_request_sock_ops.destructor(req);
				subflow_req->msk = NULL;
				subflow_req->mp_join = 0;
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
				return -EPERM;
			}
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
		}

		subflow_req_create_thmac(subflow_req);

		if (unlikely(req->syncookie)) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}

	return 0;
}

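/* Syncookie path: the request sock is re-built from the cookie-carrying
 * ACK, so re-parse the MPTCP options and re-validate the MPC/MPJ state
 * that was initialized at SYN time.
 */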
int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	subflow_init_req(req, sk_listener);
	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

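/* The route_req hooks are the earliest point where both the parsed SYN
 * options and an initialized request sock are available, so the MPTCP
 * checks run here; on failure the dst is released and, outside of
 * syncookie mode, a reset is sent back.
 */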
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;
	subflow_init_req(req, sk);

	dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_check_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp_request_sock_ops.send_reset(sk, skb);
	return NULL;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;
	subflow_init_req(req, sk);

	dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_check_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp6_request_sock_ops.send_reset(sk, skb);
	return NULL;
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	/* must hold: tcp_done() could drop last reference on parent */
	sock_hold(sk);

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		return; /* worker will put sk for us */

	sock_put(sk);
}

static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk)
{
	return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
}

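/* Client side SYN-ACK processing: complete the MPC handshake (grabbing
 * the peer key, or falling back to plain TCP if the MP_CAPABLE echo is
 * missing) or the MPJ one (validating the peer's truncated HMAC and
 * computing the full HMAC for the third ACK); invalid joins reset the
 * subflow.
 */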
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	mptcp_propagate_sndbuf(parent, sk);
	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join) {
			subflow->reset_reason = MPTCP_RST_EMPTCP;
			goto do_reset;
		}

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			subflow->reset_reason = MPTCP_RST_EMPTCP;
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);

		if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
			pr_debug("synack inet_dport=%d %d",
				 ntohs(inet_sk(sk)->inet_dport),
				 ntohs(inet_sk(parent)->inet_dport));
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
		}
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	subflow->reset_transient = 0;
	mptcp_subflow_reset(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;
static struct proto tcpv6_prot_override;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

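/* The third-ACK HMAC mirrors the SYN-ACK one, with keys and nonces in
 * remote/local order; crypto_memneq() keeps the comparison constant-time
 * so the check does not leak timing information.
 */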
/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if new mptcp socket isn't accepted, it is freed
	 * from the tcp listener sockets request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */
	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;

	mptcp_subflow_ops_undo_override(sk);
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

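/* Server side third-ACK processing. For MPC requests the options are
 * always parsed: an in-window, out-of-order ACK may legitimately miss
 * the MP_CAPABLE option, forcing a fallback to plain TCP. For MPJ
 * requests a failed HMAC check is fatal: the just-created child is
 * disposed of and a reset is sent.
 */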
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		/* we can receive and accept an in-window, out-of-order pkt,
		 * which may not carry the MP_CAPABLE opt even on mptcp enabled
		 * paths: always try to extract the peer key, and fallback
		 * for packets missing it.
		 * Even OoO DSS packets coming legitimately after dropped or
		 * reordered MPC will cause fallback, but we don't have other
		 * options.
		 */
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fallback on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal) {
				subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
				goto dispose_child;
			}

			subflow_drop_ctx(child);
			goto out;
		}

		/* ssk inherits options of listener sk */
		ctx->setsockopt_seq = listener->setsockopt_seq;

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* record the newly created socket as the first msk
			 * subflow, but don't link it yet into conn_list
			 */
			WRITE_ONCE(mptcp_sk(new_msk)->first, child);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
			mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner) {
				subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
				goto dispose_child;
			}

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;

			if (subflow_use_different_sport(owner, sk)) {
				pr_debug("ack inet_sport=%d %d",
					 ntohs(inet_sk(sk)->inet_sport),
					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
					goto dispose_child;
				}
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
			}

			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;
static struct proto tcp_prot_override;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

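/* A DSS mapping may carry only the lower 32 bits of the data sequence
 * number; expand it to 64 bits, assuming the new mapping starts at or
 * after the end of the previous one.
 */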
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

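/* Core DSS-mapping state machine: reconcile the mapping carried by the
 * skb at the head of the subflow receive queue with the current one.
 * MAPPING_OK: valid mapped data is available; MAPPING_EMPTY: nothing
 * queued; MAPPING_DUMMY: the connection fell back to plain TCP;
 * MAPPING_DATA_FIN: a standalone DATA_FIN; MAPPING_INVALID: protocol
 * violation. A DATA_FIN consumes one byte of data sequence space,
 * mirroring the TCP FIN.
 */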
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkt to the receive
			 * queue, that is the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	trace_get_mapping_status(mpext);

	data_len = mpext->data_len;
	if (data_len == 0) {
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
}

/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
	struct sock *sk = (struct sock *)msk;

	if (likely(ssk->sk_state != TCP_CLOSE))
		return;

	if (skb_queue_empty(&ssk->sk_receive_queue) &&
	    !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
		sock_hold(sk);
		if (!schedule_work(&msk->work))
			sock_put(sk);
	}
}

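/* Map the subflow receive queue onto msk-level sequence space: mappings
 * starting at the msk ack_seq yield MPTCP_SUBFLOW_DATA_AVAIL, mappings
 * beyond it MPTCP_SUBFLOW_OOO_DATA, while data below it is a duplicate
 * and gets discarded.
 */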
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			goto no_data;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			goto no_data;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mappings. Old values are spurious
		 * retransmissions
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

no_data:
	subflow_sched_work_if_closed(msk, ssk);
	return false;
fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	subflow->reset_transient = 0;
	subflow->reset_reason = MPTCP_RST_EMPTCP;
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored:
 * as far as the mptcp peer is concerned, that data is still in flight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = __mptcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		/* MPJ subflows are removed from the accept queue before
		 * reaching here, avoid stray wakeups
		 */
		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
			return;

		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *ssk)
{
	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

	mptcp_propagate_sndbuf(sk, ssk);
	mptcp_write_space(sk);
}

void __mptcp_error_report(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk = mptcp_sk(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err = sock_error(ssk);

		if (!err)
			continue;

		/* only propagate errors on fallen-back sockets or
		 * on MPC connect
		 */
		if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
			continue;

		inet_sk_state_store(sk, inet_sk_state_load(ssk));
		sk->sk_err = -err;

		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
		smp_wmb();
		sk->sk_error_report(sk);
		break;
	}
}

static void subflow_error_report(struct sock *ssk)
{
	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

	mptcp_data_lock(sk);
	if (!sock_owned_by_user(sk))
		__mptcp_error_report(sk);
	else
		set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
	mptcp_data_unlock(sk);
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
			 struct sockaddr_storage *addr,
			 unsigned short family)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		if (info->family == AF_INET)
			in_addr->sin_addr = info->addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		else if (ipv6_addr_v4mapped(&info->addr6))
			in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3];
#endif
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		if (info->family == AF_INET)
			ipv6_addr_set_v4mapped(info->addr.s_addr,
					       &in6_addr->sin6_addr);
		else
			in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

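/* Actively open an additional subflow on behalf of an established msk:
 * create a kernel TCP socket, bind it to the given local address and
 * device, mark it as an MP_JOIN initiator and connect in non-blocking
 * mode; the handshake completes in subflow_finish_connect().
 */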
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote,
			    u8 flags, int ifindex)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr, ssk->sk_family);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (addr.ss_family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr, ssk->sk_family);

	mptcp_add_pending_subflow(msk, subflow);
	mptcp_sockopt_sync(msk, ssk);
	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed_unlink;

	/* discard the subflow socket */
	mptcp_sock_graft(ssk, sk->sk_socket);
	iput(SOCK_INODE(sf));
	return err;

failed_unlink:
	spin_lock_bh(&msk->join_list_lock);
	list_del(&subflow->node);
	spin_unlock_bh(&msk->join_list_lock);
	sock_put(mptcp_subflow_tcp_sock(subflow));

failed:
	subflow->disposable = 1;
	sock_release(sf);
	return err;
}

static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
	struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data,
				*child_skcd = &child->sk_cgrp_data;

	/* only the additional subflows created by kworkers have to be modified */
	if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
	    cgroup_id(sock_cgroup_ptr(child_skcd))) {
#ifdef CONFIG_MEMCG
		struct mem_cgroup *memcg = parent->sk_memcg;

		mem_cgroup_sk_free(child);
		if (memcg && css_tryget(&memcg->css))
			child->sk_memcg = memcg;
#endif /* CONFIG_MEMCG */

		cgroup_sk_free(child_skcd);
		*child_skcd = *parent_skcd;
		cgroup_sk_clone(child_skcd);
	}
#endif /* CONFIG_SOCK_CGROUP_DATA */
}

static void mptcp_subflow_ops_override(struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (ssk->sk_prot == &tcpv6_prot)
		ssk->sk_prot = &tcpv6_prot_override;
	else
#endif
		ssk->sk_prot = &tcp_prot_override;
}

static void mptcp_subflow_ops_undo_override(struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (ssk->sk_prot == &tcpv6_prot_override)
		ssk->sk_prot = &tcpv6_prot;
	else
#endif
		ssk->sk_prot = &tcp_prot;
}

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* the newly created socket has to be in the same cgroup as its parent */
	mptcp_attach_cgroup(sk, sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;
	mptcp_subflow_ops_override(sf->sk);

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);
	INIT_LIST_HEAD(&ctx->delegated_node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_propagate_sndbuf(parent, sk);
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	subflow_sched_work_if_closed(mptcp_sk(parent), sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

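/* ULP attach: save the original socket callbacks in the subflow context
 * and divert them to the MPTCP handlers, so that events on the TCP
 * subflow propagate to the owning msk.
 */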
static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	ctx->tcp_error_report = sk->sk_error_report;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
	sk->sk_error_report = subflow_error_report;
out:
	return err;
}

static void subflow_ulp_release(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
	bool release = true;
	struct sock *sk;

	if (!ctx)
		return;

	sk = ctx->conn;
	if (sk) {
		/* if the msk has been orphaned, keep the ctx
		 * alive, will be freed by __mptcp_close_ssk(),
		 * when the subflow is still unaccepted
		 */
		release = ctx->disposable || list_empty(&ctx->node);
		sock_put(sk);
	}

	mptcp_subflow_ops_undo_override(ssk);
	if (release)
		kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->tcp_error_report = old_ctx->tcp_error_report;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->remote_id = subflow_req->remote_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static void tcp_release_cb_override(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

	if (mptcp_subflow_has_delegated_action(subflow))
		mptcp_subflow_process_delegated(ssk);

	tcp_release_cb(ssk);
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

	tcp_prot_override = tcp_prot;
	tcp_prot_override.release_cb = tcp_release_cb_override;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;

	tcpv6_prot_override = tcpv6_prot;
	tcpv6_prot_override.release_cb = tcp_release_cb_override;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}