// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static int subflow_rebuild_header(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	int local_id, err = 0;

	if (subflow->request_mptcp && !subflow->token) {
		pr_debug("subflow=%p", sk);
		err = mptcp_token_new_connect(sk);
	} else if (subflow->request_join && !subflow->local_nonce) {
		struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;

		pr_debug("subflow=%p", sk);

		do {
			get_random_bytes(&subflow->local_nonce, sizeof(u32));
		} while (!subflow->local_nonce);

		if (subflow->local_id)
			goto out;

		local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
		if (local_id < 0)
			return -EINVAL;

		subflow->local_id = local_id;
	}

out:
	if (err)
		return err;

	return subflow->icsk_af_ops->rebuild_header(sk);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->mp_capable)
		mptcp_token_destroy_request(subflow_req->token);
	tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

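/* A minimal sketch of the pattern used by the callers below: both nonces
 * are serialized big-endian into the 8-byte message, the digest is taken
 * under the two connection keys, and only the first 64 bits are kept as
 * the "truncated" HMAC (thmac) carried in the SYN-ACK:
 *
 *	u8 hmac[SHA256_DIGEST_SIZE];
 *
 *	subflow_generate_hmac(msk->local_key, msk->remote_key,
 *			      subflow_req->local_nonce,
 *			      subflow_req->remote_nonce, hmac);
 *	subflow_req->thmac = get_unaligned_be64(hmac);
 */
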
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static bool subflow_token_join_request(struct request_sock *req,
				       const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return false;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return false;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);

	sock_put((struct sock *)msk);
	return true;
}

static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	mptcp_get_options(skb, &mp_opt);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return;
#endif

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err;

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
			 subflow_req->remote_nonce);
		if (!subflow_token_join_request(req, skb)) {
			subflow_req->mp_join = 0;
			/* @@ need to trigger RST */
		}
	}
}

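/* To summarize the SYN handling above: a SYN carrying both MP_CAPABLE and
 * MP_JOIN is not honoured and falls back to plain TCP (neither flag is
 * set on the request sock), an MP_CAPABLE SYN allocates a new token for
 * the connection, and an MP_JOIN SYN records the echoed token and nonce
 * and derives the truncated HMAC for the SYN-ACK via
 * subflow_token_join_request().
 */
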
static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

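/* Note the mirrored argument order with respect to
 * subflow_token_join_request(): there the listener computes the thmac
 * with its own key and nonce first; here the initiator validates it, so
 * the peer's ("remote") key and nonce come first.
 */
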
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;
	struct tcp_sock *tp = tcp_sk(sk);

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->conn_finished = 1;

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp && mp_opt.mp_capable) {
		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
	} else if (subflow->request_join && mp_opt.mp_join) {
		subflow->mp_join = 1;
		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);
	} else if (subflow->request_mptcp) {
		tp->is_mptcp = 0;
	}

	if (!tp->is_mptcp)
		return;

	if (subflow->mp_capable) {
		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
			 subflow->remote_key);
		mptcp_finish_connect(sk);

		if (skb) {
			pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
			subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
		}
	} else if (subflow->mp_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
			 subflow, subflow->thmac,
			 subflow->remote_nonce);
		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			subflow->mp_join = 0;
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);

		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (skb)
			subflow->ssn_offset = TCP_SKB_CB(skb)->seq;

		if (!mptcp_finish_join(sk))
			goto do_reset;

		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else {
do_reset:
		tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
	}
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	bool ret;

	subflow_req = mptcp_subflow_rsk(req);
	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	ret = true;
	if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
		ret = false;

	sock_put((struct sock *)msk);
	return ret;
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if the new mptcp socket isn't accepted, it is freed
	 * from the tcp listener sockets request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will not be in
	 * SYN_RECV state and doesn't have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_SYN_RECV) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback_is_fatal = false;
	struct sock *new_msk = NULL;
	bool fallback = false;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* we need a valid 'mp_capable' value later, even when the options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;
	if (tcp_rsk(req)->is_mptcp == 0)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	subflow_req = mptcp_subflow_rsk(req);
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		fallback_is_fatal = true;
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			return NULL;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fall back on ctx allocation failure and on the
		 * pre-reqs checking above. In the latter scenario we
		 * additionally need to reset the context to non-MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			if (ctx) {
				subflow_ulp_fallback(child, ctx);
				kfree_rcu(ctx, rcu);
			}
			goto out;
		}

		if (ctx->mp_capable) {
			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			ctx->remote_key = mp_opt.sndr_key;
			ctx->fully_established = mp_opt.mp_capable;
			ctx->can_ack = mp_opt.mp_capable;
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = mptcp_token_get_sock(ctx->token);
			if (!owner)
				goto dispose_child;

			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	tcp_rsk(req)->drop_req = true;
	tcp_send_active_reset(child, GFP_ATOMIC);
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);

	/* The last child reference will be released by the caller */
	return child;
}

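/* Three outcomes are possible from subflow_syn_recv_sock() above: a
 * fallback to plain TCP (fatal only for MP_JOIN requests), an MP_CAPABLE
 * child that takes ownership of the pre-allocated msk clone, or an
 * MP_JOIN child attached to the owning msk looked up by token.
 */
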
static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN
};

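/* How get_mapping_status() below uses these states: MAPPING_OK means a
 * valid DSS mapping covers the skb at the head of the receive queue,
 * MAPPING_INVALID is a fatal protocol error and leads to a subflow
 * reset, MAPPING_EMPTY means no data is queued, and MAPPING_DATA_FIN
 * flags a DATA_FIN carrying no payload.
 */
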
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

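/* A worked example with hypothetical values: given a previous map at
 * old_seq = 0x00000001fffffff0 with old_data_len = 0x20, a received
 * 32-bit seq of 0x00000010 lies past the 32-bit wrap; the upper half is
 * taken from old_seq + old_data_len + 1 = 0x0000000200000011, so the
 * expanded sequence number is 0x0000000200000010.
 */
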
static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

static enum mapping_status get_mapping_status(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkts to the
			 * receive queue, those are the only 0 len pkts
			 * ever expected here, and we can admit no mapping
			 * only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			pr_debug("DATA_FIN with no payload");
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				return MAPPING_DATA_FIN;
			}
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		subflow->use_64bit_ack = 0;
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
		subflow->use_64bit_ack = 1;
	}

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate the valid mapping on each new skb, because we must
	 * ensure the current skb is completely covered by the available
	 * mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
			      struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	size_t copy_len = min(desc->count, len);

	desc->count -= copy_len;

	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
	return copy_len;
}

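/* The actor copies nothing: it only decrements desc->count and reports
 * copy_len as consumed, so tcp_read_sock() advances over the payload.
 * The discard loop in subflow_check_data_avail() below uses it to drop
 * mapped data that does not match the expected msk-level sequence.
 */
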
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
		size_t delta;
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack)
			break;

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmissions; we can hit "future" values on active backup
		 * subflow switch, and we rely on retransmissions to get
		 * in-sequence data.
		 * Concurrent subflows support will require subflow data
		 * reordering
		 */
		map_remaining = subflow->map_data_len -
				mptcp_subflow_get_map_offset(subflow);
		if (before64(ack_seq, old_ack))
			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
		else
			delta = min_t(size_t, ack_seq - old_ack, map_remaining);

		/* discard mapped data */
		pr_debug("discarding %zu bytes, current map len=%d", delta,
			 map_remaining);
		if (delta) {
			read_descriptor_t desc = {
				.count = delta,
			};
			int ret;

			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
			if (ret < 0) {
				ssk->sk_err = -ret;
				goto fatal;
			}
			if (ret < delta)
				return false;
			if (delta == map_remaining)
				subflow->map_valid = 0;
		}
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sk_buff *skb;

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	if (!subflow_check_data_avail(sk)) {
		subflow->data_avail = 0;
		return false;
	}

	skb = skb_peek(&sk->sk_receive_queue);
	subflow->data_avail = skb &&
			      before(tcp_sk(sk)->copied_seq,
				     TCP_SKB_CB(skb)->end_seq);
	return subflow->data_avail;
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored;
 * as far as the mptcp peer is concerned, that data is still inflight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!subflow->mp_capable && !subflow->mp_join) {
		subflow->tcp_data_ready(sk);

		parent->sk_data_ready(parent);
		return;
	}

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

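/* When the subflow is switched to a v4 mapping (mapped == true), the
 * subflow_v6m_specific ops installed here reuse the IPv4 transmit
 * helpers; see mptcp_subflow_init() below, where queue_xmit, send_check,
 * net_header_len and mtu_reduced are taken from ipv4_specific.
 */
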
static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	struct socket *sf;
	u32 remote_token;
	int addrlen;
	int err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	subflow = mptcp_subflow_ctx(sf->sk);
	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	sf->sk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u", msk, remote_token);
	subflow->remote_token = remote_token;
	subflow->local_id = loc->id;
	subflow->request_join = 1;
	subflow->request_bkup = 1;
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err)
		return err;

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

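/* A minimal usage sketch, mirroring __mptcp_subflow_connect() above
 * (error handling elided):
 *
 *	struct socket *sf;
 *	int err;
 *
 *	err = mptcp_subflow_create_socket(sk, &sf);
 *	if (err)
 *		return err;
 *	subflow = mptcp_subflow_ctx(sf->sk);
 *	... set keys/token, then kernel_bind() + kernel_connect() ...
 */
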
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	/* as recvmsg() does not acquire the subflow socket for ssk selection,
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (subflow->mp_capable && mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name = "mptcp",
	.owner = THIS_MODULE,
	.init = subflow_ulp_init,
	.release = subflow_ulp_release,
	.clone = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void mptcp_subflow_init(void)
{
	subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_v6_specific.rebuild_header = subflow_rebuild_header;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}