Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
17 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) | |
18 | { | |
19 | sock_owned_by_me((const struct sock *)msk); | |
20 | ||
21 | if (likely(!__mptcp_check_fallback(msk))) | |
22 | return NULL; | |
23 | ||
24 | return msk->first; | |
25 | } | |
26 | ||
df00b087 FW |
27 | static u32 sockopt_seq_reset(const struct sock *sk) |
28 | { | |
29 | sock_owned_by_me(sk); | |
30 | ||
31 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
32 | * of listener and established: | |
33 | * s0 = new_listener() | |
34 | * sockopt(s0) - seq is 1 | |
35 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
36 | * sockopt(s0) - seq increments to 2 on s0 | |
37 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
38 | * new ssk completes join, inherits options from s0 // seq 2 | |
39 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
40 | * | |
41 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
42 | * will fail. | |
43 | */ | |
44 | ||
45 | return (u32)sk->sk_state << 24u; | |
46 | } | |
47 | ||
1b3e7ede FW |
48 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
49 | { | |
50 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
51 | ||
52 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
53 | } | |
54 | ||
55 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
56 | unsigned int optlen, int *val) | |
57 | { | |
58 | if (optlen < sizeof(int)) | |
59 | return -EINVAL; | |
60 | ||
61 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
62 | return -EFAULT; | |
63 | ||
64 | return 0; | |
65 | } | |
66 | ||
67 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
68 | { | |
69 | struct mptcp_subflow_context *subflow; | |
70 | struct sock *sk = (struct sock *)msk; | |
71 | ||
72 | lock_sock(sk); | |
73 | sockopt_seq_inc(msk); | |
74 | ||
75 | mptcp_for_each_subflow(msk, subflow) { | |
76 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
77 | bool slow = lock_sock_fast(ssk); | |
78 | ||
79 | switch (optname) { | |
a03c99b2 FW |
80 | case SO_DEBUG: |
81 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
82 | break; | |
1b3e7ede FW |
83 | case SO_KEEPALIVE: |
84 | if (ssk->sk_prot->keepalive) | |
85 | ssk->sk_prot->keepalive(ssk, !!val); | |
86 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
87 | break; | |
88 | case SO_PRIORITY: | |
89 | ssk->sk_priority = val; | |
90 | break; | |
5d0a6bc8 FW |
91 | case SO_SNDBUF: |
92 | case SO_SNDBUFFORCE: | |
93 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
94 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
95 | break; | |
96 | case SO_RCVBUF: | |
97 | case SO_RCVBUFFORCE: | |
98 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
99 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
100 | break; | |
36704413 FW |
101 | case SO_MARK: |
102 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
103 | ssk->sk_mark = sk->sk_mark; | |
104 | sk_dst_reset(ssk); | |
105 | } | |
106 | break; | |
6f0d7198 FW |
107 | case SO_INCOMING_CPU: |
108 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
109 | break; | |
1b3e7ede FW |
110 | } |
111 | ||
112 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
113 | unlock_sock_fast(ssk, slow); | |
114 | } | |
115 | ||
116 | release_sock(sk); | |
117 | } | |
118 | ||
119 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
120 | { | |
121 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
122 | struct sock *sk = (struct sock *)msk; | |
123 | int ret; | |
124 | ||
125 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
126 | optval, sizeof(val)); | |
127 | if (ret) | |
128 | return ret; | |
129 | ||
130 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
131 | return 0; | |
132 | } | |
133 | ||
6f0d7198 FW |
134 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
135 | { | |
136 | struct sock *sk = (struct sock *)msk; | |
137 | ||
138 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
139 | ||
140 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
141 | } | |
142 | ||
9061f24b FW |
143 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
144 | { | |
145 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
146 | struct mptcp_subflow_context *subflow; | |
147 | struct sock *sk = (struct sock *)msk; | |
148 | int ret; | |
149 | ||
150 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
151 | optval, sizeof(val)); | |
152 | if (ret) | |
153 | return ret; | |
154 | ||
155 | lock_sock(sk); | |
156 | mptcp_for_each_subflow(msk, subflow) { | |
157 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
158 | bool slow = lock_sock_fast(ssk); | |
159 | ||
160 | switch (optname) { | |
161 | case SO_TIMESTAMP_OLD: | |
162 | case SO_TIMESTAMP_NEW: | |
163 | case SO_TIMESTAMPNS_OLD: | |
164 | case SO_TIMESTAMPNS_NEW: | |
165 | sock_set_timestamp(sk, optname, !!val); | |
166 | break; | |
167 | case SO_TIMESTAMPING_NEW: | |
168 | case SO_TIMESTAMPING_OLD: | |
169 | sock_set_timestamping(sk, optname, val); | |
170 | break; | |
171 | } | |
172 | ||
173 | unlock_sock_fast(ssk, slow); | |
174 | } | |
175 | ||
176 | release_sock(sk); | |
177 | return 0; | |
178 | } | |
179 | ||
1b3e7ede FW |
180 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
181 | sockptr_t optval, unsigned int optlen) | |
182 | { | |
183 | int val, ret; | |
184 | ||
185 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
186 | if (ret) | |
187 | return ret; | |
188 | ||
189 | switch (optname) { | |
190 | case SO_KEEPALIVE: | |
191 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
192 | return 0; | |
a03c99b2 | 193 | case SO_DEBUG: |
36704413 | 194 | case SO_MARK: |
1b3e7ede | 195 | case SO_PRIORITY: |
5d0a6bc8 FW |
196 | case SO_SNDBUF: |
197 | case SO_SNDBUFFORCE: | |
198 | case SO_RCVBUF: | |
199 | case SO_RCVBUFFORCE: | |
1b3e7ede | 200 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
201 | case SO_INCOMING_CPU: |
202 | mptcp_so_incoming_cpu(msk, val); | |
203 | return 0; | |
9061f24b FW |
204 | case SO_TIMESTAMP_OLD: |
205 | case SO_TIMESTAMP_NEW: | |
206 | case SO_TIMESTAMPNS_OLD: | |
207 | case SO_TIMESTAMPNS_NEW: | |
208 | case SO_TIMESTAMPING_OLD: | |
209 | case SO_TIMESTAMPING_NEW: | |
210 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); | |
1b3e7ede FW |
211 | } |
212 | ||
213 | return -ENOPROTOOPT; | |
214 | } | |
215 | ||
268b1238 FW |
216 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
217 | unsigned int optlen) | |
218 | { | |
219 | struct mptcp_subflow_context *subflow; | |
220 | struct sock *sk = (struct sock *)msk; | |
221 | struct linger ling; | |
222 | sockptr_t kopt; | |
223 | int ret; | |
224 | ||
225 | if (optlen < sizeof(ling)) | |
226 | return -EINVAL; | |
227 | ||
228 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
229 | return -EFAULT; | |
230 | ||
231 | kopt = KERNEL_SOCKPTR(&ling); | |
232 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
233 | if (ret) | |
234 | return ret; | |
235 | ||
236 | lock_sock(sk); | |
237 | sockopt_seq_inc(msk); | |
238 | mptcp_for_each_subflow(msk, subflow) { | |
239 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
240 | bool slow = lock_sock_fast(ssk); | |
241 | ||
242 | if (!ling.l_onoff) { | |
243 | sock_reset_flag(ssk, SOCK_LINGER); | |
244 | } else { | |
245 | ssk->sk_lingertime = sk->sk_lingertime; | |
246 | sock_set_flag(ssk, SOCK_LINGER); | |
247 | } | |
248 | ||
249 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
250 | unlock_sock_fast(ssk, slow); | |
251 | } | |
252 | ||
253 | release_sock(sk); | |
254 | return 0; | |
255 | } | |
256 | ||
0abdde82 PA |
257 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
258 | sockptr_t optval, unsigned int optlen) | |
259 | { | |
260 | struct sock *sk = (struct sock *)msk; | |
261 | struct socket *ssock; | |
262 | int ret; | |
263 | ||
264 | switch (optname) { | |
265 | case SO_REUSEPORT: | |
266 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
267 | case SO_BINDTODEVICE: |
268 | case SO_BINDTOIFINDEX: | |
0abdde82 PA |
269 | lock_sock(sk); |
270 | ssock = __mptcp_nmpc_socket(msk); | |
271 | if (!ssock) { | |
272 | release_sock(sk); | |
273 | return -EINVAL; | |
274 | } | |
275 | ||
276 | ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); | |
277 | if (ret == 0) { | |
278 | if (optname == SO_REUSEPORT) | |
279 | sk->sk_reuseport = ssock->sk->sk_reuseport; | |
280 | else if (optname == SO_REUSEADDR) | |
281 | sk->sk_reuse = ssock->sk->sk_reuse; | |
5d0a6bc8 FW |
282 | else if (optname == SO_BINDTODEVICE) |
283 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
284 | else if (optname == SO_BINDTOIFINDEX) | |
285 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
0abdde82 PA |
286 | } |
287 | release_sock(sk); | |
288 | return ret; | |
1b3e7ede FW |
289 | case SO_KEEPALIVE: |
290 | case SO_PRIORITY: | |
5d0a6bc8 FW |
291 | case SO_SNDBUF: |
292 | case SO_SNDBUFFORCE: | |
293 | case SO_RCVBUF: | |
294 | case SO_RCVBUFFORCE: | |
36704413 | 295 | case SO_MARK: |
6f0d7198 | 296 | case SO_INCOMING_CPU: |
a03c99b2 | 297 | case SO_DEBUG: |
9061f24b FW |
298 | case SO_TIMESTAMP_OLD: |
299 | case SO_TIMESTAMP_NEW: | |
300 | case SO_TIMESTAMPNS_OLD: | |
301 | case SO_TIMESTAMPNS_NEW: | |
302 | case SO_TIMESTAMPING_OLD: | |
303 | case SO_TIMESTAMPING_NEW: | |
1b3e7ede | 304 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); |
268b1238 FW |
305 | case SO_LINGER: |
306 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
a03c99b2 FW |
307 | case SO_NO_CHECK: |
308 | case SO_DONTROUTE: | |
309 | case SO_BROADCAST: | |
310 | case SO_BSDCOMPAT: | |
311 | case SO_PASSCRED: | |
312 | case SO_PASSSEC: | |
313 | case SO_RXQ_OVFL: | |
314 | case SO_WIFI_STATUS: | |
315 | case SO_NOFCS: | |
316 | case SO_SELECT_ERR_QUEUE: | |
317 | return 0; | |
0abdde82 PA |
318 | } |
319 | ||
320 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
321 | } | |
322 | ||
323 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
324 | sockptr_t optval, unsigned int optlen) | |
325 | { | |
326 | struct sock *sk = (struct sock *)msk; | |
327 | int ret = -EOPNOTSUPP; | |
328 | struct socket *ssock; | |
329 | ||
330 | switch (optname) { | |
331 | case IPV6_V6ONLY: | |
332 | lock_sock(sk); | |
333 | ssock = __mptcp_nmpc_socket(msk); | |
334 | if (!ssock) { | |
335 | release_sock(sk); | |
336 | return -EINVAL; | |
337 | } | |
338 | ||
339 | ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); | |
340 | if (ret == 0) | |
341 | sk->sk_ipv6only = ssock->sk->sk_ipv6only; | |
342 | ||
343 | release_sock(sk); | |
344 | break; | |
345 | } | |
346 | ||
347 | return ret; | |
348 | } | |
349 | ||
d9e4c129 PA |
350 | static bool mptcp_supported_sockopt(int level, int optname) |
351 | { | |
352 | if (level == SOL_SOCKET) { | |
353 | switch (optname) { | |
354 | case SO_DEBUG: | |
355 | case SO_REUSEPORT: | |
356 | case SO_REUSEADDR: | |
357 | ||
358 | /* the following ones need a better implementation, | |
359 | * but are quite common we want to preserve them | |
360 | */ | |
361 | case SO_BINDTODEVICE: | |
362 | case SO_SNDBUF: | |
363 | case SO_SNDBUFFORCE: | |
364 | case SO_RCVBUF: | |
365 | case SO_RCVBUFFORCE: | |
366 | case SO_KEEPALIVE: | |
367 | case SO_PRIORITY: | |
368 | case SO_LINGER: | |
369 | case SO_TIMESTAMP_OLD: | |
370 | case SO_TIMESTAMP_NEW: | |
371 | case SO_TIMESTAMPNS_OLD: | |
372 | case SO_TIMESTAMPNS_NEW: | |
373 | case SO_TIMESTAMPING_OLD: | |
374 | case SO_TIMESTAMPING_NEW: | |
375 | case SO_RCVLOWAT: | |
376 | case SO_RCVTIMEO_OLD: | |
377 | case SO_RCVTIMEO_NEW: | |
378 | case SO_SNDTIMEO_OLD: | |
379 | case SO_SNDTIMEO_NEW: | |
380 | case SO_MARK: | |
381 | case SO_INCOMING_CPU: | |
382 | case SO_BINDTOIFINDEX: | |
383 | case SO_BUSY_POLL: | |
384 | case SO_PREFER_BUSY_POLL: | |
385 | case SO_BUSY_POLL_BUDGET: | |
386 | ||
387 | /* next ones are no-op for plain TCP */ | |
388 | case SO_NO_CHECK: | |
389 | case SO_DONTROUTE: | |
390 | case SO_BROADCAST: | |
391 | case SO_BSDCOMPAT: | |
392 | case SO_PASSCRED: | |
393 | case SO_PASSSEC: | |
394 | case SO_RXQ_OVFL: | |
395 | case SO_WIFI_STATUS: | |
396 | case SO_NOFCS: | |
397 | case SO_SELECT_ERR_QUEUE: | |
398 | return true; | |
399 | } | |
400 | ||
401 | /* SO_OOBINLINE is not supported, let's avoid the related mess */ | |
402 | /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
403 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
404 | * we must be careful with subflows | |
405 | */ | |
406 | /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
407 | * explicitly the sk_protocol field | |
408 | */ | |
409 | /* SO_PEEK_OFF is unsupported, as it is for plain TCP */ | |
410 | /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */ | |
411 | /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
412 | * but likely needs careful design | |
413 | */ | |
414 | /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */ | |
415 | /* SO_TXTIME is currently unsupported */ | |
416 | return false; | |
417 | } | |
418 | if (level == SOL_IP) { | |
419 | switch (optname) { | |
420 | /* should work fine */ | |
421 | case IP_FREEBIND: | |
422 | case IP_TRANSPARENT: | |
423 | ||
424 | /* the following are control cmsg related */ | |
425 | case IP_PKTINFO: | |
426 | case IP_RECVTTL: | |
427 | case IP_RECVTOS: | |
428 | case IP_RECVOPTS: | |
429 | case IP_RETOPTS: | |
430 | case IP_PASSSEC: | |
431 | case IP_RECVORIGDSTADDR: | |
432 | case IP_CHECKSUM: | |
433 | case IP_RECVFRAGSIZE: | |
434 | ||
435 | /* common stuff that need some love */ | |
436 | case IP_TOS: | |
437 | case IP_TTL: | |
438 | case IP_BIND_ADDRESS_NO_PORT: | |
439 | case IP_MTU_DISCOVER: | |
440 | case IP_RECVERR: | |
441 | ||
442 | /* possibly less common may deserve some love */ | |
443 | case IP_MINTTL: | |
444 | ||
445 | /* the following is apparently a no-op for plain TCP */ | |
446 | case IP_RECVERR_RFC4884: | |
447 | return true; | |
448 | } | |
449 | ||
450 | /* IP_OPTIONS is not supported, needs subflow care */ | |
451 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
452 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
453 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
454 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
455 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
456 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
457 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
458 | * with mcast stuff | |
459 | */ | |
460 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
461 | return false; | |
462 | } | |
463 | if (level == SOL_IPV6) { | |
464 | switch (optname) { | |
465 | case IPV6_V6ONLY: | |
466 | ||
467 | /* the following are control cmsg related */ | |
468 | case IPV6_RECVPKTINFO: | |
469 | case IPV6_2292PKTINFO: | |
470 | case IPV6_RECVHOPLIMIT: | |
471 | case IPV6_2292HOPLIMIT: | |
472 | case IPV6_RECVRTHDR: | |
473 | case IPV6_2292RTHDR: | |
474 | case IPV6_RECVHOPOPTS: | |
475 | case IPV6_2292HOPOPTS: | |
476 | case IPV6_RECVDSTOPTS: | |
477 | case IPV6_2292DSTOPTS: | |
478 | case IPV6_RECVTCLASS: | |
479 | case IPV6_FLOWINFO: | |
480 | case IPV6_RECVPATHMTU: | |
481 | case IPV6_RECVORIGDSTADDR: | |
482 | case IPV6_RECVFRAGSIZE: | |
483 | ||
484 | /* the following ones need some love but are quite common */ | |
485 | case IPV6_TCLASS: | |
486 | case IPV6_TRANSPARENT: | |
487 | case IPV6_FREEBIND: | |
488 | case IPV6_PKTINFO: | |
489 | case IPV6_2292PKTOPTIONS: | |
490 | case IPV6_UNICAST_HOPS: | |
491 | case IPV6_MTU_DISCOVER: | |
492 | case IPV6_MTU: | |
493 | case IPV6_RECVERR: | |
494 | case IPV6_FLOWINFO_SEND: | |
495 | case IPV6_FLOWLABEL_MGR: | |
496 | case IPV6_MINHOPCOUNT: | |
497 | case IPV6_DONTFRAG: | |
498 | case IPV6_AUTOFLOWLABEL: | |
499 | ||
500 | /* the following one is a no-op for plain TCP */ | |
501 | case IPV6_RECVERR_RFC4884: | |
502 | return true; | |
503 | } | |
504 | ||
505 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
506 | * not supported | |
507 | */ | |
508 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
509 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
510 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
511 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
512 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
513 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
514 | * are not supported better not deal with mcast | |
515 | */ | |
516 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
517 | ||
518 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
519 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
520 | return false; | |
521 | } | |
522 | if (level == SOL_TCP) { | |
523 | switch (optname) { | |
524 | /* the following are no-op or should work just fine */ | |
525 | case TCP_THIN_DUPACK: | |
526 | case TCP_DEFER_ACCEPT: | |
527 | ||
528 | /* the following need some love */ | |
529 | case TCP_MAXSEG: | |
530 | case TCP_NODELAY: | |
531 | case TCP_THIN_LINEAR_TIMEOUTS: | |
532 | case TCP_CONGESTION: | |
533 | case TCP_ULP: | |
534 | case TCP_CORK: | |
535 | case TCP_KEEPIDLE: | |
536 | case TCP_KEEPINTVL: | |
537 | case TCP_KEEPCNT: | |
538 | case TCP_SYNCNT: | |
539 | case TCP_SAVE_SYN: | |
540 | case TCP_LINGER2: | |
541 | case TCP_WINDOW_CLAMP: | |
542 | case TCP_QUICKACK: | |
543 | case TCP_USER_TIMEOUT: | |
544 | case TCP_TIMESTAMP: | |
545 | case TCP_NOTSENT_LOWAT: | |
546 | case TCP_TX_DELAY: | |
547 | return true; | |
548 | } | |
549 | ||
550 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
551 | ||
552 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
553 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
554 | */ | |
555 | /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, | |
556 | * are not supported fastopen is currently unsupported | |
557 | */ | |
558 | /* TCP_INQ is currently unsupported, needs some recvmsg work */ | |
559 | } | |
560 | return false; | |
561 | } | |
562 | ||
aa1fbd94 FW |
563 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
564 | unsigned int optlen) | |
565 | { | |
566 | struct mptcp_subflow_context *subflow; | |
567 | struct sock *sk = (struct sock *)msk; | |
568 | char name[TCP_CA_NAME_MAX]; | |
569 | bool cap_net_admin; | |
570 | int ret; | |
571 | ||
572 | if (optlen < 1) | |
573 | return -EINVAL; | |
574 | ||
575 | ret = strncpy_from_sockptr(name, optval, | |
576 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
577 | if (ret < 0) | |
578 | return -EFAULT; | |
579 | ||
580 | name[ret] = 0; | |
581 | ||
582 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
583 | ||
584 | ret = 0; | |
585 | lock_sock(sk); | |
586 | sockopt_seq_inc(msk); | |
587 | mptcp_for_each_subflow(msk, subflow) { | |
588 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
589 | int err; | |
590 | ||
591 | lock_sock(ssk); | |
592 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
593 | if (err < 0 && ret == 0) | |
594 | ret = err; | |
595 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
596 | release_sock(ssk); | |
597 | } | |
598 | ||
599 | if (ret == 0) | |
20b5759f | 600 | strcpy(msk->ca_name, name); |
aa1fbd94 FW |
601 | |
602 | release_sock(sk); | |
603 | return ret; | |
604 | } | |
605 | ||
606 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
607 | sockptr_t optval, unsigned int optlen) | |
608 | { | |
609 | switch (optname) { | |
610 | case TCP_ULP: | |
611 | return -EOPNOTSUPP; | |
612 | case TCP_CONGESTION: | |
613 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
614 | } | |
615 | ||
616 | return -EOPNOTSUPP; | |
617 | } | |
618 | ||
0abdde82 PA |
619 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
620 | sockptr_t optval, unsigned int optlen) | |
621 | { | |
622 | struct mptcp_sock *msk = mptcp_sk(sk); | |
623 | struct sock *ssk; | |
624 | ||
625 | pr_debug("msk=%p", msk); | |
626 | ||
d9e4c129 PA |
627 | if (!mptcp_supported_sockopt(level, optname)) |
628 | return -ENOPROTOOPT; | |
629 | ||
0abdde82 PA |
630 | if (level == SOL_SOCKET) |
631 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
632 | ||
633 | /* @@ the meaning of setsockopt() when the socket is connected and | |
634 | * there are multiple subflows is not yet defined. It is up to the | |
635 | * MPTCP-level socket to configure the subflows until the subflow | |
636 | * is in TCP fallback, when TCP socket options are passed through | |
637 | * to the one remaining subflow. | |
638 | */ | |
639 | lock_sock(sk); | |
640 | ssk = __mptcp_tcp_fallback(msk); | |
641 | release_sock(sk); | |
642 | if (ssk) | |
643 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
644 | ||
645 | if (level == SOL_IPV6) | |
646 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
647 | ||
aa1fbd94 FW |
648 | if (level == SOL_TCP) |
649 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
650 | ||
651 | return -EOPNOTSUPP; | |
652 | } | |
653 | ||
654 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
655 | char __user *optval, int __user *optlen) | |
656 | { | |
657 | struct sock *sk = (struct sock *)msk; | |
658 | struct socket *ssock; | |
659 | int ret = -EINVAL; | |
660 | struct sock *ssk; | |
661 | ||
662 | lock_sock(sk); | |
663 | ssk = msk->first; | |
664 | if (ssk) { | |
665 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
666 | goto out; | |
667 | } | |
668 | ||
669 | ssock = __mptcp_nmpc_socket(msk); | |
670 | if (!ssock) | |
671 | goto out; | |
672 | ||
673 | ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); | |
674 | ||
675 | out: | |
676 | release_sock(sk); | |
677 | return ret; | |
678 | } | |
679 | ||
680 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
681 | char __user *optval, int __user *optlen) | |
682 | { | |
683 | switch (optname) { | |
684 | case TCP_ULP: | |
685 | case TCP_CONGESTION: | |
686 | case TCP_INFO: | |
687 | case TCP_CC_INFO: | |
688 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, | |
689 | optval, optlen); | |
690 | } | |
0abdde82 PA |
691 | return -EOPNOTSUPP; |
692 | } | |
693 | ||
694 | int mptcp_getsockopt(struct sock *sk, int level, int optname, | |
695 | char __user *optval, int __user *option) | |
696 | { | |
697 | struct mptcp_sock *msk = mptcp_sk(sk); | |
698 | struct sock *ssk; | |
699 | ||
700 | pr_debug("msk=%p", msk); | |
701 | ||
702 | /* @@ the meaning of setsockopt() when the socket is connected and | |
703 | * there are multiple subflows is not yet defined. It is up to the | |
704 | * MPTCP-level socket to configure the subflows until the subflow | |
705 | * is in TCP fallback, when socket options are passed through | |
706 | * to the one remaining subflow. | |
707 | */ | |
708 | lock_sock(sk); | |
709 | ssk = __mptcp_tcp_fallback(msk); | |
710 | release_sock(sk); | |
711 | if (ssk) | |
712 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
713 | ||
aa1fbd94 FW |
714 | if (level == SOL_TCP) |
715 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
0abdde82 PA |
716 | return -EOPNOTSUPP; |
717 | } | |
718 | ||
1b3e7ede FW |
719 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
720 | { | |
5d0a6bc8 | 721 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
722 | struct sock *sk = (struct sock *)msk; |
723 | ||
724 | if (ssk->sk_prot->keepalive) { | |
725 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
726 | ssk->sk_prot->keepalive(ssk, 1); | |
727 | else | |
728 | ssk->sk_prot->keepalive(ssk, 0); | |
729 | } | |
730 | ||
731 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
732 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
733 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
734 | ||
735 | if (sk->sk_userlocks & tx_rx_locks) { | |
736 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
737 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | |
738 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
739 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
740 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
741 | } | |
742 | ||
743 | if (sock_flag(sk, SOCK_LINGER)) { | |
744 | ssk->sk_lingertime = sk->sk_lingertime; | |
745 | sock_set_flag(ssk, SOCK_LINGER); | |
746 | } else { | |
747 | sock_reset_flag(ssk, SOCK_LINGER); | |
748 | } | |
749 | ||
750 | if (sk->sk_mark != ssk->sk_mark) { | |
751 | ssk->sk_mark = sk->sk_mark; | |
752 | sk_dst_reset(ssk); | |
753 | } | |
754 | ||
755 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
756 | ||
757 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
20b5759f | 758 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
1b3e7ede FW |
759 | } |
760 | ||
df00b087 FW |
761 | static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
762 | { | |
1b3e7ede FW |
763 | bool slow = lock_sock_fast(ssk); |
764 | ||
765 | sync_socket_options(msk, ssk); | |
766 | ||
767 | unlock_sock_fast(ssk, slow); | |
df00b087 FW |
768 | } |
769 | ||
78962489 FW |
770 | void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
771 | { | |
df00b087 FW |
772 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
773 | ||
78962489 | 774 | msk_owned_by_me(msk); |
df00b087 FW |
775 | |
776 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { | |
777 | __mptcp_sockopt_sync(msk, ssk); | |
778 | ||
779 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
780 | } | |
78962489 FW |
781 | } |
782 | ||
783 | void mptcp_sockopt_sync_all(struct mptcp_sock *msk) | |
784 | { | |
785 | struct mptcp_subflow_context *subflow; | |
df00b087 FW |
786 | struct sock *sk = (struct sock *)msk; |
787 | u32 seq; | |
78962489 | 788 | |
df00b087 | 789 | seq = sockopt_seq_reset(sk); |
78962489 FW |
790 | |
791 | mptcp_for_each_subflow(msk, subflow) { | |
792 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
df00b087 | 793 | u32 sseq = READ_ONCE(subflow->setsockopt_seq); |
78962489 | 794 | |
df00b087 FW |
795 | if (sseq != msk->setsockopt_seq) { |
796 | __mptcp_sockopt_sync(msk, ssk); | |
797 | WRITE_ONCE(subflow->setsockopt_seq, seq); | |
798 | } else if (sseq != seq) { | |
799 | WRITE_ONCE(subflow->setsockopt_seq, seq); | |
800 | } | |
78962489 FW |
801 | |
802 | cond_resched(); | |
803 | } | |
df00b087 FW |
804 | |
805 | msk->setsockopt_seq = seq; | |
78962489 | 806 | } |