Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
49243207 PA |
17 | #define MIN_INFO_OPTLEN_SIZE 16 |
18 | #define MIN_FULL_INFO_OPTLEN_SIZE 40 | |
06f15cee | 19 | |
0abdde82 PA |
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
21 | { | |
109cdeb8 | 22 | msk_owned_by_me(msk); |
0abdde82 PA |
23 | |
24 | if (likely(!__mptcp_check_fallback(msk))) | |
25 | return NULL; | |
26 | ||
27 | return msk->first; | |
28 | } | |
29 | ||
df00b087 FW |
30 | static u32 sockopt_seq_reset(const struct sock *sk) |
31 | { | |
32 | sock_owned_by_me(sk); | |
33 | ||
34 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
35 | * of listener and established: | |
36 | * s0 = new_listener() | |
37 | * sockopt(s0) - seq is 1 | |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
39 | * sockopt(s0) - seq increments to 2 on s0 | |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
41 | * new ssk completes join, inherits options from s0 // seq 2 | |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
43 | * | |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
45 | * will fail. | |
46 | */ | |
47 | ||
48 | return (u32)sk->sk_state << 24u; | |
49 | } | |
50 | ||
1b3e7ede FW |
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
52 | { | |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
54 | ||
55 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
56 | } | |
57 | ||
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
59 | unsigned int optlen, int *val) | |
60 | { | |
61 | if (optlen < sizeof(int)) | |
62 | return -EINVAL; | |
63 | ||
64 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
65 | return -EFAULT; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
71 | { | |
72 | struct mptcp_subflow_context *subflow; | |
73 | struct sock *sk = (struct sock *)msk; | |
74 | ||
75 | lock_sock(sk); | |
76 | sockopt_seq_inc(msk); | |
77 | ||
78 | mptcp_for_each_subflow(msk, subflow) { | |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
80 | bool slow = lock_sock_fast(ssk); | |
81 | ||
82 | switch (optname) { | |
a03c99b2 FW |
83 | case SO_DEBUG: |
84 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
85 | break; | |
1b3e7ede FW |
86 | case SO_KEEPALIVE: |
87 | if (ssk->sk_prot->keepalive) | |
88 | ssk->sk_prot->keepalive(ssk, !!val); | |
89 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
90 | break; | |
91 | case SO_PRIORITY: | |
10bbf165 | 92 | WRITE_ONCE(ssk->sk_priority, val); |
1b3e7ede | 93 | break; |
5d0a6bc8 FW |
94 | case SO_SNDBUF: |
95 | case SO_SNDBUFFORCE: | |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
98 | break; | |
99 | case SO_RCVBUF: | |
100 | case SO_RCVBUFFORCE: | |
101 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
102 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
103 | break; | |
36704413 FW |
104 | case SO_MARK: |
105 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
3c5b4d69 | 106 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); |
36704413 FW |
107 | sk_dst_reset(ssk); |
108 | } | |
109 | break; | |
6f0d7198 FW |
110 | case SO_INCOMING_CPU: |
111 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
112 | break; | |
1b3e7ede FW |
113 | } |
114 | ||
115 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
116 | unlock_sock_fast(ssk, slow); | |
117 | } | |
118 | ||
119 | release_sock(sk); | |
120 | } | |
121 | ||
122 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
123 | { | |
124 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
125 | struct sock *sk = (struct sock *)msk; | |
126 | int ret; | |
127 | ||
128 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
129 | optval, sizeof(val)); | |
130 | if (ret) | |
131 | return ret; | |
132 | ||
133 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
134 | return 0; | |
135 | } | |
136 | ||
6f0d7198 FW |
137 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
138 | { | |
139 | struct sock *sk = (struct sock *)msk; | |
140 | ||
141 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
142 | ||
143 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
144 | } | |
145 | ||
9061f24b FW |
146 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
147 | { | |
148 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
149 | struct mptcp_subflow_context *subflow; | |
150 | struct sock *sk = (struct sock *)msk; | |
151 | int ret; | |
152 | ||
153 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
154 | optval, sizeof(val)); | |
155 | if (ret) | |
156 | return ret; | |
157 | ||
158 | lock_sock(sk); | |
159 | mptcp_for_each_subflow(msk, subflow) { | |
160 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
161 | bool slow = lock_sock_fast(ssk); | |
162 | ||
6c9a0a0f | 163 | sock_set_timestamp(sk, optname, !!val); |
9061f24b FW |
164 | unlock_sock_fast(ssk, slow); |
165 | } | |
166 | ||
167 | release_sock(sk); | |
168 | return 0; | |
169 | } | |
170 | ||
1b3e7ede | 171 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
6c9a0a0f YL |
172 | sockptr_t optval, |
173 | unsigned int optlen) | |
1b3e7ede FW |
174 | { |
175 | int val, ret; | |
176 | ||
177 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
178 | if (ret) | |
179 | return ret; | |
180 | ||
181 | switch (optname) { | |
182 | case SO_KEEPALIVE: | |
183 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
184 | return 0; | |
a03c99b2 | 185 | case SO_DEBUG: |
36704413 | 186 | case SO_MARK: |
1b3e7ede | 187 | case SO_PRIORITY: |
5d0a6bc8 FW |
188 | case SO_SNDBUF: |
189 | case SO_SNDBUFFORCE: | |
190 | case SO_RCVBUF: | |
191 | case SO_RCVBUFFORCE: | |
1b3e7ede | 192 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
193 | case SO_INCOMING_CPU: |
194 | mptcp_so_incoming_cpu(msk, val); | |
195 | return 0; | |
9061f24b FW |
196 | case SO_TIMESTAMP_OLD: |
197 | case SO_TIMESTAMP_NEW: | |
198 | case SO_TIMESTAMPNS_OLD: | |
199 | case SO_TIMESTAMPNS_NEW: | |
9061f24b | 200 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
1b3e7ede FW |
201 | } |
202 | ||
203 | return -ENOPROTOOPT; | |
204 | } | |
205 | ||
6c9a0a0f YL |
206 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
207 | int optname, | |
208 | sockptr_t optval, | |
209 | unsigned int optlen) | |
210 | { | |
211 | struct mptcp_subflow_context *subflow; | |
212 | struct sock *sk = (struct sock *)msk; | |
d463126e YL |
213 | struct so_timestamping timestamping; |
214 | int ret; | |
6c9a0a0f | 215 | |
d463126e YL |
216 | if (optlen == sizeof(timestamping)) { |
217 | if (copy_from_sockptr(×tamping, optval, | |
218 | sizeof(timestamping))) | |
219 | return -EFAULT; | |
220 | } else if (optlen == sizeof(int)) { | |
221 | memset(×tamping, 0, sizeof(timestamping)); | |
222 | ||
223 | if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) | |
224 | return -EFAULT; | |
225 | } else { | |
226 | return -EINVAL; | |
227 | } | |
6c9a0a0f YL |
228 | |
229 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
d463126e YL |
230 | KERNEL_SOCKPTR(×tamping), |
231 | sizeof(timestamping)); | |
6c9a0a0f YL |
232 | if (ret) |
233 | return ret; | |
234 | ||
235 | lock_sock(sk); | |
236 | ||
237 | mptcp_for_each_subflow(msk, subflow) { | |
238 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
239 | bool slow = lock_sock_fast(ssk); | |
240 | ||
d463126e | 241 | sock_set_timestamping(sk, optname, timestamping); |
6c9a0a0f YL |
242 | unlock_sock_fast(ssk, slow); |
243 | } | |
244 | ||
245 | release_sock(sk); | |
246 | ||
247 | return 0; | |
248 | } | |
249 | ||
268b1238 FW |
250 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
251 | unsigned int optlen) | |
252 | { | |
253 | struct mptcp_subflow_context *subflow; | |
254 | struct sock *sk = (struct sock *)msk; | |
255 | struct linger ling; | |
256 | sockptr_t kopt; | |
257 | int ret; | |
258 | ||
259 | if (optlen < sizeof(ling)) | |
260 | return -EINVAL; | |
261 | ||
262 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
263 | return -EFAULT; | |
264 | ||
265 | kopt = KERNEL_SOCKPTR(&ling); | |
266 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
267 | if (ret) | |
268 | return ret; | |
269 | ||
270 | lock_sock(sk); | |
271 | sockopt_seq_inc(msk); | |
272 | mptcp_for_each_subflow(msk, subflow) { | |
273 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
274 | bool slow = lock_sock_fast(ssk); | |
275 | ||
276 | if (!ling.l_onoff) { | |
277 | sock_reset_flag(ssk, SOCK_LINGER); | |
278 | } else { | |
279 | ssk->sk_lingertime = sk->sk_lingertime; | |
280 | sock_set_flag(ssk, SOCK_LINGER); | |
281 | } | |
282 | ||
283 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
284 | unlock_sock_fast(ssk, slow); | |
285 | } | |
286 | ||
287 | release_sock(sk); | |
288 | return 0; | |
289 | } | |
290 | ||
0abdde82 PA |
291 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
292 | sockptr_t optval, unsigned int optlen) | |
293 | { | |
294 | struct sock *sk = (struct sock *)msk; | |
f0bc514b | 295 | struct sock *ssk; |
0abdde82 PA |
296 | int ret; |
297 | ||
298 | switch (optname) { | |
299 | case SO_REUSEPORT: | |
300 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
301 | case SO_BINDTODEVICE: |
302 | case SO_BINDTOIFINDEX: | |
0abdde82 | 303 | lock_sock(sk); |
3f326a82 PA |
304 | ssk = __mptcp_nmpc_sk(msk); |
305 | if (IS_ERR(ssk)) { | |
0abdde82 | 306 | release_sock(sk); |
3f326a82 | 307 | return PTR_ERR(ssk); |
0abdde82 PA |
308 | } |
309 | ||
f0bc514b | 310 | ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); |
0abdde82 PA |
311 | if (ret == 0) { |
312 | if (optname == SO_REUSEPORT) | |
f0bc514b | 313 | sk->sk_reuseport = ssk->sk_reuseport; |
0abdde82 | 314 | else if (optname == SO_REUSEADDR) |
f0bc514b | 315 | sk->sk_reuse = ssk->sk_reuse; |
5d0a6bc8 | 316 | else if (optname == SO_BINDTODEVICE) |
f0bc514b | 317 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
5d0a6bc8 | 318 | else if (optname == SO_BINDTOIFINDEX) |
f0bc514b | 319 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
0abdde82 PA |
320 | } |
321 | release_sock(sk); | |
322 | return ret; | |
1b3e7ede FW |
323 | case SO_KEEPALIVE: |
324 | case SO_PRIORITY: | |
5d0a6bc8 FW |
325 | case SO_SNDBUF: |
326 | case SO_SNDBUFFORCE: | |
327 | case SO_RCVBUF: | |
328 | case SO_RCVBUFFORCE: | |
36704413 | 329 | case SO_MARK: |
6f0d7198 | 330 | case SO_INCOMING_CPU: |
a03c99b2 | 331 | case SO_DEBUG: |
9061f24b FW |
332 | case SO_TIMESTAMP_OLD: |
333 | case SO_TIMESTAMP_NEW: | |
334 | case SO_TIMESTAMPNS_OLD: | |
335 | case SO_TIMESTAMPNS_NEW: | |
6c9a0a0f YL |
336 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
337 | optlen); | |
9061f24b FW |
338 | case SO_TIMESTAMPING_OLD: |
339 | case SO_TIMESTAMPING_NEW: | |
6c9a0a0f YL |
340 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
341 | optval, optlen); | |
268b1238 FW |
342 | case SO_LINGER: |
343 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
7a009a70 FW |
344 | case SO_RCVLOWAT: |
345 | case SO_RCVTIMEO_OLD: | |
346 | case SO_RCVTIMEO_NEW: | |
d6ab5ea2 GT |
347 | case SO_SNDTIMEO_OLD: |
348 | case SO_SNDTIMEO_NEW: | |
7a009a70 FW |
349 | case SO_BUSY_POLL: |
350 | case SO_PREFER_BUSY_POLL: | |
351 | case SO_BUSY_POLL_BUDGET: | |
352 | /* No need to copy: only relevant for msk */ | |
353 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
a03c99b2 FW |
354 | case SO_NO_CHECK: |
355 | case SO_DONTROUTE: | |
356 | case SO_BROADCAST: | |
357 | case SO_BSDCOMPAT: | |
358 | case SO_PASSCRED: | |
5e2ff670 | 359 | case SO_PASSPIDFD: |
a03c99b2 FW |
360 | case SO_PASSSEC: |
361 | case SO_RXQ_OVFL: | |
362 | case SO_WIFI_STATUS: | |
363 | case SO_NOFCS: | |
364 | case SO_SELECT_ERR_QUEUE: | |
365 | return 0; | |
0abdde82 PA |
366 | } |
367 | ||
7a009a70 FW |
368 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
369 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
370 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
371 | * we must be careful with subflows | |
372 | * | |
373 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
374 | * explicitly the sk_protocol field | |
375 | * | |
376 | * SO_PEEK_OFF is unsupported, as it is for plain TCP | |
377 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows | |
378 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
379 | * but likely needs careful design | |
380 | * | |
381 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg | |
382 | * SO_TXTIME is currently unsupported | |
383 | */ | |
384 | ||
385 | return -EOPNOTSUPP; | |
0abdde82 PA |
386 | } |
387 | ||
388 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
389 | sockptr_t optval, unsigned int optlen) | |
390 | { | |
391 | struct sock *sk = (struct sock *)msk; | |
392 | int ret = -EOPNOTSUPP; | |
f0bc514b | 393 | struct sock *ssk; |
0abdde82 PA |
394 | |
395 | switch (optname) { | |
396 | case IPV6_V6ONLY: | |
c9406a23 FW |
397 | case IPV6_TRANSPARENT: |
398 | case IPV6_FREEBIND: | |
0abdde82 | 399 | lock_sock(sk); |
3f326a82 PA |
400 | ssk = __mptcp_nmpc_sk(msk); |
401 | if (IS_ERR(ssk)) { | |
0abdde82 | 402 | release_sock(sk); |
3f326a82 | 403 | return PTR_ERR(ssk); |
0abdde82 PA |
404 | } |
405 | ||
f0bc514b | 406 | ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); |
c9406a23 FW |
407 | if (ret != 0) { |
408 | release_sock(sk); | |
409 | return ret; | |
410 | } | |
411 | ||
412 | sockopt_seq_inc(msk); | |
413 | ||
414 | switch (optname) { | |
415 | case IPV6_V6ONLY: | |
f0bc514b | 416 | sk->sk_ipv6only = ssk->sk_ipv6only; |
c9406a23 FW |
417 | break; |
418 | case IPV6_TRANSPARENT: | |
4bd0623f ED |
419 | inet_assign_bit(TRANSPARENT, sk, |
420 | inet_test_bit(TRANSPARENT, ssk)); | |
c9406a23 FW |
421 | break; |
422 | case IPV6_FREEBIND: | |
3f7e7532 ED |
423 | inet_assign_bit(FREEBIND, sk, |
424 | inet_test_bit(FREEBIND, ssk)); | |
c9406a23 FW |
425 | break; |
426 | } | |
0abdde82 PA |
427 | |
428 | release_sock(sk); | |
429 | break; | |
430 | } | |
431 | ||
432 | return ret; | |
433 | } | |
434 | ||
d9e4c129 PA |
435 | static bool mptcp_supported_sockopt(int level, int optname) |
436 | { | |
d9e4c129 PA |
437 | if (level == SOL_IP) { |
438 | switch (optname) { | |
439 | /* should work fine */ | |
440 | case IP_FREEBIND: | |
441 | case IP_TRANSPARENT: | |
442 | ||
443 | /* the following are control cmsg related */ | |
444 | case IP_PKTINFO: | |
445 | case IP_RECVTTL: | |
446 | case IP_RECVTOS: | |
447 | case IP_RECVOPTS: | |
448 | case IP_RETOPTS: | |
449 | case IP_PASSSEC: | |
450 | case IP_RECVORIGDSTADDR: | |
451 | case IP_CHECKSUM: | |
452 | case IP_RECVFRAGSIZE: | |
453 | ||
454 | /* common stuff that need some love */ | |
455 | case IP_TOS: | |
456 | case IP_TTL: | |
457 | case IP_BIND_ADDRESS_NO_PORT: | |
458 | case IP_MTU_DISCOVER: | |
459 | case IP_RECVERR: | |
460 | ||
461 | /* possibly less common may deserve some love */ | |
462 | case IP_MINTTL: | |
463 | ||
464 | /* the following is apparently a no-op for plain TCP */ | |
465 | case IP_RECVERR_RFC4884: | |
466 | return true; | |
467 | } | |
468 | ||
469 | /* IP_OPTIONS is not supported, needs subflow care */ | |
470 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
471 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
472 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
473 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
474 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
475 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
476 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
477 | * with mcast stuff | |
478 | */ | |
479 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
480 | return false; | |
481 | } | |
482 | if (level == SOL_IPV6) { | |
483 | switch (optname) { | |
484 | case IPV6_V6ONLY: | |
485 | ||
486 | /* the following are control cmsg related */ | |
487 | case IPV6_RECVPKTINFO: | |
488 | case IPV6_2292PKTINFO: | |
489 | case IPV6_RECVHOPLIMIT: | |
490 | case IPV6_2292HOPLIMIT: | |
491 | case IPV6_RECVRTHDR: | |
492 | case IPV6_2292RTHDR: | |
493 | case IPV6_RECVHOPOPTS: | |
494 | case IPV6_2292HOPOPTS: | |
495 | case IPV6_RECVDSTOPTS: | |
496 | case IPV6_2292DSTOPTS: | |
497 | case IPV6_RECVTCLASS: | |
498 | case IPV6_FLOWINFO: | |
499 | case IPV6_RECVPATHMTU: | |
500 | case IPV6_RECVORIGDSTADDR: | |
501 | case IPV6_RECVFRAGSIZE: | |
502 | ||
503 | /* the following ones need some love but are quite common */ | |
504 | case IPV6_TCLASS: | |
505 | case IPV6_TRANSPARENT: | |
506 | case IPV6_FREEBIND: | |
507 | case IPV6_PKTINFO: | |
508 | case IPV6_2292PKTOPTIONS: | |
509 | case IPV6_UNICAST_HOPS: | |
510 | case IPV6_MTU_DISCOVER: | |
511 | case IPV6_MTU: | |
512 | case IPV6_RECVERR: | |
513 | case IPV6_FLOWINFO_SEND: | |
514 | case IPV6_FLOWLABEL_MGR: | |
515 | case IPV6_MINHOPCOUNT: | |
516 | case IPV6_DONTFRAG: | |
517 | case IPV6_AUTOFLOWLABEL: | |
518 | ||
519 | /* the following one is a no-op for plain TCP */ | |
520 | case IPV6_RECVERR_RFC4884: | |
521 | return true; | |
522 | } | |
523 | ||
524 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
525 | * not supported | |
526 | */ | |
527 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
528 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
529 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
530 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
531 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
532 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
533 | * are not supported better not deal with mcast | |
534 | */ | |
535 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
536 | ||
537 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
538 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
539 | return false; | |
540 | } | |
541 | if (level == SOL_TCP) { | |
542 | switch (optname) { | |
543 | /* the following are no-op or should work just fine */ | |
544 | case TCP_THIN_DUPACK: | |
545 | case TCP_DEFER_ACCEPT: | |
546 | ||
547 | /* the following need some love */ | |
548 | case TCP_MAXSEG: | |
549 | case TCP_NODELAY: | |
550 | case TCP_THIN_LINEAR_TIMEOUTS: | |
551 | case TCP_CONGESTION: | |
d9e4c129 PA |
552 | case TCP_CORK: |
553 | case TCP_KEEPIDLE: | |
554 | case TCP_KEEPINTVL: | |
555 | case TCP_KEEPCNT: | |
556 | case TCP_SYNCNT: | |
557 | case TCP_SAVE_SYN: | |
558 | case TCP_LINGER2: | |
559 | case TCP_WINDOW_CLAMP: | |
560 | case TCP_QUICKACK: | |
561 | case TCP_USER_TIMEOUT: | |
562 | case TCP_TIMESTAMP: | |
563 | case TCP_NOTSENT_LOWAT: | |
564 | case TCP_TX_DELAY: | |
2c9e7765 | 565 | case TCP_INQ: |
4ffb0a02 | 566 | case TCP_FASTOPEN: |
54635bd0 | 567 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 568 | case TCP_FASTOPEN_KEY: |
e64d4deb | 569 | case TCP_FASTOPEN_NO_COOKIE: |
d9e4c129 PA |
570 | return true; |
571 | } | |
572 | ||
573 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
574 | ||
575 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
576 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
577 | */ | |
d9e4c129 PA |
578 | } |
579 | return false; | |
580 | } | |
581 | ||
aa1fbd94 FW |
582 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
583 | unsigned int optlen) | |
584 | { | |
585 | struct mptcp_subflow_context *subflow; | |
586 | struct sock *sk = (struct sock *)msk; | |
587 | char name[TCP_CA_NAME_MAX]; | |
588 | bool cap_net_admin; | |
589 | int ret; | |
590 | ||
591 | if (optlen < 1) | |
592 | return -EINVAL; | |
593 | ||
594 | ret = strncpy_from_sockptr(name, optval, | |
595 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
596 | if (ret < 0) | |
597 | return -EFAULT; | |
598 | ||
599 | name[ret] = 0; | |
600 | ||
601 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
602 | ||
603 | ret = 0; | |
604 | lock_sock(sk); | |
605 | sockopt_seq_inc(msk); | |
606 | mptcp_for_each_subflow(msk, subflow) { | |
607 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
608 | int err; | |
609 | ||
610 | lock_sock(ssk); | |
611 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
612 | if (err < 0 && ret == 0) | |
613 | ret = err; | |
614 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
615 | release_sock(ssk); | |
616 | } | |
617 | ||
618 | if (ret == 0) | |
20b5759f | 619 | strcpy(msk->ca_name, name); |
aa1fbd94 FW |
620 | |
621 | release_sock(sk); | |
622 | return ret; | |
623 | } | |
624 | ||
4f6e14bd MG |
625 | static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, |
626 | unsigned int optlen) | |
627 | { | |
628 | struct mptcp_subflow_context *subflow; | |
629 | struct sock *sk = (struct sock *)msk; | |
630 | int val; | |
631 | ||
632 | if (optlen < sizeof(int)) | |
633 | return -EINVAL; | |
634 | ||
635 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
636 | return -EFAULT; | |
637 | ||
638 | lock_sock(sk); | |
639 | sockopt_seq_inc(msk); | |
640 | msk->cork = !!val; | |
641 | mptcp_for_each_subflow(msk, subflow) { | |
642 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
643 | ||
644 | lock_sock(ssk); | |
645 | __tcp_sock_set_cork(ssk, !!val); | |
646 | release_sock(ssk); | |
647 | } | |
648 | if (!val) | |
649 | mptcp_check_and_set_pending(sk); | |
650 | release_sock(sk); | |
651 | ||
652 | return 0; | |
653 | } | |
654 | ||
655 | static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, | |
656 | unsigned int optlen) | |
657 | { | |
658 | struct mptcp_subflow_context *subflow; | |
659 | struct sock *sk = (struct sock *)msk; | |
660 | int val; | |
661 | ||
662 | if (optlen < sizeof(int)) | |
663 | return -EINVAL; | |
664 | ||
665 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
666 | return -EFAULT; | |
667 | ||
668 | lock_sock(sk); | |
669 | sockopt_seq_inc(msk); | |
670 | msk->nodelay = !!val; | |
671 | mptcp_for_each_subflow(msk, subflow) { | |
672 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
673 | ||
674 | lock_sock(ssk); | |
675 | __tcp_sock_set_nodelay(ssk, !!val); | |
676 | release_sock(ssk); | |
677 | } | |
678 | if (val) | |
679 | mptcp_check_and_set_pending(sk); | |
680 | release_sock(sk); | |
681 | ||
682 | return 0; | |
683 | } | |
684 | ||
c9406a23 FW |
685 | static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, |
686 | sockptr_t optval, unsigned int optlen) | |
687 | { | |
688 | struct sock *sk = (struct sock *)msk; | |
3f326a82 | 689 | struct sock *ssk; |
c9406a23 FW |
690 | int err; |
691 | ||
692 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
693 | if (err != 0) | |
694 | return err; | |
695 | ||
696 | lock_sock(sk); | |
697 | ||
3f326a82 PA |
698 | ssk = __mptcp_nmpc_sk(msk); |
699 | if (IS_ERR(ssk)) { | |
c9406a23 | 700 | release_sock(sk); |
3f326a82 | 701 | return PTR_ERR(ssk); |
c9406a23 FW |
702 | } |
703 | ||
c9406a23 FW |
704 | switch (optname) { |
705 | case IP_FREEBIND: | |
3f7e7532 | 706 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
c9406a23 FW |
707 | break; |
708 | case IP_TRANSPARENT: | |
4bd0623f ED |
709 | inet_assign_bit(TRANSPARENT, ssk, |
710 | inet_test_bit(TRANSPARENT, sk)); | |
c9406a23 FW |
711 | break; |
712 | default: | |
713 | release_sock(sk); | |
714 | WARN_ON_ONCE(1); | |
715 | return -EOPNOTSUPP; | |
716 | } | |
717 | ||
718 | sockopt_seq_inc(msk); | |
719 | release_sock(sk); | |
720 | return 0; | |
721 | } | |
722 | ||
ffcacff8 PS |
723 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
724 | sockptr_t optval, unsigned int optlen) | |
725 | { | |
726 | struct mptcp_subflow_context *subflow; | |
727 | struct sock *sk = (struct sock *)msk; | |
728 | int err, val; | |
729 | ||
730 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
731 | ||
732 | if (err != 0) | |
733 | return err; | |
734 | ||
735 | lock_sock(sk); | |
736 | sockopt_seq_inc(msk); | |
e08d0b3d | 737 | val = READ_ONCE(inet_sk(sk)->tos); |
ffcacff8 PS |
738 | mptcp_for_each_subflow(msk, subflow) { |
739 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
740 | ||
878d951c | 741 | __ip_sock_set_tos(ssk, val); |
ffcacff8 PS |
742 | } |
743 | release_sock(sk); | |
744 | ||
03e7d28c | 745 | return 0; |
ffcacff8 PS |
746 | } |
747 | ||
748 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, | |
749 | sockptr_t optval, unsigned int optlen) | |
750 | { | |
751 | switch (optname) { | |
c9406a23 FW |
752 | case IP_FREEBIND: |
753 | case IP_TRANSPARENT: | |
754 | return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); | |
ffcacff8 PS |
755 | case IP_TOS: |
756 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); | |
757 | } | |
758 | ||
759 | return -EOPNOTSUPP; | |
760 | } | |
761 | ||
d3d42904 MB |
762 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
763 | sockptr_t optval, unsigned int optlen) | |
54635bd0 | 764 | { |
21e43569 | 765 | struct sock *sk = (struct sock *)msk; |
3f326a82 | 766 | struct sock *ssk; |
ddb1a072 | 767 | int ret; |
54635bd0 | 768 | |
d3d42904 | 769 | /* Limit to first subflow, before the connection establishment */ |
21e43569 | 770 | lock_sock(sk); |
3f326a82 PA |
771 | ssk = __mptcp_nmpc_sk(msk); |
772 | if (IS_ERR(ssk)) { | |
773 | ret = PTR_ERR(ssk); | |
21e43569 | 774 | goto unlock; |
ddb1a072 | 775 | } |
54635bd0 | 776 | |
3f326a82 | 777 | ret = tcp_setsockopt(ssk, level, optname, optval, optlen); |
21e43569 PA |
778 | |
779 | unlock: | |
780 | release_sock(sk); | |
781 | return ret; | |
54635bd0 BH |
782 | } |
783 | ||
aa1fbd94 FW |
784 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
785 | sockptr_t optval, unsigned int optlen) | |
786 | { | |
2c9e7765 FW |
787 | struct sock *sk = (void *)msk; |
788 | int ret, val; | |
789 | ||
aa1fbd94 | 790 | switch (optname) { |
2c9e7765 FW |
791 | case TCP_INQ: |
792 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
793 | if (ret) | |
794 | return ret; | |
795 | if (val < 0 || val > 1) | |
796 | return -EINVAL; | |
797 | ||
798 | lock_sock(sk); | |
799 | msk->recvmsg_inq = !!val; | |
800 | release_sock(sk); | |
801 | return 0; | |
aa1fbd94 FW |
802 | case TCP_ULP: |
803 | return -EOPNOTSUPP; | |
804 | case TCP_CONGESTION: | |
805 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
4f6e14bd MG |
806 | case TCP_CORK: |
807 | return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); | |
808 | case TCP_NODELAY: | |
809 | return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); | |
ea1e301d | 810 | case TCP_DEFER_ACCEPT: |
caea6467 MB |
811 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ |
812 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); | |
813 | return 0; | |
4ffb0a02 | 814 | case TCP_FASTOPEN: |
54635bd0 | 815 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 816 | case TCP_FASTOPEN_KEY: |
e64d4deb | 817 | case TCP_FASTOPEN_NO_COOKIE: |
d3d42904 MB |
818 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
819 | optval, optlen); | |
aa1fbd94 FW |
820 | } |
821 | ||
822 | return -EOPNOTSUPP; | |
823 | } | |
824 | ||
0abdde82 PA |
825 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
826 | sockptr_t optval, unsigned int optlen) | |
827 | { | |
828 | struct mptcp_sock *msk = mptcp_sk(sk); | |
829 | struct sock *ssk; | |
830 | ||
831 | pr_debug("msk=%p", msk); | |
832 | ||
833 | if (level == SOL_SOCKET) | |
834 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
835 | ||
7a009a70 FW |
836 | if (!mptcp_supported_sockopt(level, optname)) |
837 | return -ENOPROTOOPT; | |
838 | ||
0abdde82 PA |
839 | /* @@ the meaning of setsockopt() when the socket is connected and |
840 | * there are multiple subflows is not yet defined. It is up to the | |
841 | * MPTCP-level socket to configure the subflows until the subflow | |
842 | * is in TCP fallback, when TCP socket options are passed through | |
843 | * to the one remaining subflow. | |
844 | */ | |
845 | lock_sock(sk); | |
846 | ssk = __mptcp_tcp_fallback(msk); | |
847 | release_sock(sk); | |
848 | if (ssk) | |
849 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
850 | ||
ffcacff8 PS |
851 | if (level == SOL_IP) |
852 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); | |
853 | ||
0abdde82 PA |
854 | if (level == SOL_IPV6) |
855 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
856 | ||
aa1fbd94 FW |
857 | if (level == SOL_TCP) |
858 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
859 | ||
860 | return -EOPNOTSUPP; | |
861 | } | |
862 | ||
863 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
864 | char __user *optval, int __user *optlen) | |
865 | { | |
866 | struct sock *sk = (struct sock *)msk; | |
aa1fbd94 | 867 | struct sock *ssk; |
f0bc514b | 868 | int ret; |
aa1fbd94 FW |
869 | |
870 | lock_sock(sk); | |
871 | ssk = msk->first; | |
872 | if (ssk) { | |
873 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
874 | goto out; | |
875 | } | |
876 | ||
3f326a82 PA |
877 | ssk = __mptcp_nmpc_sk(msk); |
878 | if (IS_ERR(ssk)) { | |
879 | ret = PTR_ERR(ssk); | |
aa1fbd94 | 880 | goto out; |
ddb1a072 | 881 | } |
aa1fbd94 | 882 | |
f0bc514b | 883 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); |
aa1fbd94 FW |
884 | |
885 | out: | |
886 | release_sock(sk); | |
887 | return ret; | |
888 | } | |
889 | ||
61bc6e82 FW |
890 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
891 | { | |
38967f42 | 892 | struct sock *sk = (struct sock *)msk; |
61bc6e82 | 893 | u32 flags = 0; |
38967f42 | 894 | bool slow; |
61bc6e82 | 895 | |
55c42fa7 FW |
896 | memset(info, 0, sizeof(*info)); |
897 | ||
61bc6e82 FW |
898 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
899 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); | |
900 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); | |
901 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); | |
e925a032 | 902 | |
38967f42 PA |
903 | if (inet_sk_state_load(sk) == TCP_LISTEN) |
904 | return; | |
905 | ||
e925a032 MB |
906 | /* The following limits only make sense for the in-kernel PM */ |
907 | if (mptcp_pm_is_kernel(msk)) { | |
908 | info->mptcpi_subflows_max = | |
909 | mptcp_pm_get_subflows_max(msk); | |
910 | info->mptcpi_add_addr_signal_max = | |
911 | mptcp_pm_get_add_addr_signal_max(msk); | |
912 | info->mptcpi_add_addr_accepted_max = | |
913 | mptcp_pm_get_add_addr_accept_max(msk); | |
914 | info->mptcpi_local_addr_max = | |
915 | mptcp_pm_get_local_addr_max(msk); | |
916 | } | |
917 | ||
61bc6e82 FW |
918 | if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) |
919 | flags |= MPTCP_INFO_FLAG_FALLBACK; | |
920 | if (READ_ONCE(msk->can_ack)) | |
921 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; | |
922 | info->mptcpi_flags = flags; | |
38967f42 PA |
923 | mptcp_data_lock(sk); |
924 | info->mptcpi_snd_una = msk->snd_una; | |
925 | info->mptcpi_rcv_nxt = msk->ack_seq; | |
926 | info->mptcpi_bytes_acked = msk->bytes_acked; | |
927 | mptcp_data_unlock(sk); | |
928 | ||
929 | slow = lock_sock_fast(sk); | |
930 | info->mptcpi_csum_enabled = msk->csum_enabled; | |
931 | info->mptcpi_token = msk->token; | |
932 | info->mptcpi_write_seq = msk->write_seq; | |
933 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; | |
934 | info->mptcpi_bytes_sent = msk->bytes_sent; | |
935 | info->mptcpi_bytes_received = msk->bytes_received; | |
936 | info->mptcpi_bytes_retrans = msk->bytes_retrans; | |
937 | unlock_sock_fast(sk, slow); | |
61bc6e82 FW |
938 | } |
939 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); | |
940 | ||
55c42fa7 FW |
941 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
942 | { | |
943 | struct mptcp_info m_info; | |
944 | int len; | |
945 | ||
946 | if (get_user(len, optlen)) | |
947 | return -EFAULT; | |
948 | ||
949 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
950 | ||
951 | mptcp_diag_fill_info(msk, &m_info); | |
952 | ||
953 | if (put_user(len, optlen)) | |
954 | return -EFAULT; | |
955 | ||
956 | if (copy_to_user(optval, &m_info, len)) | |
957 | return -EFAULT; | |
958 | ||
959 | return 0; | |
960 | } | |
961 | ||
06f15cee FW |
962 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
963 | char __user *optval, | |
964 | u32 copied, | |
965 | int __user *optlen) | |
966 | { | |
967 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); | |
968 | ||
969 | if (copied) | |
970 | copied += sfd->size_subflow_data; | |
971 | else | |
972 | copied = copylen; | |
973 | ||
974 | if (put_user(copied, optlen)) | |
975 | return -EFAULT; | |
976 | ||
977 | if (copy_to_user(optval, sfd, copylen)) | |
978 | return -EFAULT; | |
979 | ||
980 | return 0; | |
981 | } | |
982 | ||
983 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, | |
49243207 PA |
984 | char __user *optval, |
985 | int __user *optlen) | |
06f15cee FW |
986 | { |
987 | int len, copylen; | |
988 | ||
989 | if (get_user(len, optlen)) | |
990 | return -EFAULT; | |
991 | ||
992 | /* if mptcp_subflow_data size is changed, need to adjust | |
993 | * this function to deal with programs using old version. | |
994 | */ | |
995 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); | |
996 | ||
997 | if (len < MIN_INFO_OPTLEN_SIZE) | |
998 | return -EINVAL; | |
999 | ||
1000 | memset(sfd, 0, sizeof(*sfd)); | |
1001 | ||
1002 | copylen = min_t(unsigned int, len, sizeof(*sfd)); | |
1003 | if (copy_from_user(sfd, optval, copylen)) | |
1004 | return -EFAULT; | |
1005 | ||
1006 | /* size_subflow_data is u32, but len is signed */ | |
1007 | if (sfd->size_subflow_data > INT_MAX || | |
1008 | sfd->size_user > INT_MAX) | |
1009 | return -EINVAL; | |
1010 | ||
1011 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || | |
1012 | sfd->size_subflow_data > len) | |
1013 | return -EINVAL; | |
1014 | ||
1015 | if (sfd->num_subflows || sfd->size_kernel) | |
1016 | return -EINVAL; | |
1017 | ||
1018 | return len - sfd->size_subflow_data; | |
1019 | } | |
1020 | ||
1021 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, | |
1022 | int __user *optlen) | |
1023 | { | |
1024 | struct mptcp_subflow_context *subflow; | |
80638684 | 1025 | struct sock *sk = (struct sock *)msk; |
06f15cee FW |
1026 | unsigned int sfcount = 0, copied = 0; |
1027 | struct mptcp_subflow_data sfd; | |
1028 | char __user *infoptr; | |
1029 | int len; | |
1030 | ||
1031 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1032 | if (len < 0) | |
1033 | return len; | |
1034 | ||
1035 | sfd.size_kernel = sizeof(struct tcp_info); | |
1036 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1037 | sizeof(struct tcp_info)); | |
1038 | ||
1039 | infoptr = optval + sfd.size_subflow_data; | |
1040 | ||
1041 | lock_sock(sk); | |
1042 | ||
1043 | mptcp_for_each_subflow(msk, subflow) { | |
1044 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1045 | ||
1046 | ++sfcount; | |
1047 | ||
1048 | if (len && len >= sfd.size_user) { | |
1049 | struct tcp_info info; | |
1050 | ||
1051 | tcp_get_info(ssk, &info); | |
1052 | ||
1053 | if (copy_to_user(infoptr, &info, sfd.size_user)) { | |
1054 | release_sock(sk); | |
1055 | return -EFAULT; | |
1056 | } | |
1057 | ||
1058 | infoptr += sfd.size_user; | |
1059 | copied += sfd.size_user; | |
1060 | len -= sfd.size_user; | |
1061 | } | |
1062 | } | |
1063 | ||
1064 | release_sock(sk); | |
1065 | ||
1066 | sfd.num_subflows = sfcount; | |
1067 | ||
1068 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1069 | return -EFAULT; | |
1070 | ||
1071 | return 0; | |
1072 | } | |
1073 | ||
c11c5906 FW |
1074 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1075 | { | |
abc17a11 | 1076 | const struct inet_sock *inet = inet_sk(sk); |
c11c5906 FW |
1077 | |
1078 | memset(a, 0, sizeof(*a)); | |
1079 | ||
1080 | if (sk->sk_family == AF_INET) { | |
1081 | a->sin_local.sin_family = AF_INET; | |
1082 | a->sin_local.sin_port = inet->inet_sport; | |
1083 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; | |
1084 | ||
1085 | if (!a->sin_local.sin_addr.s_addr) | |
1086 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; | |
1087 | ||
1088 | a->sin_remote.sin_family = AF_INET; | |
1089 | a->sin_remote.sin_port = inet->inet_dport; | |
1090 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; | |
1091 | #if IS_ENABLED(CONFIG_IPV6) | |
1092 | } else if (sk->sk_family == AF_INET6) { | |
1093 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1094 | ||
29211e7d TG |
1095 | if (WARN_ON_ONCE(!np)) |
1096 | return; | |
1097 | ||
c11c5906 FW |
1098 | a->sin6_local.sin6_family = AF_INET6; |
1099 | a->sin6_local.sin6_port = inet->inet_sport; | |
1100 | ||
1101 | if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) | |
1102 | a->sin6_local.sin6_addr = np->saddr; | |
1103 | else | |
1104 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; | |
1105 | ||
1106 | a->sin6_remote.sin6_family = AF_INET6; | |
1107 | a->sin6_remote.sin6_port = inet->inet_dport; | |
1108 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; | |
1109 | #endif | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, | |
1114 | int __user *optlen) | |
1115 | { | |
c11c5906 | 1116 | struct mptcp_subflow_context *subflow; |
80638684 | 1117 | struct sock *sk = (struct sock *)msk; |
c11c5906 FW |
1118 | unsigned int sfcount = 0, copied = 0; |
1119 | struct mptcp_subflow_data sfd; | |
1120 | char __user *addrptr; | |
1121 | int len; | |
1122 | ||
1123 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1124 | if (len < 0) | |
1125 | return len; | |
1126 | ||
1127 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); | |
1128 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1129 | sizeof(struct mptcp_subflow_addrs)); | |
1130 | ||
1131 | addrptr = optval + sfd.size_subflow_data; | |
1132 | ||
1133 | lock_sock(sk); | |
1134 | ||
1135 | mptcp_for_each_subflow(msk, subflow) { | |
1136 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1137 | ||
1138 | ++sfcount; | |
1139 | ||
1140 | if (len && len >= sfd.size_user) { | |
1141 | struct mptcp_subflow_addrs a; | |
1142 | ||
1143 | mptcp_get_sub_addrs(ssk, &a); | |
1144 | ||
1145 | if (copy_to_user(addrptr, &a, sfd.size_user)) { | |
1146 | release_sock(sk); | |
1147 | return -EFAULT; | |
1148 | } | |
1149 | ||
1150 | addrptr += sfd.size_user; | |
1151 | copied += sfd.size_user; | |
1152 | len -= sfd.size_user; | |
1153 | } | |
1154 | } | |
1155 | ||
1156 | release_sock(sk); | |
1157 | ||
1158 | sfd.num_subflows = sfcount; | |
1159 | ||
1160 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1161 | return -EFAULT; | |
1162 | ||
1163 | return 0; | |
1164 | } | |
1165 | ||
49243207 PA |
1166 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, |
1167 | char __user *optval, | |
1168 | int __user *optlen) | |
1169 | { | |
1170 | int len; | |
1171 | ||
1172 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != | |
1173 | MIN_FULL_INFO_OPTLEN_SIZE); | |
1174 | ||
1175 | if (get_user(len, optlen)) | |
1176 | return -EFAULT; | |
1177 | ||
1178 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) | |
1179 | return -EINVAL; | |
1180 | ||
1181 | memset(mfi, 0, sizeof(*mfi)); | |
1182 | if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) | |
1183 | return -EFAULT; | |
1184 | ||
1185 | if (mfi->size_tcpinfo_kernel || | |
1186 | mfi->size_sfinfo_kernel || | |
1187 | mfi->num_subflows) | |
1188 | return -EINVAL; | |
1189 | ||
1190 | if (mfi->size_sfinfo_user > INT_MAX || | |
1191 | mfi->size_tcpinfo_user > INT_MAX) | |
1192 | return -EINVAL; | |
1193 | ||
1194 | return len - MIN_FULL_INFO_OPTLEN_SIZE; | |
1195 | } | |
1196 | ||
1197 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, | |
1198 | char __user *optval, | |
1199 | u32 copylen, | |
1200 | int __user *optlen) | |
1201 | { | |
1202 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; | |
1203 | if (put_user(copylen, optlen)) | |
1204 | return -EFAULT; | |
1205 | ||
1206 | if (copy_to_user(optval, mfi, copylen)) | |
1207 | return -EFAULT; | |
1208 | return 0; | |
1209 | } | |
1210 | ||
1211 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, | |
1212 | int __user *optlen) | |
1213 | { | |
1214 | unsigned int sfcount = 0, copylen = 0; | |
1215 | struct mptcp_subflow_context *subflow; | |
1216 | struct sock *sk = (struct sock *)msk; | |
1217 | void __user *tcpinfoptr, *sfinfoptr; | |
1218 | struct mptcp_full_info mfi; | |
1219 | int len; | |
1220 | ||
1221 | len = mptcp_get_full_info(&mfi, optval, optlen); | |
1222 | if (len < 0) | |
1223 | return len; | |
1224 | ||
1225 | /* don't bother filling the mptcp info if there is not enough | |
1226 | * user-space-provided storage | |
1227 | */ | |
1228 | if (len > 0) { | |
1229 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); | |
1230 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
1231 | } | |
1232 | ||
1233 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); | |
1234 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, | |
1235 | sizeof(struct tcp_info)); | |
1236 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); | |
1237 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); | |
1238 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, | |
1239 | sizeof(struct mptcp_subflow_info)); | |
1240 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); | |
1241 | ||
1242 | lock_sock(sk); | |
1243 | mptcp_for_each_subflow(msk, subflow) { | |
1244 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1245 | struct mptcp_subflow_info sfinfo; | |
1246 | struct tcp_info tcp_info; | |
1247 | ||
1248 | if (sfcount++ >= mfi.size_arrays_user) | |
1249 | continue; | |
1250 | ||
1251 | /* fetch addr/tcp_info only if the user space buffers | |
1252 | * are wide enough | |
1253 | */ | |
1254 | memset(&sfinfo, 0, sizeof(sfinfo)); | |
1255 | sfinfo.id = subflow->subflow_id; | |
1256 | if (mfi.size_sfinfo_user > | |
1257 | offsetof(struct mptcp_subflow_info, addrs)) | |
1258 | mptcp_get_sub_addrs(ssk, &sfinfo.addrs); | |
1259 | if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) | |
1260 | goto fail_release; | |
1261 | ||
1262 | if (mfi.size_tcpinfo_user) { | |
1263 | tcp_get_info(ssk, &tcp_info); | |
1264 | if (copy_to_user(tcpinfoptr, &tcp_info, | |
1265 | mfi.size_tcpinfo_user)) | |
1266 | goto fail_release; | |
1267 | } | |
1268 | ||
1269 | tcpinfoptr += mfi.size_tcpinfo_user; | |
1270 | sfinfoptr += mfi.size_sfinfo_user; | |
1271 | } | |
1272 | release_sock(sk); | |
1273 | ||
1274 | mfi.num_subflows = sfcount; | |
1275 | if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) | |
1276 | return -EFAULT; | |
1277 | ||
1278 | return 0; | |
1279 | ||
1280 | fail_release: | |
1281 | release_sock(sk); | |
1282 | return -EFAULT; | |
1283 | } | |
1284 | ||
2c9e7765 FW |
1285 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1286 | int __user *optlen, int val) | |
1287 | { | |
1288 | int len; | |
1289 | ||
1290 | if (get_user(len, optlen)) | |
1291 | return -EFAULT; | |
2c9e7765 FW |
1292 | if (len < 0) |
1293 | return -EINVAL; | |
1294 | ||
3b1e21eb FW |
1295 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1296 | unsigned char ucval = (unsigned char)val; | |
1297 | ||
1298 | len = 1; | |
1299 | if (put_user(len, optlen)) | |
1300 | return -EFAULT; | |
1301 | if (copy_to_user(optval, &ucval, 1)) | |
1302 | return -EFAULT; | |
1303 | } else { | |
1304 | len = min_t(unsigned int, len, sizeof(int)); | |
1305 | if (put_user(len, optlen)) | |
1306 | return -EFAULT; | |
1307 | if (copy_to_user(optval, &val, len)) | |
1308 | return -EFAULT; | |
1309 | } | |
2c9e7765 FW |
1310 | |
1311 | return 0; | |
1312 | } | |
1313 | ||
aa1fbd94 FW |
1314 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1315 | char __user *optval, int __user *optlen) | |
1316 | { | |
1317 | switch (optname) { | |
1318 | case TCP_ULP: | |
1319 | case TCP_CONGESTION: | |
1320 | case TCP_INFO: | |
1321 | case TCP_CC_INFO: | |
ea1e301d | 1322 | case TCP_DEFER_ACCEPT: |
4ffb0a02 | 1323 | case TCP_FASTOPEN: |
54635bd0 | 1324 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 1325 | case TCP_FASTOPEN_KEY: |
e64d4deb | 1326 | case TCP_FASTOPEN_NO_COOKIE: |
aa1fbd94 FW |
1327 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1328 | optval, optlen); | |
2c9e7765 FW |
1329 | case TCP_INQ: |
1330 | return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); | |
4f6e14bd MG |
1331 | case TCP_CORK: |
1332 | return mptcp_put_int_option(msk, optval, optlen, msk->cork); | |
1333 | case TCP_NODELAY: | |
1334 | return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); | |
aa1fbd94 | 1335 | } |
0abdde82 PA |
1336 | return -EOPNOTSUPP; |
1337 | } | |
1338 | ||
3b1e21eb FW |
1339 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1340 | char __user *optval, int __user *optlen) | |
1341 | { | |
1342 | struct sock *sk = (void *)msk; | |
1343 | ||
1344 | switch (optname) { | |
1345 | case IP_TOS: | |
e08d0b3d | 1346 | return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); |
3b1e21eb FW |
1347 | } |
1348 | ||
1349 | return -EOPNOTSUPP; | |
1350 | } | |
1351 | ||
55c42fa7 FW |
1352 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1353 | char __user *optval, int __user *optlen) | |
1354 | { | |
1355 | switch (optname) { | |
1356 | case MPTCP_INFO: | |
1357 | return mptcp_getsockopt_info(msk, optval, optlen); | |
49243207 PA |
1358 | case MPTCP_FULL_INFO: |
1359 | return mptcp_getsockopt_full_info(msk, optval, optlen); | |
06f15cee FW |
1360 | case MPTCP_TCPINFO: |
1361 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); | |
c11c5906 FW |
1362 | case MPTCP_SUBFLOW_ADDRS: |
1363 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); | |
55c42fa7 FW |
1364 | } |
1365 | ||
1366 | return -EOPNOTSUPP; | |
1367 | } | |
1368 | ||
0abdde82 PA |
1369 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1370 | char __user *optval, int __user *option) | |
1371 | { | |
1372 | struct mptcp_sock *msk = mptcp_sk(sk); | |
1373 | struct sock *ssk; | |
1374 | ||
1375 | pr_debug("msk=%p", msk); | |
1376 | ||
1377 | /* @@ the meaning of setsockopt() when the socket is connected and | |
1378 | * there are multiple subflows is not yet defined. It is up to the | |
1379 | * MPTCP-level socket to configure the subflows until the subflow | |
1380 | * is in TCP fallback, when socket options are passed through | |
1381 | * to the one remaining subflow. | |
1382 | */ | |
1383 | lock_sock(sk); | |
1384 | ssk = __mptcp_tcp_fallback(msk); | |
1385 | release_sock(sk); | |
1386 | if (ssk) | |
1387 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
1388 | ||
3b1e21eb FW |
1389 | if (level == SOL_IP) |
1390 | return mptcp_getsockopt_v4(msk, optname, optval, option); | |
aa1fbd94 FW |
1391 | if (level == SOL_TCP) |
1392 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
55c42fa7 FW |
1393 | if (level == SOL_MPTCP) |
1394 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); | |
0abdde82 PA |
1395 | return -EOPNOTSUPP; |
1396 | } | |
1397 | ||
1b3e7ede FW |
1398 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1399 | { | |
5d0a6bc8 | 1400 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
1401 | struct sock *sk = (struct sock *)msk; |
1402 | ||
1403 | if (ssk->sk_prot->keepalive) { | |
1404 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
1405 | ssk->sk_prot->keepalive(ssk, 1); | |
1406 | else | |
1407 | ssk->sk_prot->keepalive(ssk, 0); | |
1408 | } | |
1409 | ||
1410 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
1411 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1412 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
7e9740e0 | 1413 | ssk->sk_ipv6only = sk->sk_ipv6only; |
878d951c | 1414 | __ip_sock_set_tos(ssk, inet_sk(sk)->tos); |
5d0a6bc8 FW |
1415 | |
1416 | if (sk->sk_userlocks & tx_rx_locks) { | |
1417 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
1418 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | |
1419 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
1420 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1421 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
1422 | } | |
1423 | ||
1424 | if (sock_flag(sk, SOCK_LINGER)) { | |
1425 | ssk->sk_lingertime = sk->sk_lingertime; | |
1426 | sock_set_flag(ssk, SOCK_LINGER); | |
1427 | } else { | |
1428 | sock_reset_flag(ssk, SOCK_LINGER); | |
1429 | } | |
1430 | ||
1431 | if (sk->sk_mark != ssk->sk_mark) { | |
1432 | ssk->sk_mark = sk->sk_mark; | |
1433 | sk_dst_reset(ssk); | |
1434 | } | |
1435 | ||
1436 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
1437 | ||
1438 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
20b5759f | 1439 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
4f6e14bd MG |
1440 | __tcp_sock_set_cork(ssk, !!msk->cork); |
1441 | __tcp_sock_set_nodelay(ssk, !!msk->nodelay); | |
c9406a23 | 1442 | |
4bd0623f | 1443 | inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); |
3f7e7532 | 1444 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
1b3e7ede FW |
1445 | } |
1446 | ||
3e501490 | 1447 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
78962489 | 1448 | { |
3e501490 | 1449 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
78962489 | 1450 | |
3e501490 | 1451 | msk_owned_by_me(msk); |
78962489 | 1452 | |
5684ab1a PA |
1453 | ssk->sk_rcvlowat = 0; |
1454 | ||
3e501490 PA |
1455 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1456 | sync_socket_options(msk, ssk); | |
78962489 | 1457 | |
3e501490 | 1458 | subflow->setsockopt_seq = msk->setsockopt_seq; |
78962489 FW |
1459 | } |
1460 | } | |
5684ab1a PA |
1461 | |
1462 | /* unfortunately this is different enough from the tcp version so | |
1463 | * that we can't factor it out | |
1464 | */ | |
1465 | int mptcp_set_rcvlowat(struct sock *sk, int val) | |
1466 | { | |
1467 | struct mptcp_subflow_context *subflow; | |
1468 | int space, cap; | |
1469 | ||
1470 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1471 | cap = sk->sk_rcvbuf >> 1; | |
1472 | else | |
1473 | cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; | |
1474 | val = min(val, cap); | |
1475 | WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); | |
1476 | ||
1477 | /* Check if we need to signal EPOLLIN right now */ | |
1478 | if (mptcp_epollin_ready(sk)) | |
1479 | sk->sk_data_ready(sk); | |
1480 | ||
1481 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1482 | return 0; | |
1483 | ||
1484 | space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); | |
1485 | if (space <= sk->sk_rcvbuf) | |
1486 | return 0; | |
1487 | ||
1488 | /* propagate the rcvbuf changes to all the subflows */ | |
1489 | WRITE_ONCE(sk->sk_rcvbuf, space); | |
1490 | mptcp_for_each_subflow(mptcp_sk(sk), subflow) { | |
1491 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1492 | bool slow; | |
1493 | ||
1494 | slow = lock_sock_fast(ssk); | |
1495 | WRITE_ONCE(ssk->sk_rcvbuf, space); | |
1496 | tcp_sk(ssk)->window_clamp = val; | |
1497 | unlock_sock_fast(ssk, slow); | |
1498 | } | |
1499 | return 0; | |
1500 | } |