Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
06f15cee FW |
17 | #define MIN_INFO_OPTLEN_SIZE 16 |
18 | ||
0abdde82 PA |
19 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
20 | { | |
21 | sock_owned_by_me((const struct sock *)msk); | |
22 | ||
23 | if (likely(!__mptcp_check_fallback(msk))) | |
24 | return NULL; | |
25 | ||
26 | return msk->first; | |
27 | } | |
28 | ||
df00b087 FW |
29 | static u32 sockopt_seq_reset(const struct sock *sk) |
30 | { | |
31 | sock_owned_by_me(sk); | |
32 | ||
33 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
34 | * of listener and established: | |
35 | * s0 = new_listener() | |
36 | * sockopt(s0) - seq is 1 | |
37 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
38 | * sockopt(s0) - seq increments to 2 on s0 | |
39 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
40 | * new ssk completes join, inherits options from s0 // seq 2 | |
41 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
42 | * | |
43 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
44 | * will fail. | |
45 | */ | |
46 | ||
47 | return (u32)sk->sk_state << 24u; | |
48 | } | |
49 | ||
1b3e7ede FW |
50 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
51 | { | |
52 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
53 | ||
54 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
55 | } | |
56 | ||
57 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
58 | unsigned int optlen, int *val) | |
59 | { | |
60 | if (optlen < sizeof(int)) | |
61 | return -EINVAL; | |
62 | ||
63 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
64 | return -EFAULT; | |
65 | ||
66 | return 0; | |
67 | } | |
68 | ||
69 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
70 | { | |
71 | struct mptcp_subflow_context *subflow; | |
72 | struct sock *sk = (struct sock *)msk; | |
73 | ||
74 | lock_sock(sk); | |
75 | sockopt_seq_inc(msk); | |
76 | ||
77 | mptcp_for_each_subflow(msk, subflow) { | |
78 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
79 | bool slow = lock_sock_fast(ssk); | |
80 | ||
81 | switch (optname) { | |
a03c99b2 FW |
82 | case SO_DEBUG: |
83 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
84 | break; | |
1b3e7ede FW |
85 | case SO_KEEPALIVE: |
86 | if (ssk->sk_prot->keepalive) | |
87 | ssk->sk_prot->keepalive(ssk, !!val); | |
88 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
89 | break; | |
90 | case SO_PRIORITY: | |
91 | ssk->sk_priority = val; | |
92 | break; | |
5d0a6bc8 FW |
93 | case SO_SNDBUF: |
94 | case SO_SNDBUFFORCE: | |
95 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
96 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
97 | break; | |
98 | case SO_RCVBUF: | |
99 | case SO_RCVBUFFORCE: | |
100 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
101 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
102 | break; | |
36704413 FW |
103 | case SO_MARK: |
104 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
105 | ssk->sk_mark = sk->sk_mark; | |
106 | sk_dst_reset(ssk); | |
107 | } | |
108 | break; | |
6f0d7198 FW |
109 | case SO_INCOMING_CPU: |
110 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
111 | break; | |
1b3e7ede FW |
112 | } |
113 | ||
114 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
115 | unlock_sock_fast(ssk, slow); | |
116 | } | |
117 | ||
118 | release_sock(sk); | |
119 | } | |
120 | ||
121 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
122 | { | |
123 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
124 | struct sock *sk = (struct sock *)msk; | |
125 | int ret; | |
126 | ||
127 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
128 | optval, sizeof(val)); | |
129 | if (ret) | |
130 | return ret; | |
131 | ||
132 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
133 | return 0; | |
134 | } | |
135 | ||
6f0d7198 FW |
136 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
137 | { | |
138 | struct sock *sk = (struct sock *)msk; | |
139 | ||
140 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
141 | ||
142 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
143 | } | |
144 | ||
9061f24b FW |
145 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
146 | { | |
147 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
148 | struct mptcp_subflow_context *subflow; | |
149 | struct sock *sk = (struct sock *)msk; | |
150 | int ret; | |
151 | ||
152 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
153 | optval, sizeof(val)); | |
154 | if (ret) | |
155 | return ret; | |
156 | ||
157 | lock_sock(sk); | |
158 | mptcp_for_each_subflow(msk, subflow) { | |
159 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
160 | bool slow = lock_sock_fast(ssk); | |
161 | ||
6c9a0a0f | 162 | sock_set_timestamp(sk, optname, !!val); |
9061f24b FW |
163 | unlock_sock_fast(ssk, slow); |
164 | } | |
165 | ||
166 | release_sock(sk); | |
167 | return 0; | |
168 | } | |
169 | ||
1b3e7ede | 170 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
6c9a0a0f YL |
171 | sockptr_t optval, |
172 | unsigned int optlen) | |
1b3e7ede FW |
173 | { |
174 | int val, ret; | |
175 | ||
176 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
177 | if (ret) | |
178 | return ret; | |
179 | ||
180 | switch (optname) { | |
181 | case SO_KEEPALIVE: | |
182 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
183 | return 0; | |
a03c99b2 | 184 | case SO_DEBUG: |
36704413 | 185 | case SO_MARK: |
1b3e7ede | 186 | case SO_PRIORITY: |
5d0a6bc8 FW |
187 | case SO_SNDBUF: |
188 | case SO_SNDBUFFORCE: | |
189 | case SO_RCVBUF: | |
190 | case SO_RCVBUFFORCE: | |
1b3e7ede | 191 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
192 | case SO_INCOMING_CPU: |
193 | mptcp_so_incoming_cpu(msk, val); | |
194 | return 0; | |
9061f24b FW |
195 | case SO_TIMESTAMP_OLD: |
196 | case SO_TIMESTAMP_NEW: | |
197 | case SO_TIMESTAMPNS_OLD: | |
198 | case SO_TIMESTAMPNS_NEW: | |
9061f24b | 199 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
1b3e7ede FW |
200 | } |
201 | ||
202 | return -ENOPROTOOPT; | |
203 | } | |
204 | ||
6c9a0a0f YL |
205 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
206 | int optname, | |
207 | sockptr_t optval, | |
208 | unsigned int optlen) | |
209 | { | |
210 | struct mptcp_subflow_context *subflow; | |
211 | struct sock *sk = (struct sock *)msk; | |
d463126e YL |
212 | struct so_timestamping timestamping; |
213 | int ret; | |
6c9a0a0f | 214 | |
d463126e YL |
215 | if (optlen == sizeof(timestamping)) { |
216 | if (copy_from_sockptr(×tamping, optval, | |
217 | sizeof(timestamping))) | |
218 | return -EFAULT; | |
219 | } else if (optlen == sizeof(int)) { | |
220 | memset(×tamping, 0, sizeof(timestamping)); | |
221 | ||
222 | if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) | |
223 | return -EFAULT; | |
224 | } else { | |
225 | return -EINVAL; | |
226 | } | |
6c9a0a0f YL |
227 | |
228 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
d463126e YL |
229 | KERNEL_SOCKPTR(×tamping), |
230 | sizeof(timestamping)); | |
6c9a0a0f YL |
231 | if (ret) |
232 | return ret; | |
233 | ||
234 | lock_sock(sk); | |
235 | ||
236 | mptcp_for_each_subflow(msk, subflow) { | |
237 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
238 | bool slow = lock_sock_fast(ssk); | |
239 | ||
d463126e | 240 | sock_set_timestamping(sk, optname, timestamping); |
6c9a0a0f YL |
241 | unlock_sock_fast(ssk, slow); |
242 | } | |
243 | ||
244 | release_sock(sk); | |
245 | ||
246 | return 0; | |
247 | } | |
248 | ||
268b1238 FW |
249 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
250 | unsigned int optlen) | |
251 | { | |
252 | struct mptcp_subflow_context *subflow; | |
253 | struct sock *sk = (struct sock *)msk; | |
254 | struct linger ling; | |
255 | sockptr_t kopt; | |
256 | int ret; | |
257 | ||
258 | if (optlen < sizeof(ling)) | |
259 | return -EINVAL; | |
260 | ||
261 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
262 | return -EFAULT; | |
263 | ||
264 | kopt = KERNEL_SOCKPTR(&ling); | |
265 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
266 | if (ret) | |
267 | return ret; | |
268 | ||
269 | lock_sock(sk); | |
270 | sockopt_seq_inc(msk); | |
271 | mptcp_for_each_subflow(msk, subflow) { | |
272 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
273 | bool slow = lock_sock_fast(ssk); | |
274 | ||
275 | if (!ling.l_onoff) { | |
276 | sock_reset_flag(ssk, SOCK_LINGER); | |
277 | } else { | |
278 | ssk->sk_lingertime = sk->sk_lingertime; | |
279 | sock_set_flag(ssk, SOCK_LINGER); | |
280 | } | |
281 | ||
282 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
283 | unlock_sock_fast(ssk, slow); | |
284 | } | |
285 | ||
286 | release_sock(sk); | |
287 | return 0; | |
288 | } | |
289 | ||
0abdde82 PA |
290 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
291 | sockptr_t optval, unsigned int optlen) | |
292 | { | |
293 | struct sock *sk = (struct sock *)msk; | |
294 | struct socket *ssock; | |
295 | int ret; | |
296 | ||
297 | switch (optname) { | |
298 | case SO_REUSEPORT: | |
299 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
300 | case SO_BINDTODEVICE: |
301 | case SO_BINDTOIFINDEX: | |
0abdde82 PA |
302 | lock_sock(sk); |
303 | ssock = __mptcp_nmpc_socket(msk); | |
304 | if (!ssock) { | |
305 | release_sock(sk); | |
306 | return -EINVAL; | |
307 | } | |
308 | ||
309 | ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); | |
310 | if (ret == 0) { | |
311 | if (optname == SO_REUSEPORT) | |
312 | sk->sk_reuseport = ssock->sk->sk_reuseport; | |
313 | else if (optname == SO_REUSEADDR) | |
314 | sk->sk_reuse = ssock->sk->sk_reuse; | |
5d0a6bc8 FW |
315 | else if (optname == SO_BINDTODEVICE) |
316 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
317 | else if (optname == SO_BINDTOIFINDEX) | |
318 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
0abdde82 PA |
319 | } |
320 | release_sock(sk); | |
321 | return ret; | |
1b3e7ede FW |
322 | case SO_KEEPALIVE: |
323 | case SO_PRIORITY: | |
5d0a6bc8 FW |
324 | case SO_SNDBUF: |
325 | case SO_SNDBUFFORCE: | |
326 | case SO_RCVBUF: | |
327 | case SO_RCVBUFFORCE: | |
36704413 | 328 | case SO_MARK: |
6f0d7198 | 329 | case SO_INCOMING_CPU: |
a03c99b2 | 330 | case SO_DEBUG: |
9061f24b FW |
331 | case SO_TIMESTAMP_OLD: |
332 | case SO_TIMESTAMP_NEW: | |
333 | case SO_TIMESTAMPNS_OLD: | |
334 | case SO_TIMESTAMPNS_NEW: | |
6c9a0a0f YL |
335 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
336 | optlen); | |
9061f24b FW |
337 | case SO_TIMESTAMPING_OLD: |
338 | case SO_TIMESTAMPING_NEW: | |
6c9a0a0f YL |
339 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
340 | optval, optlen); | |
268b1238 FW |
341 | case SO_LINGER: |
342 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
7a009a70 FW |
343 | case SO_RCVLOWAT: |
344 | case SO_RCVTIMEO_OLD: | |
345 | case SO_RCVTIMEO_NEW: | |
d6ab5ea2 GT |
346 | case SO_SNDTIMEO_OLD: |
347 | case SO_SNDTIMEO_NEW: | |
7a009a70 FW |
348 | case SO_BUSY_POLL: |
349 | case SO_PREFER_BUSY_POLL: | |
350 | case SO_BUSY_POLL_BUDGET: | |
351 | /* No need to copy: only relevant for msk */ | |
352 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
a03c99b2 FW |
353 | case SO_NO_CHECK: |
354 | case SO_DONTROUTE: | |
355 | case SO_BROADCAST: | |
356 | case SO_BSDCOMPAT: | |
357 | case SO_PASSCRED: | |
358 | case SO_PASSSEC: | |
359 | case SO_RXQ_OVFL: | |
360 | case SO_WIFI_STATUS: | |
361 | case SO_NOFCS: | |
362 | case SO_SELECT_ERR_QUEUE: | |
363 | return 0; | |
0abdde82 PA |
364 | } |
365 | ||
7a009a70 FW |
366 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
367 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
368 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
369 | * we must be careful with subflows | |
370 | * | |
371 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
372 | * explicitly the sk_protocol field | |
373 | * | |
374 | * SO_PEEK_OFF is unsupported, as it is for plain TCP | |
375 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows | |
376 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
377 | * but likely needs careful design | |
378 | * | |
379 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg | |
380 | * SO_TXTIME is currently unsupported | |
381 | */ | |
382 | ||
383 | return -EOPNOTSUPP; | |
0abdde82 PA |
384 | } |
385 | ||
386 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
387 | sockptr_t optval, unsigned int optlen) | |
388 | { | |
389 | struct sock *sk = (struct sock *)msk; | |
390 | int ret = -EOPNOTSUPP; | |
391 | struct socket *ssock; | |
392 | ||
393 | switch (optname) { | |
394 | case IPV6_V6ONLY: | |
c9406a23 FW |
395 | case IPV6_TRANSPARENT: |
396 | case IPV6_FREEBIND: | |
0abdde82 PA |
397 | lock_sock(sk); |
398 | ssock = __mptcp_nmpc_socket(msk); | |
399 | if (!ssock) { | |
400 | release_sock(sk); | |
401 | return -EINVAL; | |
402 | } | |
403 | ||
404 | ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); | |
c9406a23 FW |
405 | if (ret != 0) { |
406 | release_sock(sk); | |
407 | return ret; | |
408 | } | |
409 | ||
410 | sockopt_seq_inc(msk); | |
411 | ||
412 | switch (optname) { | |
413 | case IPV6_V6ONLY: | |
0abdde82 | 414 | sk->sk_ipv6only = ssock->sk->sk_ipv6only; |
c9406a23 FW |
415 | break; |
416 | case IPV6_TRANSPARENT: | |
417 | inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; | |
418 | break; | |
419 | case IPV6_FREEBIND: | |
420 | inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; | |
421 | break; | |
422 | } | |
0abdde82 PA |
423 | |
424 | release_sock(sk); | |
425 | break; | |
426 | } | |
427 | ||
428 | return ret; | |
429 | } | |
430 | ||
d9e4c129 PA |
431 | static bool mptcp_supported_sockopt(int level, int optname) |
432 | { | |
d9e4c129 PA |
433 | if (level == SOL_IP) { |
434 | switch (optname) { | |
435 | /* should work fine */ | |
436 | case IP_FREEBIND: | |
437 | case IP_TRANSPARENT: | |
438 | ||
439 | /* the following are control cmsg related */ | |
440 | case IP_PKTINFO: | |
441 | case IP_RECVTTL: | |
442 | case IP_RECVTOS: | |
443 | case IP_RECVOPTS: | |
444 | case IP_RETOPTS: | |
445 | case IP_PASSSEC: | |
446 | case IP_RECVORIGDSTADDR: | |
447 | case IP_CHECKSUM: | |
448 | case IP_RECVFRAGSIZE: | |
449 | ||
450 | /* common stuff that need some love */ | |
451 | case IP_TOS: | |
452 | case IP_TTL: | |
453 | case IP_BIND_ADDRESS_NO_PORT: | |
454 | case IP_MTU_DISCOVER: | |
455 | case IP_RECVERR: | |
456 | ||
457 | /* possibly less common may deserve some love */ | |
458 | case IP_MINTTL: | |
459 | ||
460 | /* the following is apparently a no-op for plain TCP */ | |
461 | case IP_RECVERR_RFC4884: | |
462 | return true; | |
463 | } | |
464 | ||
465 | /* IP_OPTIONS is not supported, needs subflow care */ | |
466 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
467 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
468 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
469 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
470 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
471 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
472 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
473 | * with mcast stuff | |
474 | */ | |
475 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
476 | return false; | |
477 | } | |
478 | if (level == SOL_IPV6) { | |
479 | switch (optname) { | |
480 | case IPV6_V6ONLY: | |
481 | ||
482 | /* the following are control cmsg related */ | |
483 | case IPV6_RECVPKTINFO: | |
484 | case IPV6_2292PKTINFO: | |
485 | case IPV6_RECVHOPLIMIT: | |
486 | case IPV6_2292HOPLIMIT: | |
487 | case IPV6_RECVRTHDR: | |
488 | case IPV6_2292RTHDR: | |
489 | case IPV6_RECVHOPOPTS: | |
490 | case IPV6_2292HOPOPTS: | |
491 | case IPV6_RECVDSTOPTS: | |
492 | case IPV6_2292DSTOPTS: | |
493 | case IPV6_RECVTCLASS: | |
494 | case IPV6_FLOWINFO: | |
495 | case IPV6_RECVPATHMTU: | |
496 | case IPV6_RECVORIGDSTADDR: | |
497 | case IPV6_RECVFRAGSIZE: | |
498 | ||
499 | /* the following ones need some love but are quite common */ | |
500 | case IPV6_TCLASS: | |
501 | case IPV6_TRANSPARENT: | |
502 | case IPV6_FREEBIND: | |
503 | case IPV6_PKTINFO: | |
504 | case IPV6_2292PKTOPTIONS: | |
505 | case IPV6_UNICAST_HOPS: | |
506 | case IPV6_MTU_DISCOVER: | |
507 | case IPV6_MTU: | |
508 | case IPV6_RECVERR: | |
509 | case IPV6_FLOWINFO_SEND: | |
510 | case IPV6_FLOWLABEL_MGR: | |
511 | case IPV6_MINHOPCOUNT: | |
512 | case IPV6_DONTFRAG: | |
513 | case IPV6_AUTOFLOWLABEL: | |
514 | ||
515 | /* the following one is a no-op for plain TCP */ | |
516 | case IPV6_RECVERR_RFC4884: | |
517 | return true; | |
518 | } | |
519 | ||
520 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
521 | * not supported | |
522 | */ | |
523 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
524 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
525 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
526 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
527 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
528 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
529 | * are not supported better not deal with mcast | |
530 | */ | |
531 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
532 | ||
533 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
534 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
535 | return false; | |
536 | } | |
537 | if (level == SOL_TCP) { | |
538 | switch (optname) { | |
539 | /* the following are no-op or should work just fine */ | |
540 | case TCP_THIN_DUPACK: | |
541 | case TCP_DEFER_ACCEPT: | |
542 | ||
543 | /* the following need some love */ | |
544 | case TCP_MAXSEG: | |
545 | case TCP_NODELAY: | |
546 | case TCP_THIN_LINEAR_TIMEOUTS: | |
547 | case TCP_CONGESTION: | |
d9e4c129 PA |
548 | case TCP_CORK: |
549 | case TCP_KEEPIDLE: | |
550 | case TCP_KEEPINTVL: | |
551 | case TCP_KEEPCNT: | |
552 | case TCP_SYNCNT: | |
553 | case TCP_SAVE_SYN: | |
554 | case TCP_LINGER2: | |
555 | case TCP_WINDOW_CLAMP: | |
556 | case TCP_QUICKACK: | |
557 | case TCP_USER_TIMEOUT: | |
558 | case TCP_TIMESTAMP: | |
559 | case TCP_NOTSENT_LOWAT: | |
560 | case TCP_TX_DELAY: | |
2c9e7765 | 561 | case TCP_INQ: |
54635bd0 | 562 | case TCP_FASTOPEN_CONNECT: |
d9e4c129 PA |
563 | return true; |
564 | } | |
565 | ||
566 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
567 | ||
568 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
569 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
570 | */ | |
54635bd0 | 571 | /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE, |
d9e4c129 PA |
572 | * are not supported fastopen is currently unsupported |
573 | */ | |
d9e4c129 PA |
574 | } |
575 | return false; | |
576 | } | |
577 | ||
aa1fbd94 FW |
578 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
579 | unsigned int optlen) | |
580 | { | |
581 | struct mptcp_subflow_context *subflow; | |
582 | struct sock *sk = (struct sock *)msk; | |
583 | char name[TCP_CA_NAME_MAX]; | |
584 | bool cap_net_admin; | |
585 | int ret; | |
586 | ||
587 | if (optlen < 1) | |
588 | return -EINVAL; | |
589 | ||
590 | ret = strncpy_from_sockptr(name, optval, | |
591 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
592 | if (ret < 0) | |
593 | return -EFAULT; | |
594 | ||
595 | name[ret] = 0; | |
596 | ||
597 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
598 | ||
599 | ret = 0; | |
600 | lock_sock(sk); | |
601 | sockopt_seq_inc(msk); | |
602 | mptcp_for_each_subflow(msk, subflow) { | |
603 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
604 | int err; | |
605 | ||
606 | lock_sock(ssk); | |
607 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
608 | if (err < 0 && ret == 0) | |
609 | ret = err; | |
610 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
611 | release_sock(ssk); | |
612 | } | |
613 | ||
614 | if (ret == 0) | |
20b5759f | 615 | strcpy(msk->ca_name, name); |
aa1fbd94 FW |
616 | |
617 | release_sock(sk); | |
618 | return ret; | |
619 | } | |
620 | ||
4f6e14bd MG |
621 | static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, |
622 | unsigned int optlen) | |
623 | { | |
624 | struct mptcp_subflow_context *subflow; | |
625 | struct sock *sk = (struct sock *)msk; | |
626 | int val; | |
627 | ||
628 | if (optlen < sizeof(int)) | |
629 | return -EINVAL; | |
630 | ||
631 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
632 | return -EFAULT; | |
633 | ||
634 | lock_sock(sk); | |
635 | sockopt_seq_inc(msk); | |
636 | msk->cork = !!val; | |
637 | mptcp_for_each_subflow(msk, subflow) { | |
638 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
639 | ||
640 | lock_sock(ssk); | |
641 | __tcp_sock_set_cork(ssk, !!val); | |
642 | release_sock(ssk); | |
643 | } | |
644 | if (!val) | |
645 | mptcp_check_and_set_pending(sk); | |
646 | release_sock(sk); | |
647 | ||
648 | return 0; | |
649 | } | |
650 | ||
651 | static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, | |
652 | unsigned int optlen) | |
653 | { | |
654 | struct mptcp_subflow_context *subflow; | |
655 | struct sock *sk = (struct sock *)msk; | |
656 | int val; | |
657 | ||
658 | if (optlen < sizeof(int)) | |
659 | return -EINVAL; | |
660 | ||
661 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
662 | return -EFAULT; | |
663 | ||
664 | lock_sock(sk); | |
665 | sockopt_seq_inc(msk); | |
666 | msk->nodelay = !!val; | |
667 | mptcp_for_each_subflow(msk, subflow) { | |
668 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
669 | ||
670 | lock_sock(ssk); | |
671 | __tcp_sock_set_nodelay(ssk, !!val); | |
672 | release_sock(ssk); | |
673 | } | |
674 | if (val) | |
675 | mptcp_check_and_set_pending(sk); | |
676 | release_sock(sk); | |
677 | ||
678 | return 0; | |
679 | } | |
680 | ||
c9406a23 FW |
681 | static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, |
682 | sockptr_t optval, unsigned int optlen) | |
683 | { | |
684 | struct sock *sk = (struct sock *)msk; | |
685 | struct inet_sock *issk; | |
686 | struct socket *ssock; | |
687 | int err; | |
688 | ||
689 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
690 | if (err != 0) | |
691 | return err; | |
692 | ||
693 | lock_sock(sk); | |
694 | ||
695 | ssock = __mptcp_nmpc_socket(msk); | |
696 | if (!ssock) { | |
697 | release_sock(sk); | |
698 | return -EINVAL; | |
699 | } | |
700 | ||
701 | issk = inet_sk(ssock->sk); | |
702 | ||
703 | switch (optname) { | |
704 | case IP_FREEBIND: | |
705 | issk->freebind = inet_sk(sk)->freebind; | |
706 | break; | |
707 | case IP_TRANSPARENT: | |
708 | issk->transparent = inet_sk(sk)->transparent; | |
709 | break; | |
710 | default: | |
711 | release_sock(sk); | |
712 | WARN_ON_ONCE(1); | |
713 | return -EOPNOTSUPP; | |
714 | } | |
715 | ||
716 | sockopt_seq_inc(msk); | |
717 | release_sock(sk); | |
718 | return 0; | |
719 | } | |
720 | ||
ffcacff8 PS |
721 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
722 | sockptr_t optval, unsigned int optlen) | |
723 | { | |
724 | struct mptcp_subflow_context *subflow; | |
725 | struct sock *sk = (struct sock *)msk; | |
726 | int err, val; | |
727 | ||
728 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
729 | ||
730 | if (err != 0) | |
731 | return err; | |
732 | ||
733 | lock_sock(sk); | |
734 | sockopt_seq_inc(msk); | |
735 | val = inet_sk(sk)->tos; | |
736 | mptcp_for_each_subflow(msk, subflow) { | |
737 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
738 | ||
739 | __ip_sock_set_tos(ssk, val); | |
740 | } | |
741 | release_sock(sk); | |
742 | ||
743 | return err; | |
744 | } | |
745 | ||
746 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, | |
747 | sockptr_t optval, unsigned int optlen) | |
748 | { | |
749 | switch (optname) { | |
c9406a23 FW |
750 | case IP_FREEBIND: |
751 | case IP_TRANSPARENT: | |
752 | return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); | |
ffcacff8 PS |
753 | case IP_TOS: |
754 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); | |
755 | } | |
756 | ||
757 | return -EOPNOTSUPP; | |
758 | } | |
759 | ||
ea1e301d FW |
760 | static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval, |
761 | unsigned int optlen) | |
762 | { | |
763 | struct socket *listener; | |
764 | ||
765 | listener = __mptcp_nmpc_socket(msk); | |
766 | if (!listener) | |
767 | return 0; /* TCP_DEFER_ACCEPT does not fail */ | |
768 | ||
769 | return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); | |
770 | } | |
771 | ||
d3d42904 MB |
772 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
773 | sockptr_t optval, unsigned int optlen) | |
54635bd0 BH |
774 | { |
775 | struct socket *sock; | |
776 | ||
d3d42904 | 777 | /* Limit to first subflow, before the connection establishment */ |
54635bd0 BH |
778 | sock = __mptcp_nmpc_socket(msk); |
779 | if (!sock) | |
780 | return -EINVAL; | |
781 | ||
d3d42904 | 782 | return tcp_setsockopt(sock->sk, level, optname, optval, optlen); |
54635bd0 BH |
783 | } |
784 | ||
aa1fbd94 FW |
785 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
786 | sockptr_t optval, unsigned int optlen) | |
787 | { | |
2c9e7765 FW |
788 | struct sock *sk = (void *)msk; |
789 | int ret, val; | |
790 | ||
aa1fbd94 | 791 | switch (optname) { |
2c9e7765 FW |
792 | case TCP_INQ: |
793 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
794 | if (ret) | |
795 | return ret; | |
796 | if (val < 0 || val > 1) | |
797 | return -EINVAL; | |
798 | ||
799 | lock_sock(sk); | |
800 | msk->recvmsg_inq = !!val; | |
801 | release_sock(sk); | |
802 | return 0; | |
aa1fbd94 FW |
803 | case TCP_ULP: |
804 | return -EOPNOTSUPP; | |
805 | case TCP_CONGESTION: | |
806 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
4f6e14bd MG |
807 | case TCP_CORK: |
808 | return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); | |
809 | case TCP_NODELAY: | |
810 | return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); | |
ea1e301d FW |
811 | case TCP_DEFER_ACCEPT: |
812 | return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); | |
54635bd0 | 813 | case TCP_FASTOPEN_CONNECT: |
d3d42904 MB |
814 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
815 | optval, optlen); | |
aa1fbd94 FW |
816 | } |
817 | ||
818 | return -EOPNOTSUPP; | |
819 | } | |
820 | ||
0abdde82 PA |
821 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
822 | sockptr_t optval, unsigned int optlen) | |
823 | { | |
824 | struct mptcp_sock *msk = mptcp_sk(sk); | |
825 | struct sock *ssk; | |
826 | ||
827 | pr_debug("msk=%p", msk); | |
828 | ||
829 | if (level == SOL_SOCKET) | |
830 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
831 | ||
7a009a70 FW |
832 | if (!mptcp_supported_sockopt(level, optname)) |
833 | return -ENOPROTOOPT; | |
834 | ||
0abdde82 PA |
835 | /* @@ the meaning of setsockopt() when the socket is connected and |
836 | * there are multiple subflows is not yet defined. It is up to the | |
837 | * MPTCP-level socket to configure the subflows until the subflow | |
838 | * is in TCP fallback, when TCP socket options are passed through | |
839 | * to the one remaining subflow. | |
840 | */ | |
841 | lock_sock(sk); | |
842 | ssk = __mptcp_tcp_fallback(msk); | |
843 | release_sock(sk); | |
844 | if (ssk) | |
845 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
846 | ||
ffcacff8 PS |
847 | if (level == SOL_IP) |
848 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); | |
849 | ||
0abdde82 PA |
850 | if (level == SOL_IPV6) |
851 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
852 | ||
aa1fbd94 FW |
853 | if (level == SOL_TCP) |
854 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
855 | ||
856 | return -EOPNOTSUPP; | |
857 | } | |
858 | ||
859 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
860 | char __user *optval, int __user *optlen) | |
861 | { | |
862 | struct sock *sk = (struct sock *)msk; | |
863 | struct socket *ssock; | |
864 | int ret = -EINVAL; | |
865 | struct sock *ssk; | |
866 | ||
867 | lock_sock(sk); | |
868 | ssk = msk->first; | |
869 | if (ssk) { | |
870 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
871 | goto out; | |
872 | } | |
873 | ||
874 | ssock = __mptcp_nmpc_socket(msk); | |
875 | if (!ssock) | |
876 | goto out; | |
877 | ||
878 | ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); | |
879 | ||
880 | out: | |
881 | release_sock(sk); | |
882 | return ret; | |
883 | } | |
884 | ||
61bc6e82 FW |
885 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
886 | { | |
61bc6e82 FW |
887 | u32 flags = 0; |
888 | u8 val; | |
889 | ||
55c42fa7 FW |
890 | memset(info, 0, sizeof(*info)); |
891 | ||
61bc6e82 FW |
892 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
893 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); | |
894 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); | |
895 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); | |
896 | info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); | |
897 | val = mptcp_pm_get_add_addr_signal_max(msk); | |
898 | info->mptcpi_add_addr_signal_max = val; | |
899 | val = mptcp_pm_get_add_addr_accept_max(msk); | |
900 | info->mptcpi_add_addr_accepted_max = val; | |
901 | info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); | |
902 | if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) | |
903 | flags |= MPTCP_INFO_FLAG_FALLBACK; | |
904 | if (READ_ONCE(msk->can_ack)) | |
905 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; | |
906 | info->mptcpi_flags = flags; | |
907 | info->mptcpi_token = READ_ONCE(msk->token); | |
908 | info->mptcpi_write_seq = READ_ONCE(msk->write_seq); | |
909 | info->mptcpi_snd_una = READ_ONCE(msk->snd_una); | |
910 | info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); | |
911 | info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); | |
61bc6e82 FW |
912 | } |
913 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); | |
914 | ||
55c42fa7 FW |
915 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
916 | { | |
917 | struct mptcp_info m_info; | |
918 | int len; | |
919 | ||
920 | if (get_user(len, optlen)) | |
921 | return -EFAULT; | |
922 | ||
923 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
924 | ||
925 | mptcp_diag_fill_info(msk, &m_info); | |
926 | ||
927 | if (put_user(len, optlen)) | |
928 | return -EFAULT; | |
929 | ||
930 | if (copy_to_user(optval, &m_info, len)) | |
931 | return -EFAULT; | |
932 | ||
933 | return 0; | |
934 | } | |
935 | ||
06f15cee FW |
936 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
937 | char __user *optval, | |
938 | u32 copied, | |
939 | int __user *optlen) | |
940 | { | |
941 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); | |
942 | ||
943 | if (copied) | |
944 | copied += sfd->size_subflow_data; | |
945 | else | |
946 | copied = copylen; | |
947 | ||
948 | if (put_user(copied, optlen)) | |
949 | return -EFAULT; | |
950 | ||
951 | if (copy_to_user(optval, sfd, copylen)) | |
952 | return -EFAULT; | |
953 | ||
954 | return 0; | |
955 | } | |
956 | ||
957 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, | |
958 | char __user *optval, int __user *optlen) | |
959 | { | |
960 | int len, copylen; | |
961 | ||
962 | if (get_user(len, optlen)) | |
963 | return -EFAULT; | |
964 | ||
965 | /* if mptcp_subflow_data size is changed, need to adjust | |
966 | * this function to deal with programs using old version. | |
967 | */ | |
968 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); | |
969 | ||
970 | if (len < MIN_INFO_OPTLEN_SIZE) | |
971 | return -EINVAL; | |
972 | ||
973 | memset(sfd, 0, sizeof(*sfd)); | |
974 | ||
975 | copylen = min_t(unsigned int, len, sizeof(*sfd)); | |
976 | if (copy_from_user(sfd, optval, copylen)) | |
977 | return -EFAULT; | |
978 | ||
979 | /* size_subflow_data is u32, but len is signed */ | |
980 | if (sfd->size_subflow_data > INT_MAX || | |
981 | sfd->size_user > INT_MAX) | |
982 | return -EINVAL; | |
983 | ||
984 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || | |
985 | sfd->size_subflow_data > len) | |
986 | return -EINVAL; | |
987 | ||
988 | if (sfd->num_subflows || sfd->size_kernel) | |
989 | return -EINVAL; | |
990 | ||
991 | return len - sfd->size_subflow_data; | |
992 | } | |
993 | ||
994 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, | |
995 | int __user *optlen) | |
996 | { | |
997 | struct mptcp_subflow_context *subflow; | |
998 | struct sock *sk = &msk->sk.icsk_inet.sk; | |
999 | unsigned int sfcount = 0, copied = 0; | |
1000 | struct mptcp_subflow_data sfd; | |
1001 | char __user *infoptr; | |
1002 | int len; | |
1003 | ||
1004 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1005 | if (len < 0) | |
1006 | return len; | |
1007 | ||
1008 | sfd.size_kernel = sizeof(struct tcp_info); | |
1009 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1010 | sizeof(struct tcp_info)); | |
1011 | ||
1012 | infoptr = optval + sfd.size_subflow_data; | |
1013 | ||
1014 | lock_sock(sk); | |
1015 | ||
1016 | mptcp_for_each_subflow(msk, subflow) { | |
1017 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1018 | ||
1019 | ++sfcount; | |
1020 | ||
1021 | if (len && len >= sfd.size_user) { | |
1022 | struct tcp_info info; | |
1023 | ||
1024 | tcp_get_info(ssk, &info); | |
1025 | ||
1026 | if (copy_to_user(infoptr, &info, sfd.size_user)) { | |
1027 | release_sock(sk); | |
1028 | return -EFAULT; | |
1029 | } | |
1030 | ||
1031 | infoptr += sfd.size_user; | |
1032 | copied += sfd.size_user; | |
1033 | len -= sfd.size_user; | |
1034 | } | |
1035 | } | |
1036 | ||
1037 | release_sock(sk); | |
1038 | ||
1039 | sfd.num_subflows = sfcount; | |
1040 | ||
1041 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1042 | return -EFAULT; | |
1043 | ||
1044 | return 0; | |
1045 | } | |
1046 | ||
c11c5906 FW |
1047 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1048 | { | |
1049 | struct inet_sock *inet = inet_sk(sk); | |
1050 | ||
1051 | memset(a, 0, sizeof(*a)); | |
1052 | ||
1053 | if (sk->sk_family == AF_INET) { | |
1054 | a->sin_local.sin_family = AF_INET; | |
1055 | a->sin_local.sin_port = inet->inet_sport; | |
1056 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; | |
1057 | ||
1058 | if (!a->sin_local.sin_addr.s_addr) | |
1059 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; | |
1060 | ||
1061 | a->sin_remote.sin_family = AF_INET; | |
1062 | a->sin_remote.sin_port = inet->inet_dport; | |
1063 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; | |
1064 | #if IS_ENABLED(CONFIG_IPV6) | |
1065 | } else if (sk->sk_family == AF_INET6) { | |
1066 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1067 | ||
29211e7d TG |
1068 | if (WARN_ON_ONCE(!np)) |
1069 | return; | |
1070 | ||
c11c5906 FW |
1071 | a->sin6_local.sin6_family = AF_INET6; |
1072 | a->sin6_local.sin6_port = inet->inet_sport; | |
1073 | ||
1074 | if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) | |
1075 | a->sin6_local.sin6_addr = np->saddr; | |
1076 | else | |
1077 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; | |
1078 | ||
1079 | a->sin6_remote.sin6_family = AF_INET6; | |
1080 | a->sin6_remote.sin6_port = inet->inet_dport; | |
1081 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; | |
1082 | #endif | |
1083 | } | |
1084 | } | |
1085 | ||
1086 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, | |
1087 | int __user *optlen) | |
1088 | { | |
1089 | struct sock *sk = &msk->sk.icsk_inet.sk; | |
1090 | struct mptcp_subflow_context *subflow; | |
1091 | unsigned int sfcount = 0, copied = 0; | |
1092 | struct mptcp_subflow_data sfd; | |
1093 | char __user *addrptr; | |
1094 | int len; | |
1095 | ||
1096 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1097 | if (len < 0) | |
1098 | return len; | |
1099 | ||
1100 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); | |
1101 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1102 | sizeof(struct mptcp_subflow_addrs)); | |
1103 | ||
1104 | addrptr = optval + sfd.size_subflow_data; | |
1105 | ||
1106 | lock_sock(sk); | |
1107 | ||
1108 | mptcp_for_each_subflow(msk, subflow) { | |
1109 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1110 | ||
1111 | ++sfcount; | |
1112 | ||
1113 | if (len && len >= sfd.size_user) { | |
1114 | struct mptcp_subflow_addrs a; | |
1115 | ||
1116 | mptcp_get_sub_addrs(ssk, &a); | |
1117 | ||
1118 | if (copy_to_user(addrptr, &a, sfd.size_user)) { | |
1119 | release_sock(sk); | |
1120 | return -EFAULT; | |
1121 | } | |
1122 | ||
1123 | addrptr += sfd.size_user; | |
1124 | copied += sfd.size_user; | |
1125 | len -= sfd.size_user; | |
1126 | } | |
1127 | } | |
1128 | ||
1129 | release_sock(sk); | |
1130 | ||
1131 | sfd.num_subflows = sfcount; | |
1132 | ||
1133 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1134 | return -EFAULT; | |
1135 | ||
1136 | return 0; | |
1137 | } | |
1138 | ||
2c9e7765 FW |
1139 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1140 | int __user *optlen, int val) | |
1141 | { | |
1142 | int len; | |
1143 | ||
1144 | if (get_user(len, optlen)) | |
1145 | return -EFAULT; | |
2c9e7765 FW |
1146 | if (len < 0) |
1147 | return -EINVAL; | |
1148 | ||
3b1e21eb FW |
1149 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1150 | unsigned char ucval = (unsigned char)val; | |
1151 | ||
1152 | len = 1; | |
1153 | if (put_user(len, optlen)) | |
1154 | return -EFAULT; | |
1155 | if (copy_to_user(optval, &ucval, 1)) | |
1156 | return -EFAULT; | |
1157 | } else { | |
1158 | len = min_t(unsigned int, len, sizeof(int)); | |
1159 | if (put_user(len, optlen)) | |
1160 | return -EFAULT; | |
1161 | if (copy_to_user(optval, &val, len)) | |
1162 | return -EFAULT; | |
1163 | } | |
2c9e7765 FW |
1164 | |
1165 | return 0; | |
1166 | } | |
1167 | ||
aa1fbd94 FW |
1168 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1169 | char __user *optval, int __user *optlen) | |
1170 | { | |
1171 | switch (optname) { | |
1172 | case TCP_ULP: | |
1173 | case TCP_CONGESTION: | |
1174 | case TCP_INFO: | |
1175 | case TCP_CC_INFO: | |
ea1e301d | 1176 | case TCP_DEFER_ACCEPT: |
54635bd0 | 1177 | case TCP_FASTOPEN_CONNECT: |
aa1fbd94 FW |
1178 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1179 | optval, optlen); | |
2c9e7765 FW |
1180 | case TCP_INQ: |
1181 | return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); | |
4f6e14bd MG |
1182 | case TCP_CORK: |
1183 | return mptcp_put_int_option(msk, optval, optlen, msk->cork); | |
1184 | case TCP_NODELAY: | |
1185 | return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); | |
aa1fbd94 | 1186 | } |
0abdde82 PA |
1187 | return -EOPNOTSUPP; |
1188 | } | |
1189 | ||
3b1e21eb FW |
1190 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1191 | char __user *optval, int __user *optlen) | |
1192 | { | |
1193 | struct sock *sk = (void *)msk; | |
1194 | ||
1195 | switch (optname) { | |
1196 | case IP_TOS: | |
1197 | return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); | |
1198 | } | |
1199 | ||
1200 | return -EOPNOTSUPP; | |
1201 | } | |
1202 | ||
55c42fa7 FW |
1203 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1204 | char __user *optval, int __user *optlen) | |
1205 | { | |
1206 | switch (optname) { | |
1207 | case MPTCP_INFO: | |
1208 | return mptcp_getsockopt_info(msk, optval, optlen); | |
06f15cee FW |
1209 | case MPTCP_TCPINFO: |
1210 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); | |
c11c5906 FW |
1211 | case MPTCP_SUBFLOW_ADDRS: |
1212 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); | |
55c42fa7 FW |
1213 | } |
1214 | ||
1215 | return -EOPNOTSUPP; | |
1216 | } | |
1217 | ||
0abdde82 PA |
1218 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1219 | char __user *optval, int __user *option) | |
1220 | { | |
1221 | struct mptcp_sock *msk = mptcp_sk(sk); | |
1222 | struct sock *ssk; | |
1223 | ||
1224 | pr_debug("msk=%p", msk); | |
1225 | ||
1226 | /* @@ the meaning of setsockopt() when the socket is connected and | |
1227 | * there are multiple subflows is not yet defined. It is up to the | |
1228 | * MPTCP-level socket to configure the subflows until the subflow | |
1229 | * is in TCP fallback, when socket options are passed through | |
1230 | * to the one remaining subflow. | |
1231 | */ | |
1232 | lock_sock(sk); | |
1233 | ssk = __mptcp_tcp_fallback(msk); | |
1234 | release_sock(sk); | |
1235 | if (ssk) | |
1236 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
1237 | ||
3b1e21eb FW |
1238 | if (level == SOL_IP) |
1239 | return mptcp_getsockopt_v4(msk, optname, optval, option); | |
aa1fbd94 FW |
1240 | if (level == SOL_TCP) |
1241 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
55c42fa7 FW |
1242 | if (level == SOL_MPTCP) |
1243 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); | |
0abdde82 PA |
1244 | return -EOPNOTSUPP; |
1245 | } | |
1246 | ||
1b3e7ede FW |
1247 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1248 | { | |
5d0a6bc8 | 1249 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
1250 | struct sock *sk = (struct sock *)msk; |
1251 | ||
1252 | if (ssk->sk_prot->keepalive) { | |
1253 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
1254 | ssk->sk_prot->keepalive(ssk, 1); | |
1255 | else | |
1256 | ssk->sk_prot->keepalive(ssk, 0); | |
1257 | } | |
1258 | ||
1259 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
1260 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1261 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
ffcacff8 | 1262 | __ip_sock_set_tos(ssk, inet_sk(sk)->tos); |
5d0a6bc8 FW |
1263 | |
1264 | if (sk->sk_userlocks & tx_rx_locks) { | |
1265 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
1266 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | |
1267 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
1268 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1269 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
1270 | } | |
1271 | ||
1272 | if (sock_flag(sk, SOCK_LINGER)) { | |
1273 | ssk->sk_lingertime = sk->sk_lingertime; | |
1274 | sock_set_flag(ssk, SOCK_LINGER); | |
1275 | } else { | |
1276 | sock_reset_flag(ssk, SOCK_LINGER); | |
1277 | } | |
1278 | ||
1279 | if (sk->sk_mark != ssk->sk_mark) { | |
1280 | ssk->sk_mark = sk->sk_mark; | |
1281 | sk_dst_reset(ssk); | |
1282 | } | |
1283 | ||
1284 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
1285 | ||
1286 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
20b5759f | 1287 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
4f6e14bd MG |
1288 | __tcp_sock_set_cork(ssk, !!msk->cork); |
1289 | __tcp_sock_set_nodelay(ssk, !!msk->nodelay); | |
c9406a23 FW |
1290 | |
1291 | inet_sk(ssk)->transparent = inet_sk(sk)->transparent; | |
1292 | inet_sk(ssk)->freebind = inet_sk(sk)->freebind; | |
1b3e7ede FW |
1293 | } |
1294 | ||
df00b087 FW |
1295 | static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
1296 | { | |
1b3e7ede FW |
1297 | bool slow = lock_sock_fast(ssk); |
1298 | ||
1299 | sync_socket_options(msk, ssk); | |
1300 | ||
1301 | unlock_sock_fast(ssk, slow); | |
df00b087 FW |
1302 | } |
1303 | ||
78962489 FW |
1304 | void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
1305 | { | |
df00b087 FW |
1306 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
1307 | ||
78962489 | 1308 | msk_owned_by_me(msk); |
df00b087 FW |
1309 | |
1310 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { | |
1311 | __mptcp_sockopt_sync(msk, ssk); | |
1312 | ||
1313 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
1314 | } | |
78962489 FW |
1315 | } |
1316 | ||
3e501490 | 1317 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
78962489 | 1318 | { |
3e501490 | 1319 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
78962489 | 1320 | |
3e501490 | 1321 | msk_owned_by_me(msk); |
78962489 | 1322 | |
3e501490 PA |
1323 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1324 | sync_socket_options(msk, ssk); | |
78962489 | 1325 | |
3e501490 | 1326 | subflow->setsockopt_seq = msk->setsockopt_seq; |
78962489 FW |
1327 | } |
1328 | } |