Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
49243207 PA |
17 | #define MIN_INFO_OPTLEN_SIZE 16 |
18 | #define MIN_FULL_INFO_OPTLEN_SIZE 40 | |
06f15cee | 19 | |
0abdde82 PA |
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
21 | { | |
109cdeb8 | 22 | msk_owned_by_me(msk); |
0abdde82 PA |
23 | |
24 | if (likely(!__mptcp_check_fallback(msk))) | |
25 | return NULL; | |
26 | ||
27 | return msk->first; | |
28 | } | |
29 | ||
df00b087 FW |
30 | static u32 sockopt_seq_reset(const struct sock *sk) |
31 | { | |
32 | sock_owned_by_me(sk); | |
33 | ||
34 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
35 | * of listener and established: | |
36 | * s0 = new_listener() | |
37 | * sockopt(s0) - seq is 1 | |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
39 | * sockopt(s0) - seq increments to 2 on s0 | |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
41 | * new ssk completes join, inherits options from s0 // seq 2 | |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
43 | * | |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
45 | * will fail. | |
46 | */ | |
47 | ||
48 | return (u32)sk->sk_state << 24u; | |
49 | } | |
50 | ||
1b3e7ede FW |
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
52 | { | |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
54 | ||
55 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
56 | } | |
57 | ||
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
59 | unsigned int optlen, int *val) | |
60 | { | |
61 | if (optlen < sizeof(int)) | |
62 | return -EINVAL; | |
63 | ||
64 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
65 | return -EFAULT; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
71 | { | |
72 | struct mptcp_subflow_context *subflow; | |
73 | struct sock *sk = (struct sock *)msk; | |
74 | ||
75 | lock_sock(sk); | |
76 | sockopt_seq_inc(msk); | |
77 | ||
78 | mptcp_for_each_subflow(msk, subflow) { | |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
80 | bool slow = lock_sock_fast(ssk); | |
81 | ||
82 | switch (optname) { | |
a03c99b2 FW |
83 | case SO_DEBUG: |
84 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
85 | break; | |
1b3e7ede FW |
86 | case SO_KEEPALIVE: |
87 | if (ssk->sk_prot->keepalive) | |
88 | ssk->sk_prot->keepalive(ssk, !!val); | |
89 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
90 | break; | |
91 | case SO_PRIORITY: | |
92 | ssk->sk_priority = val; | |
93 | break; | |
5d0a6bc8 FW |
94 | case SO_SNDBUF: |
95 | case SO_SNDBUFFORCE: | |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
98 | break; | |
99 | case SO_RCVBUF: | |
100 | case SO_RCVBUFFORCE: | |
101 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
102 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
103 | break; | |
36704413 FW |
104 | case SO_MARK: |
105 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
3c5b4d69 | 106 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); |
36704413 FW |
107 | sk_dst_reset(ssk); |
108 | } | |
109 | break; | |
6f0d7198 FW |
110 | case SO_INCOMING_CPU: |
111 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
112 | break; | |
1b3e7ede FW |
113 | } |
114 | ||
115 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
116 | unlock_sock_fast(ssk, slow); | |
117 | } | |
118 | ||
119 | release_sock(sk); | |
120 | } | |
121 | ||
122 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
123 | { | |
124 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
125 | struct sock *sk = (struct sock *)msk; | |
126 | int ret; | |
127 | ||
128 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
129 | optval, sizeof(val)); | |
130 | if (ret) | |
131 | return ret; | |
132 | ||
133 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
134 | return 0; | |
135 | } | |
136 | ||
6f0d7198 FW |
137 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
138 | { | |
139 | struct sock *sk = (struct sock *)msk; | |
140 | ||
141 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
142 | ||
143 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
144 | } | |
145 | ||
9061f24b FW |
146 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
147 | { | |
148 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
149 | struct mptcp_subflow_context *subflow; | |
150 | struct sock *sk = (struct sock *)msk; | |
151 | int ret; | |
152 | ||
153 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
154 | optval, sizeof(val)); | |
155 | if (ret) | |
156 | return ret; | |
157 | ||
158 | lock_sock(sk); | |
159 | mptcp_for_each_subflow(msk, subflow) { | |
160 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
161 | bool slow = lock_sock_fast(ssk); | |
162 | ||
6c9a0a0f | 163 | sock_set_timestamp(sk, optname, !!val); |
9061f24b FW |
164 | unlock_sock_fast(ssk, slow); |
165 | } | |
166 | ||
167 | release_sock(sk); | |
168 | return 0; | |
169 | } | |
170 | ||
1b3e7ede | 171 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
6c9a0a0f YL |
172 | sockptr_t optval, |
173 | unsigned int optlen) | |
1b3e7ede FW |
174 | { |
175 | int val, ret; | |
176 | ||
177 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
178 | if (ret) | |
179 | return ret; | |
180 | ||
181 | switch (optname) { | |
182 | case SO_KEEPALIVE: | |
183 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
184 | return 0; | |
a03c99b2 | 185 | case SO_DEBUG: |
36704413 | 186 | case SO_MARK: |
1b3e7ede | 187 | case SO_PRIORITY: |
5d0a6bc8 FW |
188 | case SO_SNDBUF: |
189 | case SO_SNDBUFFORCE: | |
190 | case SO_RCVBUF: | |
191 | case SO_RCVBUFFORCE: | |
1b3e7ede | 192 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
193 | case SO_INCOMING_CPU: |
194 | mptcp_so_incoming_cpu(msk, val); | |
195 | return 0; | |
9061f24b FW |
196 | case SO_TIMESTAMP_OLD: |
197 | case SO_TIMESTAMP_NEW: | |
198 | case SO_TIMESTAMPNS_OLD: | |
199 | case SO_TIMESTAMPNS_NEW: | |
9061f24b | 200 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
1b3e7ede FW |
201 | } |
202 | ||
203 | return -ENOPROTOOPT; | |
204 | } | |
205 | ||
6c9a0a0f YL |
206 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
207 | int optname, | |
208 | sockptr_t optval, | |
209 | unsigned int optlen) | |
210 | { | |
211 | struct mptcp_subflow_context *subflow; | |
212 | struct sock *sk = (struct sock *)msk; | |
d463126e YL |
213 | struct so_timestamping timestamping; |
214 | int ret; | |
6c9a0a0f | 215 | |
d463126e YL |
216 | if (optlen == sizeof(timestamping)) { |
217 | if (copy_from_sockptr(×tamping, optval, | |
218 | sizeof(timestamping))) | |
219 | return -EFAULT; | |
220 | } else if (optlen == sizeof(int)) { | |
221 | memset(×tamping, 0, sizeof(timestamping)); | |
222 | ||
223 | if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) | |
224 | return -EFAULT; | |
225 | } else { | |
226 | return -EINVAL; | |
227 | } | |
6c9a0a0f YL |
228 | |
229 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
d463126e YL |
230 | KERNEL_SOCKPTR(×tamping), |
231 | sizeof(timestamping)); | |
6c9a0a0f YL |
232 | if (ret) |
233 | return ret; | |
234 | ||
235 | lock_sock(sk); | |
236 | ||
237 | mptcp_for_each_subflow(msk, subflow) { | |
238 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
239 | bool slow = lock_sock_fast(ssk); | |
240 | ||
d463126e | 241 | sock_set_timestamping(sk, optname, timestamping); |
6c9a0a0f YL |
242 | unlock_sock_fast(ssk, slow); |
243 | } | |
244 | ||
245 | release_sock(sk); | |
246 | ||
247 | return 0; | |
248 | } | |
249 | ||
268b1238 FW |
250 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
251 | unsigned int optlen) | |
252 | { | |
253 | struct mptcp_subflow_context *subflow; | |
254 | struct sock *sk = (struct sock *)msk; | |
255 | struct linger ling; | |
256 | sockptr_t kopt; | |
257 | int ret; | |
258 | ||
259 | if (optlen < sizeof(ling)) | |
260 | return -EINVAL; | |
261 | ||
262 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
263 | return -EFAULT; | |
264 | ||
265 | kopt = KERNEL_SOCKPTR(&ling); | |
266 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
267 | if (ret) | |
268 | return ret; | |
269 | ||
270 | lock_sock(sk); | |
271 | sockopt_seq_inc(msk); | |
272 | mptcp_for_each_subflow(msk, subflow) { | |
273 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
274 | bool slow = lock_sock_fast(ssk); | |
275 | ||
276 | if (!ling.l_onoff) { | |
277 | sock_reset_flag(ssk, SOCK_LINGER); | |
278 | } else { | |
279 | ssk->sk_lingertime = sk->sk_lingertime; | |
280 | sock_set_flag(ssk, SOCK_LINGER); | |
281 | } | |
282 | ||
283 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
284 | unlock_sock_fast(ssk, slow); | |
285 | } | |
286 | ||
287 | release_sock(sk); | |
288 | return 0; | |
289 | } | |
290 | ||
0abdde82 PA |
291 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
292 | sockptr_t optval, unsigned int optlen) | |
293 | { | |
294 | struct sock *sk = (struct sock *)msk; | |
295 | struct socket *ssock; | |
f0bc514b | 296 | struct sock *ssk; |
0abdde82 PA |
297 | int ret; |
298 | ||
299 | switch (optname) { | |
300 | case SO_REUSEPORT: | |
301 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
302 | case SO_BINDTODEVICE: |
303 | case SO_BINDTOIFINDEX: | |
0abdde82 PA |
304 | lock_sock(sk); |
305 | ssock = __mptcp_nmpc_socket(msk); | |
ddb1a072 | 306 | if (IS_ERR(ssock)) { |
0abdde82 | 307 | release_sock(sk); |
ddb1a072 | 308 | return PTR_ERR(ssock); |
0abdde82 PA |
309 | } |
310 | ||
f0bc514b PA |
311 | ssk = msk->first; |
312 | ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); | |
0abdde82 PA |
313 | if (ret == 0) { |
314 | if (optname == SO_REUSEPORT) | |
f0bc514b | 315 | sk->sk_reuseport = ssk->sk_reuseport; |
0abdde82 | 316 | else if (optname == SO_REUSEADDR) |
f0bc514b | 317 | sk->sk_reuse = ssk->sk_reuse; |
5d0a6bc8 | 318 | else if (optname == SO_BINDTODEVICE) |
f0bc514b | 319 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
5d0a6bc8 | 320 | else if (optname == SO_BINDTOIFINDEX) |
f0bc514b | 321 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
0abdde82 PA |
322 | } |
323 | release_sock(sk); | |
324 | return ret; | |
1b3e7ede FW |
325 | case SO_KEEPALIVE: |
326 | case SO_PRIORITY: | |
5d0a6bc8 FW |
327 | case SO_SNDBUF: |
328 | case SO_SNDBUFFORCE: | |
329 | case SO_RCVBUF: | |
330 | case SO_RCVBUFFORCE: | |
36704413 | 331 | case SO_MARK: |
6f0d7198 | 332 | case SO_INCOMING_CPU: |
a03c99b2 | 333 | case SO_DEBUG: |
9061f24b FW |
334 | case SO_TIMESTAMP_OLD: |
335 | case SO_TIMESTAMP_NEW: | |
336 | case SO_TIMESTAMPNS_OLD: | |
337 | case SO_TIMESTAMPNS_NEW: | |
6c9a0a0f YL |
338 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
339 | optlen); | |
9061f24b FW |
340 | case SO_TIMESTAMPING_OLD: |
341 | case SO_TIMESTAMPING_NEW: | |
6c9a0a0f YL |
342 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
343 | optval, optlen); | |
268b1238 FW |
344 | case SO_LINGER: |
345 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
7a009a70 FW |
346 | case SO_RCVLOWAT: |
347 | case SO_RCVTIMEO_OLD: | |
348 | case SO_RCVTIMEO_NEW: | |
d6ab5ea2 GT |
349 | case SO_SNDTIMEO_OLD: |
350 | case SO_SNDTIMEO_NEW: | |
7a009a70 FW |
351 | case SO_BUSY_POLL: |
352 | case SO_PREFER_BUSY_POLL: | |
353 | case SO_BUSY_POLL_BUDGET: | |
354 | /* No need to copy: only relevant for msk */ | |
355 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
a03c99b2 FW |
356 | case SO_NO_CHECK: |
357 | case SO_DONTROUTE: | |
358 | case SO_BROADCAST: | |
359 | case SO_BSDCOMPAT: | |
360 | case SO_PASSCRED: | |
5e2ff670 | 361 | case SO_PASSPIDFD: |
a03c99b2 FW |
362 | case SO_PASSSEC: |
363 | case SO_RXQ_OVFL: | |
364 | case SO_WIFI_STATUS: | |
365 | case SO_NOFCS: | |
366 | case SO_SELECT_ERR_QUEUE: | |
367 | return 0; | |
0abdde82 PA |
368 | } |
369 | ||
7a009a70 FW |
370 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
371 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
372 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
373 | * we must be careful with subflows | |
374 | * | |
375 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
376 | * explicitly the sk_protocol field | |
377 | * | |
378 | * SO_PEEK_OFF is unsupported, as it is for plain TCP | |
379 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows | |
380 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
381 | * but likely needs careful design | |
382 | * | |
383 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg | |
384 | * SO_TXTIME is currently unsupported | |
385 | */ | |
386 | ||
387 | return -EOPNOTSUPP; | |
0abdde82 PA |
388 | } |
389 | ||
390 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
391 | sockptr_t optval, unsigned int optlen) | |
392 | { | |
393 | struct sock *sk = (struct sock *)msk; | |
394 | int ret = -EOPNOTSUPP; | |
395 | struct socket *ssock; | |
f0bc514b | 396 | struct sock *ssk; |
0abdde82 PA |
397 | |
398 | switch (optname) { | |
399 | case IPV6_V6ONLY: | |
c9406a23 FW |
400 | case IPV6_TRANSPARENT: |
401 | case IPV6_FREEBIND: | |
0abdde82 PA |
402 | lock_sock(sk); |
403 | ssock = __mptcp_nmpc_socket(msk); | |
ddb1a072 | 404 | if (IS_ERR(ssock)) { |
0abdde82 | 405 | release_sock(sk); |
ddb1a072 | 406 | return PTR_ERR(ssock); |
0abdde82 PA |
407 | } |
408 | ||
f0bc514b PA |
409 | ssk = msk->first; |
410 | ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); | |
c9406a23 FW |
411 | if (ret != 0) { |
412 | release_sock(sk); | |
413 | return ret; | |
414 | } | |
415 | ||
416 | sockopt_seq_inc(msk); | |
417 | ||
418 | switch (optname) { | |
419 | case IPV6_V6ONLY: | |
f0bc514b | 420 | sk->sk_ipv6only = ssk->sk_ipv6only; |
c9406a23 FW |
421 | break; |
422 | case IPV6_TRANSPARENT: | |
f0bc514b | 423 | inet_sk(sk)->transparent = inet_sk(ssk)->transparent; |
c9406a23 FW |
424 | break; |
425 | case IPV6_FREEBIND: | |
f0bc514b | 426 | inet_sk(sk)->freebind = inet_sk(ssk)->freebind; |
c9406a23 FW |
427 | break; |
428 | } | |
0abdde82 PA |
429 | |
430 | release_sock(sk); | |
431 | break; | |
432 | } | |
433 | ||
434 | return ret; | |
435 | } | |
436 | ||
d9e4c129 PA |
437 | static bool mptcp_supported_sockopt(int level, int optname) |
438 | { | |
d9e4c129 PA |
439 | if (level == SOL_IP) { |
440 | switch (optname) { | |
441 | /* should work fine */ | |
442 | case IP_FREEBIND: | |
443 | case IP_TRANSPARENT: | |
444 | ||
445 | /* the following are control cmsg related */ | |
446 | case IP_PKTINFO: | |
447 | case IP_RECVTTL: | |
448 | case IP_RECVTOS: | |
449 | case IP_RECVOPTS: | |
450 | case IP_RETOPTS: | |
451 | case IP_PASSSEC: | |
452 | case IP_RECVORIGDSTADDR: | |
453 | case IP_CHECKSUM: | |
454 | case IP_RECVFRAGSIZE: | |
455 | ||
456 | /* common stuff that need some love */ | |
457 | case IP_TOS: | |
458 | case IP_TTL: | |
459 | case IP_BIND_ADDRESS_NO_PORT: | |
460 | case IP_MTU_DISCOVER: | |
461 | case IP_RECVERR: | |
462 | ||
463 | /* possibly less common may deserve some love */ | |
464 | case IP_MINTTL: | |
465 | ||
466 | /* the following is apparently a no-op for plain TCP */ | |
467 | case IP_RECVERR_RFC4884: | |
468 | return true; | |
469 | } | |
470 | ||
471 | /* IP_OPTIONS is not supported, needs subflow care */ | |
472 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
473 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
474 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
475 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
476 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
477 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
478 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
479 | * with mcast stuff | |
480 | */ | |
481 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
482 | return false; | |
483 | } | |
484 | if (level == SOL_IPV6) { | |
485 | switch (optname) { | |
486 | case IPV6_V6ONLY: | |
487 | ||
488 | /* the following are control cmsg related */ | |
489 | case IPV6_RECVPKTINFO: | |
490 | case IPV6_2292PKTINFO: | |
491 | case IPV6_RECVHOPLIMIT: | |
492 | case IPV6_2292HOPLIMIT: | |
493 | case IPV6_RECVRTHDR: | |
494 | case IPV6_2292RTHDR: | |
495 | case IPV6_RECVHOPOPTS: | |
496 | case IPV6_2292HOPOPTS: | |
497 | case IPV6_RECVDSTOPTS: | |
498 | case IPV6_2292DSTOPTS: | |
499 | case IPV6_RECVTCLASS: | |
500 | case IPV6_FLOWINFO: | |
501 | case IPV6_RECVPATHMTU: | |
502 | case IPV6_RECVORIGDSTADDR: | |
503 | case IPV6_RECVFRAGSIZE: | |
504 | ||
505 | /* the following ones need some love but are quite common */ | |
506 | case IPV6_TCLASS: | |
507 | case IPV6_TRANSPARENT: | |
508 | case IPV6_FREEBIND: | |
509 | case IPV6_PKTINFO: | |
510 | case IPV6_2292PKTOPTIONS: | |
511 | case IPV6_UNICAST_HOPS: | |
512 | case IPV6_MTU_DISCOVER: | |
513 | case IPV6_MTU: | |
514 | case IPV6_RECVERR: | |
515 | case IPV6_FLOWINFO_SEND: | |
516 | case IPV6_FLOWLABEL_MGR: | |
517 | case IPV6_MINHOPCOUNT: | |
518 | case IPV6_DONTFRAG: | |
519 | case IPV6_AUTOFLOWLABEL: | |
520 | ||
521 | /* the following one is a no-op for plain TCP */ | |
522 | case IPV6_RECVERR_RFC4884: | |
523 | return true; | |
524 | } | |
525 | ||
526 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
527 | * not supported | |
528 | */ | |
529 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
530 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
531 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
532 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
533 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
534 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
535 | * are not supported better not deal with mcast | |
536 | */ | |
537 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
538 | ||
539 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
540 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
541 | return false; | |
542 | } | |
543 | if (level == SOL_TCP) { | |
544 | switch (optname) { | |
545 | /* the following are no-op or should work just fine */ | |
546 | case TCP_THIN_DUPACK: | |
547 | case TCP_DEFER_ACCEPT: | |
548 | ||
549 | /* the following need some love */ | |
550 | case TCP_MAXSEG: | |
551 | case TCP_NODELAY: | |
552 | case TCP_THIN_LINEAR_TIMEOUTS: | |
553 | case TCP_CONGESTION: | |
d9e4c129 PA |
554 | case TCP_CORK: |
555 | case TCP_KEEPIDLE: | |
556 | case TCP_KEEPINTVL: | |
557 | case TCP_KEEPCNT: | |
558 | case TCP_SYNCNT: | |
559 | case TCP_SAVE_SYN: | |
560 | case TCP_LINGER2: | |
561 | case TCP_WINDOW_CLAMP: | |
562 | case TCP_QUICKACK: | |
563 | case TCP_USER_TIMEOUT: | |
564 | case TCP_TIMESTAMP: | |
565 | case TCP_NOTSENT_LOWAT: | |
566 | case TCP_TX_DELAY: | |
2c9e7765 | 567 | case TCP_INQ: |
4ffb0a02 | 568 | case TCP_FASTOPEN: |
54635bd0 | 569 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 570 | case TCP_FASTOPEN_KEY: |
e64d4deb | 571 | case TCP_FASTOPEN_NO_COOKIE: |
d9e4c129 PA |
572 | return true; |
573 | } | |
574 | ||
575 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
576 | ||
577 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
578 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
579 | */ | |
d9e4c129 PA |
580 | } |
581 | return false; | |
582 | } | |
583 | ||
aa1fbd94 FW |
584 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
585 | unsigned int optlen) | |
586 | { | |
587 | struct mptcp_subflow_context *subflow; | |
588 | struct sock *sk = (struct sock *)msk; | |
589 | char name[TCP_CA_NAME_MAX]; | |
590 | bool cap_net_admin; | |
591 | int ret; | |
592 | ||
593 | if (optlen < 1) | |
594 | return -EINVAL; | |
595 | ||
596 | ret = strncpy_from_sockptr(name, optval, | |
597 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
598 | if (ret < 0) | |
599 | return -EFAULT; | |
600 | ||
601 | name[ret] = 0; | |
602 | ||
603 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
604 | ||
605 | ret = 0; | |
606 | lock_sock(sk); | |
607 | sockopt_seq_inc(msk); | |
608 | mptcp_for_each_subflow(msk, subflow) { | |
609 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
610 | int err; | |
611 | ||
612 | lock_sock(ssk); | |
613 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
614 | if (err < 0 && ret == 0) | |
615 | ret = err; | |
616 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
617 | release_sock(ssk); | |
618 | } | |
619 | ||
620 | if (ret == 0) | |
20b5759f | 621 | strcpy(msk->ca_name, name); |
aa1fbd94 FW |
622 | |
623 | release_sock(sk); | |
624 | return ret; | |
625 | } | |
626 | ||
4f6e14bd MG |
627 | static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, |
628 | unsigned int optlen) | |
629 | { | |
630 | struct mptcp_subflow_context *subflow; | |
631 | struct sock *sk = (struct sock *)msk; | |
632 | int val; | |
633 | ||
634 | if (optlen < sizeof(int)) | |
635 | return -EINVAL; | |
636 | ||
637 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
638 | return -EFAULT; | |
639 | ||
640 | lock_sock(sk); | |
641 | sockopt_seq_inc(msk); | |
642 | msk->cork = !!val; | |
643 | mptcp_for_each_subflow(msk, subflow) { | |
644 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
645 | ||
646 | lock_sock(ssk); | |
647 | __tcp_sock_set_cork(ssk, !!val); | |
648 | release_sock(ssk); | |
649 | } | |
650 | if (!val) | |
651 | mptcp_check_and_set_pending(sk); | |
652 | release_sock(sk); | |
653 | ||
654 | return 0; | |
655 | } | |
656 | ||
657 | static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, | |
658 | unsigned int optlen) | |
659 | { | |
660 | struct mptcp_subflow_context *subflow; | |
661 | struct sock *sk = (struct sock *)msk; | |
662 | int val; | |
663 | ||
664 | if (optlen < sizeof(int)) | |
665 | return -EINVAL; | |
666 | ||
667 | if (copy_from_sockptr(&val, optval, sizeof(val))) | |
668 | return -EFAULT; | |
669 | ||
670 | lock_sock(sk); | |
671 | sockopt_seq_inc(msk); | |
672 | msk->nodelay = !!val; | |
673 | mptcp_for_each_subflow(msk, subflow) { | |
674 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
675 | ||
676 | lock_sock(ssk); | |
677 | __tcp_sock_set_nodelay(ssk, !!val); | |
678 | release_sock(ssk); | |
679 | } | |
680 | if (val) | |
681 | mptcp_check_and_set_pending(sk); | |
682 | release_sock(sk); | |
683 | ||
684 | return 0; | |
685 | } | |
686 | ||
c9406a23 FW |
687 | static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, |
688 | sockptr_t optval, unsigned int optlen) | |
689 | { | |
690 | struct sock *sk = (struct sock *)msk; | |
691 | struct inet_sock *issk; | |
692 | struct socket *ssock; | |
693 | int err; | |
694 | ||
695 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
696 | if (err != 0) | |
697 | return err; | |
698 | ||
699 | lock_sock(sk); | |
700 | ||
701 | ssock = __mptcp_nmpc_socket(msk); | |
ddb1a072 | 702 | if (IS_ERR(ssock)) { |
c9406a23 | 703 | release_sock(sk); |
ddb1a072 | 704 | return PTR_ERR(ssock); |
c9406a23 FW |
705 | } |
706 | ||
f0bc514b | 707 | issk = inet_sk(msk->first); |
c9406a23 FW |
708 | |
709 | switch (optname) { | |
710 | case IP_FREEBIND: | |
711 | issk->freebind = inet_sk(sk)->freebind; | |
712 | break; | |
713 | case IP_TRANSPARENT: | |
714 | issk->transparent = inet_sk(sk)->transparent; | |
715 | break; | |
716 | default: | |
717 | release_sock(sk); | |
718 | WARN_ON_ONCE(1); | |
719 | return -EOPNOTSUPP; | |
720 | } | |
721 | ||
722 | sockopt_seq_inc(msk); | |
723 | release_sock(sk); | |
724 | return 0; | |
725 | } | |
726 | ||
ffcacff8 PS |
727 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
728 | sockptr_t optval, unsigned int optlen) | |
729 | { | |
730 | struct mptcp_subflow_context *subflow; | |
731 | struct sock *sk = (struct sock *)msk; | |
732 | int err, val; | |
733 | ||
734 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
735 | ||
736 | if (err != 0) | |
737 | return err; | |
738 | ||
739 | lock_sock(sk); | |
740 | sockopt_seq_inc(msk); | |
741 | val = inet_sk(sk)->tos; | |
742 | mptcp_for_each_subflow(msk, subflow) { | |
743 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
744 | ||
745 | __ip_sock_set_tos(ssk, val); | |
746 | } | |
747 | release_sock(sk); | |
748 | ||
03e7d28c | 749 | return 0; |
ffcacff8 PS |
750 | } |
751 | ||
752 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, | |
753 | sockptr_t optval, unsigned int optlen) | |
754 | { | |
755 | switch (optname) { | |
c9406a23 FW |
756 | case IP_FREEBIND: |
757 | case IP_TRANSPARENT: | |
758 | return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); | |
ffcacff8 PS |
759 | case IP_TOS: |
760 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); | |
761 | } | |
762 | ||
763 | return -EOPNOTSUPP; | |
764 | } | |
765 | ||
d3d42904 MB |
766 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
767 | sockptr_t optval, unsigned int optlen) | |
54635bd0 | 768 | { |
21e43569 | 769 | struct sock *sk = (struct sock *)msk; |
54635bd0 | 770 | struct socket *sock; |
ddb1a072 | 771 | int ret; |
54635bd0 | 772 | |
d3d42904 | 773 | /* Limit to first subflow, before the connection establishment */ |
21e43569 | 774 | lock_sock(sk); |
54635bd0 | 775 | sock = __mptcp_nmpc_socket(msk); |
ddb1a072 PA |
776 | if (IS_ERR(sock)) { |
777 | ret = PTR_ERR(sock); | |
21e43569 | 778 | goto unlock; |
ddb1a072 | 779 | } |
54635bd0 | 780 | |
21e43569 PA |
781 | ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen); |
782 | ||
783 | unlock: | |
784 | release_sock(sk); | |
785 | return ret; | |
54635bd0 BH |
786 | } |
787 | ||
aa1fbd94 FW |
788 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
789 | sockptr_t optval, unsigned int optlen) | |
790 | { | |
2c9e7765 FW |
791 | struct sock *sk = (void *)msk; |
792 | int ret, val; | |
793 | ||
aa1fbd94 | 794 | switch (optname) { |
2c9e7765 FW |
795 | case TCP_INQ: |
796 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
797 | if (ret) | |
798 | return ret; | |
799 | if (val < 0 || val > 1) | |
800 | return -EINVAL; | |
801 | ||
802 | lock_sock(sk); | |
803 | msk->recvmsg_inq = !!val; | |
804 | release_sock(sk); | |
805 | return 0; | |
aa1fbd94 FW |
806 | case TCP_ULP: |
807 | return -EOPNOTSUPP; | |
808 | case TCP_CONGESTION: | |
809 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
4f6e14bd MG |
810 | case TCP_CORK: |
811 | return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); | |
812 | case TCP_NODELAY: | |
813 | return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); | |
ea1e301d | 814 | case TCP_DEFER_ACCEPT: |
caea6467 MB |
815 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ |
816 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); | |
817 | return 0; | |
4ffb0a02 | 818 | case TCP_FASTOPEN: |
54635bd0 | 819 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 820 | case TCP_FASTOPEN_KEY: |
e64d4deb | 821 | case TCP_FASTOPEN_NO_COOKIE: |
d3d42904 MB |
822 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
823 | optval, optlen); | |
aa1fbd94 FW |
824 | } |
825 | ||
826 | return -EOPNOTSUPP; | |
827 | } | |
828 | ||
0abdde82 PA |
829 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
830 | sockptr_t optval, unsigned int optlen) | |
831 | { | |
832 | struct mptcp_sock *msk = mptcp_sk(sk); | |
833 | struct sock *ssk; | |
834 | ||
835 | pr_debug("msk=%p", msk); | |
836 | ||
837 | if (level == SOL_SOCKET) | |
838 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
839 | ||
7a009a70 FW |
840 | if (!mptcp_supported_sockopt(level, optname)) |
841 | return -ENOPROTOOPT; | |
842 | ||
0abdde82 PA |
843 | /* @@ the meaning of setsockopt() when the socket is connected and |
844 | * there are multiple subflows is not yet defined. It is up to the | |
845 | * MPTCP-level socket to configure the subflows until the subflow | |
846 | * is in TCP fallback, when TCP socket options are passed through | |
847 | * to the one remaining subflow. | |
848 | */ | |
849 | lock_sock(sk); | |
850 | ssk = __mptcp_tcp_fallback(msk); | |
851 | release_sock(sk); | |
852 | if (ssk) | |
853 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
854 | ||
ffcacff8 PS |
855 | if (level == SOL_IP) |
856 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); | |
857 | ||
0abdde82 PA |
858 | if (level == SOL_IPV6) |
859 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
860 | ||
aa1fbd94 FW |
861 | if (level == SOL_TCP) |
862 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
863 | ||
864 | return -EOPNOTSUPP; | |
865 | } | |
866 | ||
867 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
868 | char __user *optval, int __user *optlen) | |
869 | { | |
870 | struct sock *sk = (struct sock *)msk; | |
871 | struct socket *ssock; | |
aa1fbd94 | 872 | struct sock *ssk; |
f0bc514b | 873 | int ret; |
aa1fbd94 FW |
874 | |
875 | lock_sock(sk); | |
876 | ssk = msk->first; | |
877 | if (ssk) { | |
878 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
879 | goto out; | |
880 | } | |
881 | ||
882 | ssock = __mptcp_nmpc_socket(msk); | |
ddb1a072 PA |
883 | if (IS_ERR(ssock)) { |
884 | ret = PTR_ERR(ssock); | |
aa1fbd94 | 885 | goto out; |
ddb1a072 | 886 | } |
aa1fbd94 | 887 | |
f0bc514b | 888 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); |
aa1fbd94 FW |
889 | |
890 | out: | |
891 | release_sock(sk); | |
892 | return ret; | |
893 | } | |
894 | ||
61bc6e82 FW |
895 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
896 | { | |
38967f42 | 897 | struct sock *sk = (struct sock *)msk; |
61bc6e82 | 898 | u32 flags = 0; |
38967f42 | 899 | bool slow; |
61bc6e82 | 900 | |
55c42fa7 FW |
901 | memset(info, 0, sizeof(*info)); |
902 | ||
61bc6e82 FW |
903 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
904 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); | |
905 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); | |
906 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); | |
e925a032 | 907 | |
38967f42 PA |
908 | if (inet_sk_state_load(sk) == TCP_LISTEN) |
909 | return; | |
910 | ||
e925a032 MB |
911 | /* The following limits only make sense for the in-kernel PM */ |
912 | if (mptcp_pm_is_kernel(msk)) { | |
913 | info->mptcpi_subflows_max = | |
914 | mptcp_pm_get_subflows_max(msk); | |
915 | info->mptcpi_add_addr_signal_max = | |
916 | mptcp_pm_get_add_addr_signal_max(msk); | |
917 | info->mptcpi_add_addr_accepted_max = | |
918 | mptcp_pm_get_add_addr_accept_max(msk); | |
919 | info->mptcpi_local_addr_max = | |
920 | mptcp_pm_get_local_addr_max(msk); | |
921 | } | |
922 | ||
61bc6e82 FW |
923 | if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) |
924 | flags |= MPTCP_INFO_FLAG_FALLBACK; | |
925 | if (READ_ONCE(msk->can_ack)) | |
926 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; | |
927 | info->mptcpi_flags = flags; | |
38967f42 PA |
928 | mptcp_data_lock(sk); |
929 | info->mptcpi_snd_una = msk->snd_una; | |
930 | info->mptcpi_rcv_nxt = msk->ack_seq; | |
931 | info->mptcpi_bytes_acked = msk->bytes_acked; | |
932 | mptcp_data_unlock(sk); | |
933 | ||
934 | slow = lock_sock_fast(sk); | |
935 | info->mptcpi_csum_enabled = msk->csum_enabled; | |
936 | info->mptcpi_token = msk->token; | |
937 | info->mptcpi_write_seq = msk->write_seq; | |
938 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; | |
939 | info->mptcpi_bytes_sent = msk->bytes_sent; | |
940 | info->mptcpi_bytes_received = msk->bytes_received; | |
941 | info->mptcpi_bytes_retrans = msk->bytes_retrans; | |
942 | unlock_sock_fast(sk, slow); | |
61bc6e82 FW |
943 | } |
944 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); | |
945 | ||
55c42fa7 FW |
946 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
947 | { | |
948 | struct mptcp_info m_info; | |
949 | int len; | |
950 | ||
951 | if (get_user(len, optlen)) | |
952 | return -EFAULT; | |
953 | ||
954 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
955 | ||
956 | mptcp_diag_fill_info(msk, &m_info); | |
957 | ||
958 | if (put_user(len, optlen)) | |
959 | return -EFAULT; | |
960 | ||
961 | if (copy_to_user(optval, &m_info, len)) | |
962 | return -EFAULT; | |
963 | ||
964 | return 0; | |
965 | } | |
966 | ||
06f15cee FW |
967 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
968 | char __user *optval, | |
969 | u32 copied, | |
970 | int __user *optlen) | |
971 | { | |
972 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); | |
973 | ||
974 | if (copied) | |
975 | copied += sfd->size_subflow_data; | |
976 | else | |
977 | copied = copylen; | |
978 | ||
979 | if (put_user(copied, optlen)) | |
980 | return -EFAULT; | |
981 | ||
982 | if (copy_to_user(optval, sfd, copylen)) | |
983 | return -EFAULT; | |
984 | ||
985 | return 0; | |
986 | } | |
987 | ||
988 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, | |
49243207 PA |
989 | char __user *optval, |
990 | int __user *optlen) | |
06f15cee FW |
991 | { |
992 | int len, copylen; | |
993 | ||
994 | if (get_user(len, optlen)) | |
995 | return -EFAULT; | |
996 | ||
997 | /* if mptcp_subflow_data size is changed, need to adjust | |
998 | * this function to deal with programs using old version. | |
999 | */ | |
1000 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); | |
1001 | ||
1002 | if (len < MIN_INFO_OPTLEN_SIZE) | |
1003 | return -EINVAL; | |
1004 | ||
1005 | memset(sfd, 0, sizeof(*sfd)); | |
1006 | ||
1007 | copylen = min_t(unsigned int, len, sizeof(*sfd)); | |
1008 | if (copy_from_user(sfd, optval, copylen)) | |
1009 | return -EFAULT; | |
1010 | ||
1011 | /* size_subflow_data is u32, but len is signed */ | |
1012 | if (sfd->size_subflow_data > INT_MAX || | |
1013 | sfd->size_user > INT_MAX) | |
1014 | return -EINVAL; | |
1015 | ||
1016 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || | |
1017 | sfd->size_subflow_data > len) | |
1018 | return -EINVAL; | |
1019 | ||
1020 | if (sfd->num_subflows || sfd->size_kernel) | |
1021 | return -EINVAL; | |
1022 | ||
1023 | return len - sfd->size_subflow_data; | |
1024 | } | |
1025 | ||
1026 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, | |
1027 | int __user *optlen) | |
1028 | { | |
1029 | struct mptcp_subflow_context *subflow; | |
80638684 | 1030 | struct sock *sk = (struct sock *)msk; |
06f15cee FW |
1031 | unsigned int sfcount = 0, copied = 0; |
1032 | struct mptcp_subflow_data sfd; | |
1033 | char __user *infoptr; | |
1034 | int len; | |
1035 | ||
1036 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1037 | if (len < 0) | |
1038 | return len; | |
1039 | ||
1040 | sfd.size_kernel = sizeof(struct tcp_info); | |
1041 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1042 | sizeof(struct tcp_info)); | |
1043 | ||
1044 | infoptr = optval + sfd.size_subflow_data; | |
1045 | ||
1046 | lock_sock(sk); | |
1047 | ||
1048 | mptcp_for_each_subflow(msk, subflow) { | |
1049 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1050 | ||
1051 | ++sfcount; | |
1052 | ||
1053 | if (len && len >= sfd.size_user) { | |
1054 | struct tcp_info info; | |
1055 | ||
1056 | tcp_get_info(ssk, &info); | |
1057 | ||
1058 | if (copy_to_user(infoptr, &info, sfd.size_user)) { | |
1059 | release_sock(sk); | |
1060 | return -EFAULT; | |
1061 | } | |
1062 | ||
1063 | infoptr += sfd.size_user; | |
1064 | copied += sfd.size_user; | |
1065 | len -= sfd.size_user; | |
1066 | } | |
1067 | } | |
1068 | ||
1069 | release_sock(sk); | |
1070 | ||
1071 | sfd.num_subflows = sfcount; | |
1072 | ||
1073 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1074 | return -EFAULT; | |
1075 | ||
1076 | return 0; | |
1077 | } | |
1078 | ||
c11c5906 FW |
1079 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1080 | { | |
abc17a11 | 1081 | const struct inet_sock *inet = inet_sk(sk); |
c11c5906 FW |
1082 | |
1083 | memset(a, 0, sizeof(*a)); | |
1084 | ||
1085 | if (sk->sk_family == AF_INET) { | |
1086 | a->sin_local.sin_family = AF_INET; | |
1087 | a->sin_local.sin_port = inet->inet_sport; | |
1088 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; | |
1089 | ||
1090 | if (!a->sin_local.sin_addr.s_addr) | |
1091 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; | |
1092 | ||
1093 | a->sin_remote.sin_family = AF_INET; | |
1094 | a->sin_remote.sin_port = inet->inet_dport; | |
1095 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; | |
1096 | #if IS_ENABLED(CONFIG_IPV6) | |
1097 | } else if (sk->sk_family == AF_INET6) { | |
1098 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1099 | ||
29211e7d TG |
1100 | if (WARN_ON_ONCE(!np)) |
1101 | return; | |
1102 | ||
c11c5906 FW |
1103 | a->sin6_local.sin6_family = AF_INET6; |
1104 | a->sin6_local.sin6_port = inet->inet_sport; | |
1105 | ||
1106 | if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) | |
1107 | a->sin6_local.sin6_addr = np->saddr; | |
1108 | else | |
1109 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; | |
1110 | ||
1111 | a->sin6_remote.sin6_family = AF_INET6; | |
1112 | a->sin6_remote.sin6_port = inet->inet_dport; | |
1113 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; | |
1114 | #endif | |
1115 | } | |
1116 | } | |
1117 | ||
1118 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, | |
1119 | int __user *optlen) | |
1120 | { | |
c11c5906 | 1121 | struct mptcp_subflow_context *subflow; |
80638684 | 1122 | struct sock *sk = (struct sock *)msk; |
c11c5906 FW |
1123 | unsigned int sfcount = 0, copied = 0; |
1124 | struct mptcp_subflow_data sfd; | |
1125 | char __user *addrptr; | |
1126 | int len; | |
1127 | ||
1128 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1129 | if (len < 0) | |
1130 | return len; | |
1131 | ||
1132 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); | |
1133 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1134 | sizeof(struct mptcp_subflow_addrs)); | |
1135 | ||
1136 | addrptr = optval + sfd.size_subflow_data; | |
1137 | ||
1138 | lock_sock(sk); | |
1139 | ||
1140 | mptcp_for_each_subflow(msk, subflow) { | |
1141 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1142 | ||
1143 | ++sfcount; | |
1144 | ||
1145 | if (len && len >= sfd.size_user) { | |
1146 | struct mptcp_subflow_addrs a; | |
1147 | ||
1148 | mptcp_get_sub_addrs(ssk, &a); | |
1149 | ||
1150 | if (copy_to_user(addrptr, &a, sfd.size_user)) { | |
1151 | release_sock(sk); | |
1152 | return -EFAULT; | |
1153 | } | |
1154 | ||
1155 | addrptr += sfd.size_user; | |
1156 | copied += sfd.size_user; | |
1157 | len -= sfd.size_user; | |
1158 | } | |
1159 | } | |
1160 | ||
1161 | release_sock(sk); | |
1162 | ||
1163 | sfd.num_subflows = sfcount; | |
1164 | ||
1165 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1166 | return -EFAULT; | |
1167 | ||
1168 | return 0; | |
1169 | } | |
1170 | ||
49243207 PA |
1171 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, |
1172 | char __user *optval, | |
1173 | int __user *optlen) | |
1174 | { | |
1175 | int len; | |
1176 | ||
1177 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != | |
1178 | MIN_FULL_INFO_OPTLEN_SIZE); | |
1179 | ||
1180 | if (get_user(len, optlen)) | |
1181 | return -EFAULT; | |
1182 | ||
1183 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) | |
1184 | return -EINVAL; | |
1185 | ||
1186 | memset(mfi, 0, sizeof(*mfi)); | |
1187 | if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) | |
1188 | return -EFAULT; | |
1189 | ||
1190 | if (mfi->size_tcpinfo_kernel || | |
1191 | mfi->size_sfinfo_kernel || | |
1192 | mfi->num_subflows) | |
1193 | return -EINVAL; | |
1194 | ||
1195 | if (mfi->size_sfinfo_user > INT_MAX || | |
1196 | mfi->size_tcpinfo_user > INT_MAX) | |
1197 | return -EINVAL; | |
1198 | ||
1199 | return len - MIN_FULL_INFO_OPTLEN_SIZE; | |
1200 | } | |
1201 | ||
1202 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, | |
1203 | char __user *optval, | |
1204 | u32 copylen, | |
1205 | int __user *optlen) | |
1206 | { | |
1207 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; | |
1208 | if (put_user(copylen, optlen)) | |
1209 | return -EFAULT; | |
1210 | ||
1211 | if (copy_to_user(optval, mfi, copylen)) | |
1212 | return -EFAULT; | |
1213 | return 0; | |
1214 | } | |
1215 | ||
1216 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, | |
1217 | int __user *optlen) | |
1218 | { | |
1219 | unsigned int sfcount = 0, copylen = 0; | |
1220 | struct mptcp_subflow_context *subflow; | |
1221 | struct sock *sk = (struct sock *)msk; | |
1222 | void __user *tcpinfoptr, *sfinfoptr; | |
1223 | struct mptcp_full_info mfi; | |
1224 | int len; | |
1225 | ||
1226 | len = mptcp_get_full_info(&mfi, optval, optlen); | |
1227 | if (len < 0) | |
1228 | return len; | |
1229 | ||
1230 | /* don't bother filling the mptcp info if there is not enough | |
1231 | * user-space-provided storage | |
1232 | */ | |
1233 | if (len > 0) { | |
1234 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); | |
1235 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
1236 | } | |
1237 | ||
1238 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); | |
1239 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, | |
1240 | sizeof(struct tcp_info)); | |
1241 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); | |
1242 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); | |
1243 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, | |
1244 | sizeof(struct mptcp_subflow_info)); | |
1245 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); | |
1246 | ||
1247 | lock_sock(sk); | |
1248 | mptcp_for_each_subflow(msk, subflow) { | |
1249 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1250 | struct mptcp_subflow_info sfinfo; | |
1251 | struct tcp_info tcp_info; | |
1252 | ||
1253 | if (sfcount++ >= mfi.size_arrays_user) | |
1254 | continue; | |
1255 | ||
1256 | /* fetch addr/tcp_info only if the user space buffers | |
1257 | * are wide enough | |
1258 | */ | |
1259 | memset(&sfinfo, 0, sizeof(sfinfo)); | |
1260 | sfinfo.id = subflow->subflow_id; | |
1261 | if (mfi.size_sfinfo_user > | |
1262 | offsetof(struct mptcp_subflow_info, addrs)) | |
1263 | mptcp_get_sub_addrs(ssk, &sfinfo.addrs); | |
1264 | if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) | |
1265 | goto fail_release; | |
1266 | ||
1267 | if (mfi.size_tcpinfo_user) { | |
1268 | tcp_get_info(ssk, &tcp_info); | |
1269 | if (copy_to_user(tcpinfoptr, &tcp_info, | |
1270 | mfi.size_tcpinfo_user)) | |
1271 | goto fail_release; | |
1272 | } | |
1273 | ||
1274 | tcpinfoptr += mfi.size_tcpinfo_user; | |
1275 | sfinfoptr += mfi.size_sfinfo_user; | |
1276 | } | |
1277 | release_sock(sk); | |
1278 | ||
1279 | mfi.num_subflows = sfcount; | |
1280 | if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) | |
1281 | return -EFAULT; | |
1282 | ||
1283 | return 0; | |
1284 | ||
1285 | fail_release: | |
1286 | release_sock(sk); | |
1287 | return -EFAULT; | |
1288 | } | |
1289 | ||
2c9e7765 FW |
1290 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1291 | int __user *optlen, int val) | |
1292 | { | |
1293 | int len; | |
1294 | ||
1295 | if (get_user(len, optlen)) | |
1296 | return -EFAULT; | |
2c9e7765 FW |
1297 | if (len < 0) |
1298 | return -EINVAL; | |
1299 | ||
3b1e21eb FW |
1300 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1301 | unsigned char ucval = (unsigned char)val; | |
1302 | ||
1303 | len = 1; | |
1304 | if (put_user(len, optlen)) | |
1305 | return -EFAULT; | |
1306 | if (copy_to_user(optval, &ucval, 1)) | |
1307 | return -EFAULT; | |
1308 | } else { | |
1309 | len = min_t(unsigned int, len, sizeof(int)); | |
1310 | if (put_user(len, optlen)) | |
1311 | return -EFAULT; | |
1312 | if (copy_to_user(optval, &val, len)) | |
1313 | return -EFAULT; | |
1314 | } | |
2c9e7765 FW |
1315 | |
1316 | return 0; | |
1317 | } | |
1318 | ||
aa1fbd94 FW |
1319 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1320 | char __user *optval, int __user *optlen) | |
1321 | { | |
1322 | switch (optname) { | |
1323 | case TCP_ULP: | |
1324 | case TCP_CONGESTION: | |
1325 | case TCP_INFO: | |
1326 | case TCP_CC_INFO: | |
ea1e301d | 1327 | case TCP_DEFER_ACCEPT: |
4ffb0a02 | 1328 | case TCP_FASTOPEN: |
54635bd0 | 1329 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 1330 | case TCP_FASTOPEN_KEY: |
e64d4deb | 1331 | case TCP_FASTOPEN_NO_COOKIE: |
aa1fbd94 FW |
1332 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1333 | optval, optlen); | |
2c9e7765 FW |
1334 | case TCP_INQ: |
1335 | return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); | |
4f6e14bd MG |
1336 | case TCP_CORK: |
1337 | return mptcp_put_int_option(msk, optval, optlen, msk->cork); | |
1338 | case TCP_NODELAY: | |
1339 | return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); | |
aa1fbd94 | 1340 | } |
0abdde82 PA |
1341 | return -EOPNOTSUPP; |
1342 | } | |
1343 | ||
3b1e21eb FW |
1344 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1345 | char __user *optval, int __user *optlen) | |
1346 | { | |
1347 | struct sock *sk = (void *)msk; | |
1348 | ||
1349 | switch (optname) { | |
1350 | case IP_TOS: | |
1351 | return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); | |
1352 | } | |
1353 | ||
1354 | return -EOPNOTSUPP; | |
1355 | } | |
1356 | ||
55c42fa7 FW |
1357 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1358 | char __user *optval, int __user *optlen) | |
1359 | { | |
1360 | switch (optname) { | |
1361 | case MPTCP_INFO: | |
1362 | return mptcp_getsockopt_info(msk, optval, optlen); | |
49243207 PA |
1363 | case MPTCP_FULL_INFO: |
1364 | return mptcp_getsockopt_full_info(msk, optval, optlen); | |
06f15cee FW |
1365 | case MPTCP_TCPINFO: |
1366 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); | |
c11c5906 FW |
1367 | case MPTCP_SUBFLOW_ADDRS: |
1368 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); | |
55c42fa7 FW |
1369 | } |
1370 | ||
1371 | return -EOPNOTSUPP; | |
1372 | } | |
1373 | ||
0abdde82 PA |
1374 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1375 | char __user *optval, int __user *option) | |
1376 | { | |
1377 | struct mptcp_sock *msk = mptcp_sk(sk); | |
1378 | struct sock *ssk; | |
1379 | ||
1380 | pr_debug("msk=%p", msk); | |
1381 | ||
1382 | /* @@ the meaning of setsockopt() when the socket is connected and | |
1383 | * there are multiple subflows is not yet defined. It is up to the | |
1384 | * MPTCP-level socket to configure the subflows until the subflow | |
1385 | * is in TCP fallback, when socket options are passed through | |
1386 | * to the one remaining subflow. | |
1387 | */ | |
1388 | lock_sock(sk); | |
1389 | ssk = __mptcp_tcp_fallback(msk); | |
1390 | release_sock(sk); | |
1391 | if (ssk) | |
1392 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
1393 | ||
3b1e21eb FW |
1394 | if (level == SOL_IP) |
1395 | return mptcp_getsockopt_v4(msk, optname, optval, option); | |
aa1fbd94 FW |
1396 | if (level == SOL_TCP) |
1397 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
55c42fa7 FW |
1398 | if (level == SOL_MPTCP) |
1399 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); | |
0abdde82 PA |
1400 | return -EOPNOTSUPP; |
1401 | } | |
1402 | ||
1b3e7ede FW |
1403 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1404 | { | |
5d0a6bc8 | 1405 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
1406 | struct sock *sk = (struct sock *)msk; |
1407 | ||
1408 | if (ssk->sk_prot->keepalive) { | |
1409 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
1410 | ssk->sk_prot->keepalive(ssk, 1); | |
1411 | else | |
1412 | ssk->sk_prot->keepalive(ssk, 0); | |
1413 | } | |
1414 | ||
1415 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
1416 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1417 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
7e9740e0 | 1418 | ssk->sk_ipv6only = sk->sk_ipv6only; |
ffcacff8 | 1419 | __ip_sock_set_tos(ssk, inet_sk(sk)->tos); |
5d0a6bc8 FW |
1420 | |
1421 | if (sk->sk_userlocks & tx_rx_locks) { | |
1422 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
1423 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | |
1424 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
1425 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1426 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
1427 | } | |
1428 | ||
1429 | if (sock_flag(sk, SOCK_LINGER)) { | |
1430 | ssk->sk_lingertime = sk->sk_lingertime; | |
1431 | sock_set_flag(ssk, SOCK_LINGER); | |
1432 | } else { | |
1433 | sock_reset_flag(ssk, SOCK_LINGER); | |
1434 | } | |
1435 | ||
1436 | if (sk->sk_mark != ssk->sk_mark) { | |
1437 | ssk->sk_mark = sk->sk_mark; | |
1438 | sk_dst_reset(ssk); | |
1439 | } | |
1440 | ||
1441 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
1442 | ||
1443 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
20b5759f | 1444 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
4f6e14bd MG |
1445 | __tcp_sock_set_cork(ssk, !!msk->cork); |
1446 | __tcp_sock_set_nodelay(ssk, !!msk->nodelay); | |
c9406a23 FW |
1447 | |
1448 | inet_sk(ssk)->transparent = inet_sk(sk)->transparent; | |
1449 | inet_sk(ssk)->freebind = inet_sk(sk)->freebind; | |
1b3e7ede FW |
1450 | } |
1451 | ||
df00b087 FW |
1452 | static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
1453 | { | |
1b3e7ede FW |
1454 | bool slow = lock_sock_fast(ssk); |
1455 | ||
1456 | sync_socket_options(msk, ssk); | |
1457 | ||
1458 | unlock_sock_fast(ssk, slow); | |
df00b087 FW |
1459 | } |
1460 | ||
78962489 FW |
1461 | void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
1462 | { | |
df00b087 FW |
1463 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
1464 | ||
78962489 | 1465 | msk_owned_by_me(msk); |
df00b087 FW |
1466 | |
1467 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { | |
1468 | __mptcp_sockopt_sync(msk, ssk); | |
1469 | ||
1470 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
1471 | } | |
78962489 FW |
1472 | } |
1473 | ||
3e501490 | 1474 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
78962489 | 1475 | { |
3e501490 | 1476 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
78962489 | 1477 | |
3e501490 | 1478 | msk_owned_by_me(msk); |
78962489 | 1479 | |
3e501490 PA |
1480 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1481 | sync_socket_options(msk, ssk); | |
78962489 | 1482 | |
3e501490 | 1483 | subflow->setsockopt_seq = msk->setsockopt_seq; |
78962489 FW |
1484 | } |
1485 | } |