Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
17 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) | |
18 | { | |
19 | sock_owned_by_me((const struct sock *)msk); | |
20 | ||
21 | if (likely(!__mptcp_check_fallback(msk))) | |
22 | return NULL; | |
23 | ||
24 | return msk->first; | |
25 | } | |
26 | ||
df00b087 FW |
27 | static u32 sockopt_seq_reset(const struct sock *sk) |
28 | { | |
29 | sock_owned_by_me(sk); | |
30 | ||
31 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
32 | * of listener and established: | |
33 | * s0 = new_listener() | |
34 | * sockopt(s0) - seq is 1 | |
35 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
36 | * sockopt(s0) - seq increments to 2 on s0 | |
37 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
38 | * new ssk completes join, inherits options from s0 // seq 2 | |
39 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
40 | * | |
41 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
42 | * will fail. | |
43 | */ | |
44 | ||
45 | return (u32)sk->sk_state << 24u; | |
46 | } | |
47 | ||
1b3e7ede FW |
48 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
49 | { | |
50 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
51 | ||
52 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
53 | } | |
54 | ||
55 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
56 | unsigned int optlen, int *val) | |
57 | { | |
58 | if (optlen < sizeof(int)) | |
59 | return -EINVAL; | |
60 | ||
61 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
62 | return -EFAULT; | |
63 | ||
64 | return 0; | |
65 | } | |
66 | ||
67 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
68 | { | |
69 | struct mptcp_subflow_context *subflow; | |
70 | struct sock *sk = (struct sock *)msk; | |
71 | ||
72 | lock_sock(sk); | |
73 | sockopt_seq_inc(msk); | |
74 | ||
75 | mptcp_for_each_subflow(msk, subflow) { | |
76 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
77 | bool slow = lock_sock_fast(ssk); | |
78 | ||
79 | switch (optname) { | |
a03c99b2 FW |
80 | case SO_DEBUG: |
81 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
82 | break; | |
1b3e7ede FW |
83 | case SO_KEEPALIVE: |
84 | if (ssk->sk_prot->keepalive) | |
85 | ssk->sk_prot->keepalive(ssk, !!val); | |
86 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
87 | break; | |
88 | case SO_PRIORITY: | |
89 | ssk->sk_priority = val; | |
90 | break; | |
5d0a6bc8 FW |
91 | case SO_SNDBUF: |
92 | case SO_SNDBUFFORCE: | |
93 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
94 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
95 | break; | |
96 | case SO_RCVBUF: | |
97 | case SO_RCVBUFFORCE: | |
98 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
99 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
100 | break; | |
36704413 FW |
101 | case SO_MARK: |
102 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
103 | ssk->sk_mark = sk->sk_mark; | |
104 | sk_dst_reset(ssk); | |
105 | } | |
106 | break; | |
6f0d7198 FW |
107 | case SO_INCOMING_CPU: |
108 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
109 | break; | |
1b3e7ede FW |
110 | } |
111 | ||
112 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
113 | unlock_sock_fast(ssk, slow); | |
114 | } | |
115 | ||
116 | release_sock(sk); | |
117 | } | |
118 | ||
119 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
120 | { | |
121 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
122 | struct sock *sk = (struct sock *)msk; | |
123 | int ret; | |
124 | ||
125 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
126 | optval, sizeof(val)); | |
127 | if (ret) | |
128 | return ret; | |
129 | ||
130 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
131 | return 0; | |
132 | } | |
133 | ||
6f0d7198 FW |
134 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
135 | { | |
136 | struct sock *sk = (struct sock *)msk; | |
137 | ||
138 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
139 | ||
140 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
141 | } | |
142 | ||
1b3e7ede FW |
143 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
144 | sockptr_t optval, unsigned int optlen) | |
145 | { | |
146 | int val, ret; | |
147 | ||
148 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
149 | if (ret) | |
150 | return ret; | |
151 | ||
152 | switch (optname) { | |
153 | case SO_KEEPALIVE: | |
154 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
155 | return 0; | |
a03c99b2 | 156 | case SO_DEBUG: |
36704413 | 157 | case SO_MARK: |
1b3e7ede | 158 | case SO_PRIORITY: |
5d0a6bc8 FW |
159 | case SO_SNDBUF: |
160 | case SO_SNDBUFFORCE: | |
161 | case SO_RCVBUF: | |
162 | case SO_RCVBUFFORCE: | |
1b3e7ede | 163 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
164 | case SO_INCOMING_CPU: |
165 | mptcp_so_incoming_cpu(msk, val); | |
166 | return 0; | |
1b3e7ede FW |
167 | } |
168 | ||
169 | return -ENOPROTOOPT; | |
170 | } | |
171 | ||
268b1238 FW |
172 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
173 | unsigned int optlen) | |
174 | { | |
175 | struct mptcp_subflow_context *subflow; | |
176 | struct sock *sk = (struct sock *)msk; | |
177 | struct linger ling; | |
178 | sockptr_t kopt; | |
179 | int ret; | |
180 | ||
181 | if (optlen < sizeof(ling)) | |
182 | return -EINVAL; | |
183 | ||
184 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
185 | return -EFAULT; | |
186 | ||
187 | kopt = KERNEL_SOCKPTR(&ling); | |
188 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
189 | if (ret) | |
190 | return ret; | |
191 | ||
192 | lock_sock(sk); | |
193 | sockopt_seq_inc(msk); | |
194 | mptcp_for_each_subflow(msk, subflow) { | |
195 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
196 | bool slow = lock_sock_fast(ssk); | |
197 | ||
198 | if (!ling.l_onoff) { | |
199 | sock_reset_flag(ssk, SOCK_LINGER); | |
200 | } else { | |
201 | ssk->sk_lingertime = sk->sk_lingertime; | |
202 | sock_set_flag(ssk, SOCK_LINGER); | |
203 | } | |
204 | ||
205 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
206 | unlock_sock_fast(ssk, slow); | |
207 | } | |
208 | ||
209 | release_sock(sk); | |
210 | return 0; | |
211 | } | |
212 | ||
0abdde82 PA |
213 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
214 | sockptr_t optval, unsigned int optlen) | |
215 | { | |
216 | struct sock *sk = (struct sock *)msk; | |
217 | struct socket *ssock; | |
218 | int ret; | |
219 | ||
220 | switch (optname) { | |
221 | case SO_REUSEPORT: | |
222 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
223 | case SO_BINDTODEVICE: |
224 | case SO_BINDTOIFINDEX: | |
0abdde82 PA |
225 | lock_sock(sk); |
226 | ssock = __mptcp_nmpc_socket(msk); | |
227 | if (!ssock) { | |
228 | release_sock(sk); | |
229 | return -EINVAL; | |
230 | } | |
231 | ||
232 | ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); | |
233 | if (ret == 0) { | |
234 | if (optname == SO_REUSEPORT) | |
235 | sk->sk_reuseport = ssock->sk->sk_reuseport; | |
236 | else if (optname == SO_REUSEADDR) | |
237 | sk->sk_reuse = ssock->sk->sk_reuse; | |
5d0a6bc8 FW |
238 | else if (optname == SO_BINDTODEVICE) |
239 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
240 | else if (optname == SO_BINDTOIFINDEX) | |
241 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; | |
0abdde82 PA |
242 | } |
243 | release_sock(sk); | |
244 | return ret; | |
1b3e7ede FW |
245 | case SO_KEEPALIVE: |
246 | case SO_PRIORITY: | |
5d0a6bc8 FW |
247 | case SO_SNDBUF: |
248 | case SO_SNDBUFFORCE: | |
249 | case SO_RCVBUF: | |
250 | case SO_RCVBUFFORCE: | |
36704413 | 251 | case SO_MARK: |
6f0d7198 | 252 | case SO_INCOMING_CPU: |
a03c99b2 | 253 | case SO_DEBUG: |
1b3e7ede | 254 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); |
268b1238 FW |
255 | case SO_LINGER: |
256 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
a03c99b2 FW |
257 | case SO_NO_CHECK: |
258 | case SO_DONTROUTE: | |
259 | case SO_BROADCAST: | |
260 | case SO_BSDCOMPAT: | |
261 | case SO_PASSCRED: | |
262 | case SO_PASSSEC: | |
263 | case SO_RXQ_OVFL: | |
264 | case SO_WIFI_STATUS: | |
265 | case SO_NOFCS: | |
266 | case SO_SELECT_ERR_QUEUE: | |
267 | return 0; | |
0abdde82 PA |
268 | } |
269 | ||
270 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
271 | } | |
272 | ||
273 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
274 | sockptr_t optval, unsigned int optlen) | |
275 | { | |
276 | struct sock *sk = (struct sock *)msk; | |
277 | int ret = -EOPNOTSUPP; | |
278 | struct socket *ssock; | |
279 | ||
280 | switch (optname) { | |
281 | case IPV6_V6ONLY: | |
282 | lock_sock(sk); | |
283 | ssock = __mptcp_nmpc_socket(msk); | |
284 | if (!ssock) { | |
285 | release_sock(sk); | |
286 | return -EINVAL; | |
287 | } | |
288 | ||
289 | ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); | |
290 | if (ret == 0) | |
291 | sk->sk_ipv6only = ssock->sk->sk_ipv6only; | |
292 | ||
293 | release_sock(sk); | |
294 | break; | |
295 | } | |
296 | ||
297 | return ret; | |
298 | } | |
299 | ||
d9e4c129 PA |
300 | static bool mptcp_supported_sockopt(int level, int optname) |
301 | { | |
302 | if (level == SOL_SOCKET) { | |
303 | switch (optname) { | |
304 | case SO_DEBUG: | |
305 | case SO_REUSEPORT: | |
306 | case SO_REUSEADDR: | |
307 | ||
308 | /* the following ones need a better implementation, | |
309 | * but are quite common we want to preserve them | |
310 | */ | |
311 | case SO_BINDTODEVICE: | |
312 | case SO_SNDBUF: | |
313 | case SO_SNDBUFFORCE: | |
314 | case SO_RCVBUF: | |
315 | case SO_RCVBUFFORCE: | |
316 | case SO_KEEPALIVE: | |
317 | case SO_PRIORITY: | |
318 | case SO_LINGER: | |
319 | case SO_TIMESTAMP_OLD: | |
320 | case SO_TIMESTAMP_NEW: | |
321 | case SO_TIMESTAMPNS_OLD: | |
322 | case SO_TIMESTAMPNS_NEW: | |
323 | case SO_TIMESTAMPING_OLD: | |
324 | case SO_TIMESTAMPING_NEW: | |
325 | case SO_RCVLOWAT: | |
326 | case SO_RCVTIMEO_OLD: | |
327 | case SO_RCVTIMEO_NEW: | |
328 | case SO_SNDTIMEO_OLD: | |
329 | case SO_SNDTIMEO_NEW: | |
330 | case SO_MARK: | |
331 | case SO_INCOMING_CPU: | |
332 | case SO_BINDTOIFINDEX: | |
333 | case SO_BUSY_POLL: | |
334 | case SO_PREFER_BUSY_POLL: | |
335 | case SO_BUSY_POLL_BUDGET: | |
336 | ||
337 | /* next ones are no-op for plain TCP */ | |
338 | case SO_NO_CHECK: | |
339 | case SO_DONTROUTE: | |
340 | case SO_BROADCAST: | |
341 | case SO_BSDCOMPAT: | |
342 | case SO_PASSCRED: | |
343 | case SO_PASSSEC: | |
344 | case SO_RXQ_OVFL: | |
345 | case SO_WIFI_STATUS: | |
346 | case SO_NOFCS: | |
347 | case SO_SELECT_ERR_QUEUE: | |
348 | return true; | |
349 | } | |
350 | ||
351 | /* SO_OOBINLINE is not supported, let's avoid the related mess */ | |
352 | /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
353 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
354 | * we must be careful with subflows | |
355 | */ | |
356 | /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
357 | * explicitly the sk_protocol field | |
358 | */ | |
359 | /* SO_PEEK_OFF is unsupported, as it is for plain TCP */ | |
360 | /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */ | |
361 | /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
362 | * but likely needs careful design | |
363 | */ | |
364 | /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */ | |
365 | /* SO_TXTIME is currently unsupported */ | |
366 | return false; | |
367 | } | |
368 | if (level == SOL_IP) { | |
369 | switch (optname) { | |
370 | /* should work fine */ | |
371 | case IP_FREEBIND: | |
372 | case IP_TRANSPARENT: | |
373 | ||
374 | /* the following are control cmsg related */ | |
375 | case IP_PKTINFO: | |
376 | case IP_RECVTTL: | |
377 | case IP_RECVTOS: | |
378 | case IP_RECVOPTS: | |
379 | case IP_RETOPTS: | |
380 | case IP_PASSSEC: | |
381 | case IP_RECVORIGDSTADDR: | |
382 | case IP_CHECKSUM: | |
383 | case IP_RECVFRAGSIZE: | |
384 | ||
385 | /* common stuff that need some love */ | |
386 | case IP_TOS: | |
387 | case IP_TTL: | |
388 | case IP_BIND_ADDRESS_NO_PORT: | |
389 | case IP_MTU_DISCOVER: | |
390 | case IP_RECVERR: | |
391 | ||
392 | /* possibly less common may deserve some love */ | |
393 | case IP_MINTTL: | |
394 | ||
395 | /* the following is apparently a no-op for plain TCP */ | |
396 | case IP_RECVERR_RFC4884: | |
397 | return true; | |
398 | } | |
399 | ||
400 | /* IP_OPTIONS is not supported, needs subflow care */ | |
401 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
402 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
403 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
404 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
405 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
406 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
407 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
408 | * with mcast stuff | |
409 | */ | |
410 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
411 | return false; | |
412 | } | |
413 | if (level == SOL_IPV6) { | |
414 | switch (optname) { | |
415 | case IPV6_V6ONLY: | |
416 | ||
417 | /* the following are control cmsg related */ | |
418 | case IPV6_RECVPKTINFO: | |
419 | case IPV6_2292PKTINFO: | |
420 | case IPV6_RECVHOPLIMIT: | |
421 | case IPV6_2292HOPLIMIT: | |
422 | case IPV6_RECVRTHDR: | |
423 | case IPV6_2292RTHDR: | |
424 | case IPV6_RECVHOPOPTS: | |
425 | case IPV6_2292HOPOPTS: | |
426 | case IPV6_RECVDSTOPTS: | |
427 | case IPV6_2292DSTOPTS: | |
428 | case IPV6_RECVTCLASS: | |
429 | case IPV6_FLOWINFO: | |
430 | case IPV6_RECVPATHMTU: | |
431 | case IPV6_RECVORIGDSTADDR: | |
432 | case IPV6_RECVFRAGSIZE: | |
433 | ||
434 | /* the following ones need some love but are quite common */ | |
435 | case IPV6_TCLASS: | |
436 | case IPV6_TRANSPARENT: | |
437 | case IPV6_FREEBIND: | |
438 | case IPV6_PKTINFO: | |
439 | case IPV6_2292PKTOPTIONS: | |
440 | case IPV6_UNICAST_HOPS: | |
441 | case IPV6_MTU_DISCOVER: | |
442 | case IPV6_MTU: | |
443 | case IPV6_RECVERR: | |
444 | case IPV6_FLOWINFO_SEND: | |
445 | case IPV6_FLOWLABEL_MGR: | |
446 | case IPV6_MINHOPCOUNT: | |
447 | case IPV6_DONTFRAG: | |
448 | case IPV6_AUTOFLOWLABEL: | |
449 | ||
450 | /* the following one is a no-op for plain TCP */ | |
451 | case IPV6_RECVERR_RFC4884: | |
452 | return true; | |
453 | } | |
454 | ||
455 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
456 | * not supported | |
457 | */ | |
458 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
459 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
460 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
461 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
462 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
463 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
464 | * are not supported better not deal with mcast | |
465 | */ | |
466 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
467 | ||
468 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
469 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
470 | return false; | |
471 | } | |
472 | if (level == SOL_TCP) { | |
473 | switch (optname) { | |
474 | /* the following are no-op or should work just fine */ | |
475 | case TCP_THIN_DUPACK: | |
476 | case TCP_DEFER_ACCEPT: | |
477 | ||
478 | /* the following need some love */ | |
479 | case TCP_MAXSEG: | |
480 | case TCP_NODELAY: | |
481 | case TCP_THIN_LINEAR_TIMEOUTS: | |
482 | case TCP_CONGESTION: | |
483 | case TCP_ULP: | |
484 | case TCP_CORK: | |
485 | case TCP_KEEPIDLE: | |
486 | case TCP_KEEPINTVL: | |
487 | case TCP_KEEPCNT: | |
488 | case TCP_SYNCNT: | |
489 | case TCP_SAVE_SYN: | |
490 | case TCP_LINGER2: | |
491 | case TCP_WINDOW_CLAMP: | |
492 | case TCP_QUICKACK: | |
493 | case TCP_USER_TIMEOUT: | |
494 | case TCP_TIMESTAMP: | |
495 | case TCP_NOTSENT_LOWAT: | |
496 | case TCP_TX_DELAY: | |
497 | return true; | |
498 | } | |
499 | ||
500 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
501 | ||
502 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
503 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
504 | */ | |
505 | /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, | |
506 | * are not supported fastopen is currently unsupported | |
507 | */ | |
508 | /* TCP_INQ is currently unsupported, needs some recvmsg work */ | |
509 | } | |
510 | return false; | |
511 | } | |
512 | ||
aa1fbd94 FW |
513 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
514 | unsigned int optlen) | |
515 | { | |
516 | struct mptcp_subflow_context *subflow; | |
517 | struct sock *sk = (struct sock *)msk; | |
518 | char name[TCP_CA_NAME_MAX]; | |
519 | bool cap_net_admin; | |
520 | int ret; | |
521 | ||
522 | if (optlen < 1) | |
523 | return -EINVAL; | |
524 | ||
525 | ret = strncpy_from_sockptr(name, optval, | |
526 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
527 | if (ret < 0) | |
528 | return -EFAULT; | |
529 | ||
530 | name[ret] = 0; | |
531 | ||
532 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
533 | ||
534 | ret = 0; | |
535 | lock_sock(sk); | |
536 | sockopt_seq_inc(msk); | |
537 | mptcp_for_each_subflow(msk, subflow) { | |
538 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
539 | int err; | |
540 | ||
541 | lock_sock(ssk); | |
542 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
543 | if (err < 0 && ret == 0) | |
544 | ret = err; | |
545 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
546 | release_sock(ssk); | |
547 | } | |
548 | ||
549 | if (ret == 0) | |
550 | tcp_set_congestion_control(sk, name, false, cap_net_admin); | |
551 | ||
552 | release_sock(sk); | |
553 | return ret; | |
554 | } | |
555 | ||
556 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
557 | sockptr_t optval, unsigned int optlen) | |
558 | { | |
559 | switch (optname) { | |
560 | case TCP_ULP: | |
561 | return -EOPNOTSUPP; | |
562 | case TCP_CONGESTION: | |
563 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
564 | } | |
565 | ||
566 | return -EOPNOTSUPP; | |
567 | } | |
568 | ||
0abdde82 PA |
569 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
570 | sockptr_t optval, unsigned int optlen) | |
571 | { | |
572 | struct mptcp_sock *msk = mptcp_sk(sk); | |
573 | struct sock *ssk; | |
574 | ||
575 | pr_debug("msk=%p", msk); | |
576 | ||
d9e4c129 PA |
577 | if (!mptcp_supported_sockopt(level, optname)) |
578 | return -ENOPROTOOPT; | |
579 | ||
0abdde82 PA |
580 | if (level == SOL_SOCKET) |
581 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
582 | ||
583 | /* @@ the meaning of setsockopt() when the socket is connected and | |
584 | * there are multiple subflows is not yet defined. It is up to the | |
585 | * MPTCP-level socket to configure the subflows until the subflow | |
586 | * is in TCP fallback, when TCP socket options are passed through | |
587 | * to the one remaining subflow. | |
588 | */ | |
589 | lock_sock(sk); | |
590 | ssk = __mptcp_tcp_fallback(msk); | |
591 | release_sock(sk); | |
592 | if (ssk) | |
593 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
594 | ||
595 | if (level == SOL_IPV6) | |
596 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
597 | ||
aa1fbd94 FW |
598 | if (level == SOL_TCP) |
599 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
600 | ||
601 | return -EOPNOTSUPP; | |
602 | } | |
603 | ||
604 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
605 | char __user *optval, int __user *optlen) | |
606 | { | |
607 | struct sock *sk = (struct sock *)msk; | |
608 | struct socket *ssock; | |
609 | int ret = -EINVAL; | |
610 | struct sock *ssk; | |
611 | ||
612 | lock_sock(sk); | |
613 | ssk = msk->first; | |
614 | if (ssk) { | |
615 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
616 | goto out; | |
617 | } | |
618 | ||
619 | ssock = __mptcp_nmpc_socket(msk); | |
620 | if (!ssock) | |
621 | goto out; | |
622 | ||
623 | ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); | |
624 | ||
625 | out: | |
626 | release_sock(sk); | |
627 | return ret; | |
628 | } | |
629 | ||
630 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
631 | char __user *optval, int __user *optlen) | |
632 | { | |
633 | switch (optname) { | |
634 | case TCP_ULP: | |
635 | case TCP_CONGESTION: | |
636 | case TCP_INFO: | |
637 | case TCP_CC_INFO: | |
638 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, | |
639 | optval, optlen); | |
640 | } | |
0abdde82 PA |
641 | return -EOPNOTSUPP; |
642 | } | |
643 | ||
644 | int mptcp_getsockopt(struct sock *sk, int level, int optname, | |
645 | char __user *optval, int __user *option) | |
646 | { | |
647 | struct mptcp_sock *msk = mptcp_sk(sk); | |
648 | struct sock *ssk; | |
649 | ||
650 | pr_debug("msk=%p", msk); | |
651 | ||
652 | /* @@ the meaning of setsockopt() when the socket is connected and | |
653 | * there are multiple subflows is not yet defined. It is up to the | |
654 | * MPTCP-level socket to configure the subflows until the subflow | |
655 | * is in TCP fallback, when socket options are passed through | |
656 | * to the one remaining subflow. | |
657 | */ | |
658 | lock_sock(sk); | |
659 | ssk = __mptcp_tcp_fallback(msk); | |
660 | release_sock(sk); | |
661 | if (ssk) | |
662 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
663 | ||
aa1fbd94 FW |
664 | if (level == SOL_TCP) |
665 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
0abdde82 PA |
666 | return -EOPNOTSUPP; |
667 | } | |
668 | ||
1b3e7ede FW |
669 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
670 | { | |
5d0a6bc8 | 671 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
672 | struct sock *sk = (struct sock *)msk; |
673 | ||
674 | if (ssk->sk_prot->keepalive) { | |
675 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
676 | ssk->sk_prot->keepalive(ssk, 1); | |
677 | else | |
678 | ssk->sk_prot->keepalive(ssk, 0); | |
679 | } | |
680 | ||
681 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
682 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
683 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
684 | ||
685 | if (sk->sk_userlocks & tx_rx_locks) { | |
686 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
687 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | |
688 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
689 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
690 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
691 | } | |
692 | ||
693 | if (sock_flag(sk, SOCK_LINGER)) { | |
694 | ssk->sk_lingertime = sk->sk_lingertime; | |
695 | sock_set_flag(ssk, SOCK_LINGER); | |
696 | } else { | |
697 | sock_reset_flag(ssk, SOCK_LINGER); | |
698 | } | |
699 | ||
700 | if (sk->sk_mark != ssk->sk_mark) { | |
701 | ssk->sk_mark = sk->sk_mark; | |
702 | sk_dst_reset(ssk); | |
703 | } | |
704 | ||
705 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
706 | ||
707 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
708 | tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true); | |
1b3e7ede FW |
709 | } |
710 | ||
df00b087 FW |
711 | static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
712 | { | |
1b3e7ede FW |
713 | bool slow = lock_sock_fast(ssk); |
714 | ||
715 | sync_socket_options(msk, ssk); | |
716 | ||
717 | unlock_sock_fast(ssk, slow); | |
df00b087 FW |
718 | } |
719 | ||
78962489 FW |
720 | void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
721 | { | |
df00b087 FW |
722 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
723 | ||
78962489 | 724 | msk_owned_by_me(msk); |
df00b087 FW |
725 | |
726 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { | |
727 | __mptcp_sockopt_sync(msk, ssk); | |
728 | ||
729 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
730 | } | |
78962489 FW |
731 | } |
732 | ||
733 | void mptcp_sockopt_sync_all(struct mptcp_sock *msk) | |
734 | { | |
735 | struct mptcp_subflow_context *subflow; | |
df00b087 FW |
736 | struct sock *sk = (struct sock *)msk; |
737 | u32 seq; | |
78962489 | 738 | |
df00b087 | 739 | seq = sockopt_seq_reset(sk); |
78962489 FW |
740 | |
741 | mptcp_for_each_subflow(msk, subflow) { | |
742 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
df00b087 | 743 | u32 sseq = READ_ONCE(subflow->setsockopt_seq); |
78962489 | 744 | |
df00b087 FW |
745 | if (sseq != msk->setsockopt_seq) { |
746 | __mptcp_sockopt_sync(msk, ssk); | |
747 | WRITE_ONCE(subflow->setsockopt_seq, seq); | |
748 | } else if (sseq != seq) { | |
749 | WRITE_ONCE(subflow->setsockopt_seq, seq); | |
750 | } | |
78962489 FW |
751 | |
752 | cond_resched(); | |
753 | } | |
df00b087 FW |
754 | |
755 | msk->setsockopt_seq = seq; | |
78962489 | 756 | } |