net: hsr: Use correct offset for HSR TLV values in supervisory HSR frames
[linux-block.git] / net / unix / af_unix.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
113aa838 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4 6 *
1da177e4
LT
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
1da177e4
LT
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
77
5cc208be 78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
1da177e4 80#include <linux/module.h>
1da177e4 81#include <linux/kernel.h>
1da177e4 82#include <linux/signal.h>
3f07c014 83#include <linux/sched/signal.h>
1da177e4
LT
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
b6459415 92#include <linux/filter.h>
1da177e4
LT
93#include <linux/termios.h>
94#include <linux/sockios.h>
95#include <linux/net.h>
96#include <linux/in.h>
97#include <linux/fs.h>
98#include <linux/slab.h>
7c0f6ba6 99#include <linux/uaccess.h>
1da177e4
LT
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
457c4cbc 102#include <net/net_namespace.h>
1da177e4 103#include <net/sock.h>
c752f073 104#include <net/tcp_states.h>
1da177e4
LT
105#include <net/af_unix.h>
106#include <linux/proc_fs.h>
107#include <linux/seq_file.h>
108#include <net/scm.h>
109#include <linux/init.h>
110#include <linux/poll.h>
1da177e4
LT
111#include <linux/rtnetlink.h>
112#include <linux/mount.h>
113#include <net/checksum.h>
114#include <linux/security.h>
509f15b9 115#include <linux/splice.h>
2b15af6f 116#include <linux/freezer.h>
ba94f308 117#include <linux/file.h>
2c860a43 118#include <linux/btf_ids.h>
859051dd 119#include <linux/bpf-cgroup.h>
1da177e4 120
f4e65870
JA
121#include "scm.h"
122
518de9b3 123static atomic_long_t unix_nr_socks;
51bae889
KI
124static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
125static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
1da177e4 126
f452be49 127/* SMP locking strategy:
2f7ca90a
KI
128 * hash table is protected with spinlock.
129 * each socket state is protected by separate spinlock.
f452be49 130 */
1da177e4 131
f452be49 132static unsigned int unix_unbound_hash(struct sock *sk)
7123aaa3 133{
f452be49 134 unsigned long hash = (unsigned long)sk;
7123aaa3
ED
135
136 hash ^= hash >> 16;
137 hash ^= hash >> 8;
f452be49
KI
138 hash ^= sk->sk_type;
139
cf21b355 140 return hash & UNIX_HASH_MOD;
f452be49
KI
141}
142
143static unsigned int unix_bsd_hash(struct inode *i)
144{
f302d180 145 return i->i_ino & UNIX_HASH_MOD;
f452be49
KI
146}
147
148static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
149 int addr_len, int type)
150{
151 __wsum csum = csum_partial(sunaddr, addr_len, 0);
152 unsigned int hash;
153
154 hash = (__force unsigned int)csum_fold(csum);
155 hash ^= hash >> 8;
156 hash ^= type;
157
cf21b355 158 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
7123aaa3
ED
159}
160
79b05bea
KI
161static void unix_table_double_lock(struct net *net,
162 unsigned int hash1, unsigned int hash2)
afd20b92 163{
cf21b355
KI
164 if (hash1 == hash2) {
165 spin_lock(&net->unx.table.locks[hash1]);
166 return;
167 }
168
afd20b92
KI
169 if (hash1 > hash2)
170 swap(hash1, hash2);
171
79b05bea
KI
172 spin_lock(&net->unx.table.locks[hash1]);
173 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
afd20b92
KI
174}
175
79b05bea
KI
176static void unix_table_double_unlock(struct net *net,
177 unsigned int hash1, unsigned int hash2)
afd20b92 178{
cf21b355
KI
179 if (hash1 == hash2) {
180 spin_unlock(&net->unx.table.locks[hash1]);
181 return;
182 }
183
79b05bea
KI
184 spin_unlock(&net->unx.table.locks[hash1]);
185 spin_unlock(&net->unx.table.locks[hash2]);
afd20b92
KI
186}
187
877ce7c1 188#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 189static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 190{
37a9a8df 191 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
192}
193
194static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
195{
37a9a8df
SS
196 scm->secid = UNIXCB(skb).secid;
197}
198
199static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
200{
201 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
202}
203#else
dc49c1f9 204static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
205{ }
206
207static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
208{ }
37a9a8df
SS
209
210static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
211{
212 return true;
213}
877ce7c1
CZ
214#endif /* CONFIG_SECURITY_NETWORK */
215
1da177e4
LT
/* Non-zero when @osk's peer is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
220
/* Non-zero when @sk may send to @osk: @osk either has no peer at all,
 * or its peer is @sk.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_our_peer(sk, osk);
}
225
86b18aaa 226static inline int unix_recvq_full(const struct sock *sk)
3c73419c
RW
227{
228 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
229}
230
86b18aaa
QC
231static inline int unix_recvq_full_lockless(const struct sock *sk)
232{
233 return skb_queue_len_lockless(&sk->sk_receive_queue) >
234 READ_ONCE(sk->sk_max_ack_backlog);
235}
236
fa7ff56f 237struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
238{
239 struct sock *peer;
240
1c92b4e5 241 unix_state_lock(s);
1da177e4
LT
242 peer = unix_peer(s);
243 if (peer)
244 sock_hold(peer);
1c92b4e5 245 unix_state_unlock(s);
1da177e4
LT
246 return peer;
247}
fa7ff56f 248EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4 249
12f21c49
KI
250static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
251 int addr_len)
252{
253 struct unix_address *addr;
254
255 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
256 if (!addr)
257 return NULL;
258
259 refcount_set(&addr->refcnt, 1);
260 addr->len = addr_len;
261 memcpy(addr->name, sunaddr, addr_len);
262
263 return addr;
264}
265
1da177e4
LT
266static inline void unix_release_addr(struct unix_address *addr)
267{
8c9814b9 268 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
269 kfree(addr);
270}
271
272/*
273 * Check unix socket name:
274 * - should be not zero length.
275 * - if started by not zero, should be NULL terminated (FS object)
276 * - if started by zero, it is abstract name.
277 */
ac7bfa62 278
b8a58aa6
KI
/* Basic sanity check of a user-supplied AF_UNIX address.
 *
 * @addr_len must cover at least one byte past the sun_path offset and must
 * not exceed sizeof(struct sockaddr_un); the family must be AF_UNIX.
 * Returns 0 when the address is acceptable, -EINVAL otherwise.
 */
static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	int too_short = addr_len <= offsetof(struct sockaddr_un, sun_path);
	int too_long = addr_len > sizeof(*sunaddr);

	if (too_short || too_long)
		return -EINVAL;

	return sunaddr->sun_family == AF_UNIX ? 0 : -EINVAL;
}
290
ecb4534b 291static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
d2d8c9fd 292{
ecb4534b
KI
293 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
294 short offset = offsetof(struct sockaddr_storage, __data);
295
296 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
297
d2d8c9fd
KI
298 /* This may look like an off by one error but it is a bit more
299 * subtle. 108 is the longest valid AF_UNIX path for a binding.
300 * sun_path[108] doesn't as such exist. However in kernel space
301 * we are guaranteed that it is a valid memory location in our
302 * kernel address buffer because syscall functions always pass
303 * a pointer of struct sockaddr_storage which has a bigger buffer
ecb4534b
KI
304 * than 108. Also, we must terminate sun_path for strlen() in
305 * getname_kernel().
306 */
307 addr->__data[addr_len - offset] = 0;
308
309 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
310 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
311 * know the actual buffer.
d2d8c9fd 312 */
ecb4534b 313 return strlen(addr->__data) + offset + 1;
d2d8c9fd
KI
314}
315
1da177e4
LT
/* Unlink @sk from its hash chain and reinitialise its node.  This helper
 * takes no lock itself.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
320
cf2f225e 321static void __unix_insert_socket(struct net *net, struct sock *sk)
1da177e4 322{
dd29c67d 323 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
cf2f225e 324 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
1da177e4
LT
325}
326
cf2f225e
KI
327static void __unix_set_addr_hash(struct net *net, struct sock *sk,
328 struct unix_address *addr, unsigned int hash)
185ab886
AV
329{
330 __unix_remove_socket(sk);
331 smp_store_release(&unix_sk(sk)->addr, addr);
e6b4b873
KI
332
333 sk->sk_hash = hash;
cf2f225e 334 __unix_insert_socket(net, sk);
185ab886
AV
335}
336
79b05bea 337static void unix_remove_socket(struct net *net, struct sock *sk)
1da177e4 338{
79b05bea 339 spin_lock(&net->unx.table.locks[sk->sk_hash]);
1da177e4 340 __unix_remove_socket(sk);
79b05bea 341 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
1da177e4
LT
342}
343
79b05bea 344static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
1da177e4 345{
79b05bea 346 spin_lock(&net->unx.table.locks[sk->sk_hash]);
cf2f225e 347 __unix_insert_socket(net, sk);
79b05bea 348 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
1da177e4
LT
349}
350
51bae889
KI
351static void unix_insert_bsd_socket(struct sock *sk)
352{
353 spin_lock(&bsd_socket_locks[sk->sk_hash]);
354 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
355 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
356}
357
358static void unix_remove_bsd_socket(struct sock *sk)
359{
360 if (!hlist_unhashed(&sk->sk_bind_node)) {
361 spin_lock(&bsd_socket_locks[sk->sk_hash]);
362 __sk_del_bind_node(sk);
363 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
364
365 sk_node_init(&sk->sk_bind_node);
366 }
367}
368
097e66c5
DL
369static struct sock *__unix_find_socket_byname(struct net *net,
370 struct sockaddr_un *sunname,
be752283 371 int len, unsigned int hash)
1da177e4
LT
372{
373 struct sock *s;
1da177e4 374
cf2f225e 375 sk_for_each(s, &net->unx.table.buckets[hash]) {
1da177e4
LT
376 struct unix_sock *u = unix_sk(s);
377
378 if (u->addr->len == len &&
379 !memcmp(u->addr->name, sunname, len))
262ce0af 380 return s;
1da177e4 381 }
262ce0af 382 return NULL;
1da177e4
LT
383}
384
097e66c5
DL
385static inline struct sock *unix_find_socket_byname(struct net *net,
386 struct sockaddr_un *sunname,
be752283 387 int len, unsigned int hash)
1da177e4
LT
388{
389 struct sock *s;
390
79b05bea 391 spin_lock(&net->unx.table.locks[hash]);
be752283 392 s = __unix_find_socket_byname(net, sunname, len, hash);
1da177e4
LT
393 if (s)
394 sock_hold(s);
79b05bea 395 spin_unlock(&net->unx.table.locks[hash]);
1da177e4
LT
396 return s;
397}
398
51bae889 399static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4 400{
f452be49 401 unsigned int hash = unix_bsd_hash(i);
1da177e4 402 struct sock *s;
1da177e4 403
51bae889
KI
404 spin_lock(&bsd_socket_locks[hash]);
405 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
40ffe67d 406 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 407
beef5121 408 if (dentry && d_backing_inode(dentry) == i) {
1da177e4 409 sock_hold(s);
51bae889 410 spin_unlock(&bsd_socket_locks[hash]);
afd20b92 411 return s;
1da177e4
LT
412 }
413 }
51bae889 414 spin_unlock(&bsd_socket_locks[hash]);
afd20b92 415 return NULL;
1da177e4
LT
416}
417
7d267278
RW
418/* Support code for asymmetrically connected dgram sockets
419 *
420 * If a datagram socket is connected to a socket not itself connected
421 * to the first socket (eg, /dev/log), clients may only enqueue more
422 * messages if the present receive queue of the server socket is not
423 * "too large". This means there's a second writeability condition
424 * poll and sendmsg need to test. The dgram recv code will do a wake
425 * up on the peer_wait wait queue of a socket upon reception of a
426 * datagram which needs to be propagated to sleeping would-be writers
427 * since these might not have sent anything so far. This can't be
428 * accomplished via poll_wait because the lifetime of the server
429 * socket might be less than that of its clients if these break their
430 * association with it or if the server socket is closed while clients
431 * are still connected to it and there's no way to inform "a polling
432 * implementation" that it should let go of a certain wait queue
433 *
ac6424b9 434 * In order to propagate a wake up, a wait_queue_entry_t of the client
7d267278
RW
435 * socket is enqueued on the peer_wait queue of the server socket
436 * whose wake function does a wake_up on the ordinary client socket
437 * wait queue. This connection is established whenever a write (or
438 * poll for write) hit the flow control condition and broken when the
439 * association to the server socket is dissolved or after a wake up
440 * was relayed.
441 */
442
ac6424b9 443static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
444 void *key)
445{
446 struct unix_sock *u;
447 wait_queue_head_t *u_sleep;
448
449 u = container_of(q, struct unix_sock, peer_wake);
450
451 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
452 q);
453 u->peer_wake.private = NULL;
454
455 /* relaying can only happen while the wq still exists */
456 u_sleep = sk_sleep(&u->sk);
457 if (u_sleep)
3ad6f93e 458 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
7d267278
RW
459
460 return 0;
461}
462
463static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
464{
465 struct unix_sock *u, *u_other;
466 int rc;
467
468 u = unix_sk(sk);
469 u_other = unix_sk(other);
470 rc = 0;
471 spin_lock(&u_other->peer_wait.lock);
472
473 if (!u->peer_wake.private) {
474 u->peer_wake.private = other;
475 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
476
477 rc = 1;
478 }
479
480 spin_unlock(&u_other->peer_wait.lock);
481 return rc;
482}
483
484static void unix_dgram_peer_wake_disconnect(struct sock *sk,
485 struct sock *other)
486{
487 struct unix_sock *u, *u_other;
488
489 u = unix_sk(sk);
490 u_other = unix_sk(other);
491 spin_lock(&u_other->peer_wait.lock);
492
493 if (u->peer_wake.private == other) {
494 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
495 u->peer_wake.private = NULL;
496 }
497
498 spin_unlock(&u_other->peer_wait.lock);
499}
500
501static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
502 struct sock *other)
503{
504 unix_dgram_peer_wake_disconnect(sk, other);
505 wake_up_interruptible_poll(sk_sleep(sk),
a9a08845
LT
506 EPOLLOUT |
507 EPOLLWRNORM |
508 EPOLLWRBAND);
7d267278
RW
509}
510
511/* preconditions:
512 * - unix_peer(sk) == other
513 * - association is stable
514 */
515static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
516{
517 int connected;
518
519 connected = unix_dgram_peer_wake_connect(sk, other);
520
51f7e951
JB
521 /* If other is SOCK_DEAD, we want to make sure we signal
522 * POLLOUT, such that a subsequent write() can get a
523 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
524 * to other and its full, we will hang waiting for POLLOUT.
525 */
662a8094 526 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
7d267278
RW
527 return 1;
528
529 if (connected)
530 unix_dgram_peer_wake_disconnect(sk, other);
531
532 return 0;
533}
534
1586a587 535static int unix_writable(const struct sock *sk)
1da177e4 536{
1586a587 537 return sk->sk_state != TCP_LISTEN &&
14afee4b 538 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
539}
540
541static void unix_write_space(struct sock *sk)
542{
43815482
ED
543 struct socket_wq *wq;
544
545 rcu_read_lock();
1da177e4 546 if (unix_writable(sk)) {
43815482 547 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 548 if (skwq_has_sleeper(wq))
67426b75 549 wake_up_interruptible_sync_poll(&wq->wait,
a9a08845 550 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
8d8ad9d7 551 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 552 }
43815482 553 rcu_read_unlock();
1da177e4
LT
554}
555
556/* When dgram socket disconnects (or changes its peer), we clear its receive
557 * queue of packets arrived from previous peer. First, it allows to do
558 * flow control based only on wmem_alloc; second, sk connected to peer
559 * may receive messages only from that peer. */
560static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
561{
b03efcfb 562 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
563 skb_queue_purge(&sk->sk_receive_queue);
564 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
565
566 /* If one link of bidirectional dgram pipe is disconnected,
567 * we signal error. Messages are lost. Do not make this,
568 * when peer was not connected to us.
569 */
570 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
cc04410a 571 WRITE_ONCE(other->sk_err, ECONNRESET);
e3ae2365 572 sk_error_report(other);
1da177e4
LT
573 }
574 }
dc56ad70 575 other->sk_state = TCP_CLOSE;
1da177e4
LT
576}
577
578static void unix_sock_destructor(struct sock *sk)
579{
580 struct unix_sock *u = unix_sk(sk);
581
582 skb_queue_purge(&sk->sk_receive_queue);
583
dd29c67d
ED
584 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
585 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
586 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
1da177e4 587 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 588 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
589 return;
590 }
591
592 if (u->addr)
593 unix_release_addr(u->addr);
594
518de9b3 595 atomic_long_dec(&unix_nr_socks);
a8076d8d 596 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1da177e4 597#ifdef UNIX_REFCNT_DEBUG
5cc208be 598 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 599 atomic_long_read(&unix_nr_socks));
1da177e4
LT
600#endif
601}
602
ded34e0f 603static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
604{
605 struct unix_sock *u = unix_sk(sk);
1da177e4
LT
606 struct sock *skpair;
607 struct sk_buff *skb;
79b05bea 608 struct path path;
1da177e4
LT
609 int state;
610
79b05bea 611 unix_remove_socket(sock_net(sk), sk);
51bae889 612 unix_remove_bsd_socket(sk);
1da177e4
LT
613
614 /* Clear state */
1c92b4e5 615 unix_state_lock(sk);
1da177e4 616 sock_orphan(sk);
e1d09c2c 617 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
40ffe67d
AV
618 path = u->path;
619 u->path.dentry = NULL;
620 u->path.mnt = NULL;
1da177e4
LT
621 state = sk->sk_state;
622 sk->sk_state = TCP_CLOSE;
a494bd64
ED
623
624 skpair = unix_peer(sk);
625 unix_peer(sk) = NULL;
626
1c92b4e5 627 unix_state_unlock(sk);
1da177e4 628
7a62ed61
KI
629#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
630 if (u->oob_skb) {
631 kfree_skb(u->oob_skb);
632 u->oob_skb = NULL;
633 }
634#endif
635
1da177e4
LT
636 wake_up_interruptible_all(&u->peer_wait);
637
e27dfcea 638 if (skpair != NULL) {
1da177e4 639 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 640 unix_state_lock(skpair);
1da177e4 641 /* No more writes */
e1d09c2c 642 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
1da177e4 643 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
cc04410a 644 WRITE_ONCE(skpair->sk_err, ECONNRESET);
1c92b4e5 645 unix_state_unlock(skpair);
1da177e4 646 skpair->sk_state_change(skpair);
8d8ad9d7 647 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 648 }
7d267278
RW
649
650 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4 651 sock_put(skpair); /* It may now die */
1da177e4
LT
652 }
653
654 /* Try to flush out this socket. Throw out buffers at least */
655
656 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 657 if (state == TCP_LISTEN)
1da177e4
LT
658 unix_release_sock(skb->sk, 1);
659 /* passed fds are erased in the kfree_skb hook */
73ed5d25 660 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
661 kfree_skb(skb);
662 }
663
40ffe67d
AV
664 if (path.dentry)
665 path_put(&path);
1da177e4
LT
666
667 sock_put(sk);
668
669 /* ---- Socket is dead now and most probably destroyed ---- */
670
671 /*
e04dae84 672 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
673 * ECONNRESET and we die on the spot. In Linux we behave
674 * like files and pipes do and wait for the last
675 * dereference.
676 *
677 * Can't we simply set sock->err?
678 *
679 * What the above comment does talk about? --ANK(980817)
680 */
681
ade32bd8 682 if (READ_ONCE(unix_tot_inflight))
ac7bfa62 683 unix_gc(); /* Garbage collect fds */
1da177e4
LT
684}
685
109f6e39
EB
686static void init_peercred(struct sock *sk)
687{
35306eb2
ED
688 const struct cred *old_cred;
689 struct pid *old_pid;
690
691 spin_lock(&sk->sk_peer_lock);
692 old_pid = sk->sk_peer_pid;
693 old_cred = sk->sk_peer_cred;
109f6e39
EB
694 sk->sk_peer_pid = get_pid(task_tgid(current));
695 sk->sk_peer_cred = get_current_cred();
35306eb2
ED
696 spin_unlock(&sk->sk_peer_lock);
697
698 put_pid(old_pid);
699 put_cred(old_cred);
109f6e39
EB
700}
701
702static void copy_peercred(struct sock *sk, struct sock *peersk)
703{
35306eb2
ED
704 const struct cred *old_cred;
705 struct pid *old_pid;
706
707 if (sk < peersk) {
708 spin_lock(&sk->sk_peer_lock);
709 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
710 } else {
711 spin_lock(&peersk->sk_peer_lock);
712 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
713 }
714 old_pid = sk->sk_peer_pid;
715 old_cred = sk->sk_peer_cred;
109f6e39
EB
716 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
717 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
35306eb2
ED
718
719 spin_unlock(&sk->sk_peer_lock);
720 spin_unlock(&peersk->sk_peer_lock);
721
722 put_pid(old_pid);
723 put_cred(old_cred);
109f6e39
EB
724}
725
1da177e4
LT
726static int unix_listen(struct socket *sock, int backlog)
727{
728 int err;
729 struct sock *sk = sock->sk;
730 struct unix_sock *u = unix_sk(sk);
731
732 err = -EOPNOTSUPP;
6eba6a37
ED
733 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
734 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
735 err = -EINVAL;
736 if (!u->addr)
6eba6a37 737 goto out; /* No listens on an unbound socket */
1c92b4e5 738 unix_state_lock(sk);
1da177e4
LT
739 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
740 goto out_unlock;
741 if (backlog > sk->sk_max_ack_backlog)
742 wake_up_interruptible_all(&u->peer_wait);
743 sk->sk_max_ack_backlog = backlog;
744 sk->sk_state = TCP_LISTEN;
745 /* set credentials so connect can copy them */
109f6e39 746 init_peercred(sk);
1da177e4
LT
747 err = 0;
748
749out_unlock:
1c92b4e5 750 unix_state_unlock(sk);
1da177e4
LT
751out:
752 return err;
753}
754
755static int unix_release(struct socket *);
756static int unix_bind(struct socket *, struct sockaddr *, int);
757static int unix_stream_connect(struct socket *, struct sockaddr *,
758 int addr_len, int flags);
759static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 760static int unix_accept(struct socket *, struct socket *, int, bool);
9b2c45d4 761static int unix_getname(struct socket *, struct sockaddr *, int);
a11e1d43
LT
762static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
763static __poll_t unix_dgram_poll(struct file *, struct socket *,
764 poll_table *);
1da177e4 765static int unix_ioctl(struct socket *, unsigned int, unsigned long);
5f6beb9e
AB
766#ifdef CONFIG_COMPAT
767static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
768#endif
1da177e4 769static int unix_shutdown(struct socket *, int);
1b784140
YX
770static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
771static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
2b514574
HFS
772static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
773 struct pipe_inode_info *, size_t size,
774 unsigned int flags);
1b784140
YX
775static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
776static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
965b57b4
CW
777static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
778static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
1da177e4
LT
779static int unix_dgram_connect(struct socket *, struct sockaddr *,
780 int, int);
1b784140
YX
781static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
782static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
783 int);
1da177e4 784
5c05a164 785#ifdef CONFIG_PROC_FS
de437089
KT
786static int unix_count_nr_fds(struct sock *sk)
787{
788 struct sk_buff *skb;
789 struct unix_sock *u;
790 int nr_fds = 0;
791
792 spin_lock(&sk->sk_receive_queue.lock);
793 skb = skb_peek(&sk->sk_receive_queue);
794 while (skb) {
795 u = unix_sk(skb->sk);
796 nr_fds += atomic_read(&u->scm_stat.nr_fds);
797 skb = skb_peek_next(skb, &sk->sk_receive_queue);
798 }
799 spin_unlock(&sk->sk_receive_queue.lock);
800
801 return nr_fds;
802}
803
3c32da19
KT
804static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
805{
806 struct sock *sk = sock->sk;
b27401a3 807 unsigned char s_state;
3c32da19 808 struct unix_sock *u;
b27401a3 809 int nr_fds = 0;
3c32da19
KT
810
811 if (sk) {
b27401a3 812 s_state = READ_ONCE(sk->sk_state);
de437089 813 u = unix_sk(sk);
de437089 814
b27401a3
KT
815 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
816 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
817 * SOCK_DGRAM is ordinary. So, no lock is needed.
818 */
819 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
de437089 820 nr_fds = atomic_read(&u->scm_stat.nr_fds);
b27401a3 821 else if (s_state == TCP_LISTEN)
de437089 822 nr_fds = unix_count_nr_fds(sk);
b27401a3 823
de437089 824 seq_printf(m, "scm_fds: %u\n", nr_fds);
3c32da19
KT
825 }
826}
3a12500e
TK
827#else
828#define unix_show_fdinfo NULL
829#endif
f55bb7f9 830
90ddc4f0 831static const struct proto_ops unix_stream_ops = {
1da177e4
LT
832 .family = PF_UNIX,
833 .owner = THIS_MODULE,
834 .release = unix_release,
835 .bind = unix_bind,
836 .connect = unix_stream_connect,
837 .socketpair = unix_socketpair,
838 .accept = unix_accept,
839 .getname = unix_getname,
a11e1d43 840 .poll = unix_poll,
1da177e4 841 .ioctl = unix_ioctl,
5f6beb9e
AB
842#ifdef CONFIG_COMPAT
843 .compat_ioctl = unix_compat_ioctl,
844#endif
1da177e4
LT
845 .listen = unix_listen,
846 .shutdown = unix_shutdown,
1da177e4
LT
847 .sendmsg = unix_stream_sendmsg,
848 .recvmsg = unix_stream_recvmsg,
965b57b4 849 .read_skb = unix_stream_read_skb,
1da177e4 850 .mmap = sock_no_mmap,
2b514574 851 .splice_read = unix_stream_splice_read,
56667da7 852 .set_peek_off = sk_set_peek_off,
3c32da19 853 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
854};
855
90ddc4f0 856static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
857 .family = PF_UNIX,
858 .owner = THIS_MODULE,
859 .release = unix_release,
860 .bind = unix_bind,
861 .connect = unix_dgram_connect,
862 .socketpair = unix_socketpair,
863 .accept = sock_no_accept,
864 .getname = unix_getname,
a11e1d43 865 .poll = unix_dgram_poll,
1da177e4 866 .ioctl = unix_ioctl,
5f6beb9e
AB
867#ifdef CONFIG_COMPAT
868 .compat_ioctl = unix_compat_ioctl,
869#endif
1da177e4
LT
870 .listen = sock_no_listen,
871 .shutdown = unix_shutdown,
1da177e4 872 .sendmsg = unix_dgram_sendmsg,
965b57b4 873 .read_skb = unix_read_skb,
1da177e4
LT
874 .recvmsg = unix_dgram_recvmsg,
875 .mmap = sock_no_mmap,
56667da7 876 .set_peek_off = sk_set_peek_off,
3c32da19 877 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
878};
879
90ddc4f0 880static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
881 .family = PF_UNIX,
882 .owner = THIS_MODULE,
883 .release = unix_release,
884 .bind = unix_bind,
885 .connect = unix_stream_connect,
886 .socketpair = unix_socketpair,
887 .accept = unix_accept,
888 .getname = unix_getname,
a11e1d43 889 .poll = unix_dgram_poll,
1da177e4 890 .ioctl = unix_ioctl,
5f6beb9e
AB
891#ifdef CONFIG_COMPAT
892 .compat_ioctl = unix_compat_ioctl,
893#endif
1da177e4
LT
894 .listen = unix_listen,
895 .shutdown = unix_shutdown,
1da177e4 896 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 897 .recvmsg = unix_seqpacket_recvmsg,
1da177e4 898 .mmap = sock_no_mmap,
56667da7 899 .set_peek_off = sk_set_peek_off,
3c32da19 900 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
901};
902
c7272e15
CW
903static void unix_close(struct sock *sk, long timeout)
904{
905 /* Nothing to do here, unix socket does not need a ->close().
906 * This is merely for sockmap.
907 */
908}
909
94531cfc
JW
910static void unix_unhash(struct sock *sk)
911{
912 /* Nothing to do here, unix socket does not need a ->unhash().
913 * This is merely for sockmap.
914 */
915}
916
7b26952a
AM
917static bool unix_bpf_bypass_getsockopt(int level, int optname)
918{
919 if (level == SOL_SOCKET) {
920 switch (optname) {
921 case SO_PEERPIDFD:
922 return true;
923 default:
924 return false;
925 }
926 }
927
928 return false;
929}
930
94531cfc 931struct proto unix_dgram_proto = {
0edf0824 932 .name = "UNIX",
248969ae 933 .owner = THIS_MODULE,
248969ae 934 .obj_size = sizeof(struct unix_sock),
c7272e15 935 .close = unix_close,
7b26952a 936 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
c6382918 937#ifdef CONFIG_BPF_SYSCALL
94531cfc 938 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
c6382918 939#endif
1da177e4
LT
940};
941
94531cfc
JW
942struct proto unix_stream_proto = {
943 .name = "UNIX-STREAM",
248969ae 944 .owner = THIS_MODULE,
248969ae 945 .obj_size = sizeof(struct unix_sock),
c7272e15 946 .close = unix_close,
94531cfc 947 .unhash = unix_unhash,
7b26952a 948 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
c6382918 949#ifdef CONFIG_BPF_SYSCALL
94531cfc 950 .psock_update_sk_prot = unix_stream_bpf_update_proto,
c6382918 951#endif
1da177e4
LT
952};
953
94531cfc 954static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1da177e4 955{
1da177e4 956 struct unix_sock *u;
f4bd73b5
KI
957 struct sock *sk;
958 int err;
1da177e4 959
518de9b3 960 atomic_long_inc(&unix_nr_socks);
f4bd73b5
KI
961 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
962 err = -ENFILE;
963 goto err;
964 }
1da177e4 965
94531cfc
JW
966 if (type == SOCK_STREAM)
967 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
968 else /*dgram and seqpacket */
969 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
970
f4bd73b5
KI
971 if (!sk) {
972 err = -ENOMEM;
973 goto err;
974 }
1da177e4 975
6eba6a37 976 sock_init_data(sock, sk);
1da177e4 977
e6b4b873 978 sk->sk_hash = unix_unbound_hash(sk);
3aa9799e 979 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 980 sk->sk_write_space = unix_write_space;
a0a53c8b 981 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
982 sk->sk_destruct = unix_sock_destructor;
983 u = unix_sk(sk);
40ffe67d
AV
984 u->path.dentry = NULL;
985 u->path.mnt = NULL;
fd19f329 986 spin_lock_init(&u->lock);
516e0cc5 987 atomic_long_set(&u->inflight, 0);
1fd05ba5 988 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
989 mutex_init(&u->iolock); /* single task reading lock */
990 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 991 init_waitqueue_head(&u->peer_wait);
7d267278 992 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
3c32da19 993 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
79b05bea 994 unix_insert_unbound_socket(net, sk);
f4bd73b5 995
340c3d33 996 sock_prot_inuse_add(net, sk->sk_prot, 1);
f4bd73b5 997
1da177e4 998 return sk;
f4bd73b5
KI
999
1000err:
1001 atomic_long_dec(&unix_nr_socks);
1002 return ERR_PTR(err);
1da177e4
LT
1003}
1004
3f378b68
EP
1005static int unix_create(struct net *net, struct socket *sock, int protocol,
1006 int kern)
1da177e4 1007{
f4bd73b5
KI
1008 struct sock *sk;
1009
1da177e4
LT
1010 if (protocol && protocol != PF_UNIX)
1011 return -EPROTONOSUPPORT;
1012
1013 sock->state = SS_UNCONNECTED;
1014
1015 switch (sock->type) {
1016 case SOCK_STREAM:
1017 sock->ops = &unix_stream_ops;
1018 break;
1019 /*
1020 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1021 * nothing uses it.
1022 */
1023 case SOCK_RAW:
e27dfcea 1024 sock->type = SOCK_DGRAM;
df561f66 1025 fallthrough;
1da177e4
LT
1026 case SOCK_DGRAM:
1027 sock->ops = &unix_dgram_ops;
1028 break;
1029 case SOCK_SEQPACKET:
1030 sock->ops = &unix_seqpacket_ops;
1031 break;
1032 default:
1033 return -ESOCKTNOSUPPORT;
1034 }
1035
f4bd73b5
KI
1036 sk = unix_create1(net, sock, kern, sock->type);
1037 if (IS_ERR(sk))
1038 return PTR_ERR(sk);
1039
1040 return 0;
1da177e4
LT
1041}
1042
1043static int unix_release(struct socket *sock)
1044{
1045 struct sock *sk = sock->sk;
1046
1047 if (!sk)
1048 return 0;
1049
c7272e15 1050 sk->sk_prot->close(sk, 0);
ded34e0f 1051 unix_release_sock(sk, 0);
1da177e4
LT
1052 sock->sk = NULL;
1053
ded34e0f 1054 return 0;
1da177e4
LT
1055}
1056
51bae889
KI
/*
 * Look up the socket bound to a filesystem (pathname) address.
 *
 * Resolves sunaddr->sun_path, requires MAY_WRITE permission on it and
 * requires the inode to be a socket with a bound AF_UNIX sock of the
 * requested @type.  On success the path's atime is touched and the sock
 * found by unix_find_socket_byinode() is returned; the failure path
 * drops it with sock_put().
 *
 * Errors (as ERR_PTR): whatever kern_path()/path_permission() return,
 * -ECONNREFUSED if the path is not a socket or no sock is bound to it,
 * -EPROTOTYPE if the socket type differs.
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int ret;

	unix_mkname_bsd(sunaddr, addr_len);

	ret = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);

	ret = path_permission(&path, MAY_WRITE);
	if (ret)
		goto out_path;

	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode)) {
		ret = -ECONNREFUSED;
		goto out_path;
	}

	sk = unix_find_socket_byinode(inode);
	if (!sk) {
		ret = -ECONNREFUSED;
		goto out_path;
	}

	if (sk->sk_type != type) {
		ret = -EPROTOTYPE;
		goto out_sock;
	}

	touch_atime(&path);
	path_put(&path);

	return sk;

out_sock:
	sock_put(sk);
out_path:
	path_put(&path);
	return ERR_PTR(ret);
}
1100
/*
 * Look up a socket by abstract name in @net.
 *
 * Returns the sock found by unix_find_socket_byname(), or
 * ERR_PTR(-ECONNREFUSED) when no socket has that name.  If the found
 * sock has a path dentry, its atime is touched.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *found;

	found = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!found)
		return ERR_PTR(-ECONNREFUSED);

	if (unix_sk(found)->path.dentry)
		touch_atime(&unix_sk(found)->path);

	return found;
}
1119
1120static struct sock *unix_find_other(struct net *net,
1121 struct sockaddr_un *sunaddr,
d2d8c9fd 1122 int addr_len, int type)
fa39ef0e
KI
1123{
1124 struct sock *sk;
1125
1126 if (sunaddr->sun_path[0])
51bae889 1127 sk = unix_find_bsd(sunaddr, addr_len, type);
fa39ef0e 1128 else
d2d8c9fd 1129 sk = unix_find_abstract(net, sunaddr, addr_len, type);
fa39ef0e
KI
1130
1131 return sk;
1132}
1133
f7ed31f4 1134static int unix_autobind(struct sock *sk)
1da177e4 1135{
afd20b92 1136 unsigned int new_hash, old_hash = sk->sk_hash;
1da177e4 1137 struct unix_sock *u = unix_sk(sk);
79b05bea 1138 struct net *net = sock_net(sk);
6eba6a37 1139 struct unix_address *addr;
9acbc584 1140 u32 lastnum, ordernum;
f7ed31f4 1141 int err;
1da177e4 1142
6e1ce3c3 1143 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
1144 if (err)
1145 return err;
1da177e4 1146
1da177e4
LT
1147 if (u->addr)
1148 goto out;
1149
1150 err = -ENOMEM;
755662ce
KI
1151 addr = kzalloc(sizeof(*addr) +
1152 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1da177e4
LT
1153 if (!addr)
1154 goto out;
1155
9acbc584 1156 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1da177e4 1157 addr->name->sun_family = AF_UNIX;
8c9814b9 1158 refcount_set(&addr->refcnt, 1);
1da177e4 1159
a251c17a 1160 ordernum = get_random_u32();
9acbc584 1161 lastnum = ordernum & 0xFFFFF;
1da177e4 1162retry:
9acbc584
KI
1163 ordernum = (ordernum + 1) & 0xFFFFF;
1164 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1da177e4 1165
e6b4b873 1166 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
79b05bea 1167 unix_table_double_lock(net, old_hash, new_hash);
1da177e4 1168
79b05bea
KI
1169 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1170 unix_table_double_unlock(net, old_hash, new_hash);
afd20b92 1171
9acbc584 1172 /* __unix_find_socket_byname() may take long time if many names
8df73ff9
TH
1173 * are already in use.
1174 */
1175 cond_resched();
9acbc584
KI
1176
1177 if (ordernum == lastnum) {
1178 /* Give up if all names seems to be in use. */
8df73ff9 1179 err = -ENOSPC;
9acbc584 1180 unix_release_addr(addr);
8df73ff9
TH
1181 goto out;
1182 }
9acbc584 1183
1da177e4
LT
1184 goto retry;
1185 }
1da177e4 1186
cf2f225e 1187 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1188 unix_table_double_unlock(net, old_hash, new_hash);
1da177e4
LT
1189 err = 0;
1190
6e1ce3c3 1191out: mutex_unlock(&u->bindlock);
1da177e4
LT
1192 return err;
1193}
1194
12f21c49
KI
1195static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1196 int addr_len)
faf02010 1197{
71e6be6f
AV
1198 umode_t mode = S_IFSOCK |
1199 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
afd20b92 1200 unsigned int new_hash, old_hash = sk->sk_hash;
12f21c49 1201 struct unix_sock *u = unix_sk(sk);
79b05bea 1202 struct net *net = sock_net(sk);
abf08576 1203 struct mnt_idmap *idmap;
12f21c49 1204 struct unix_address *addr;
38f7bd94 1205 struct dentry *dentry;
12f21c49 1206 struct path parent;
71e6be6f
AV
1207 int err;
1208
ecb4534b 1209 addr_len = unix_mkname_bsd(sunaddr, addr_len);
12f21c49
KI
1210 addr = unix_create_addr(sunaddr, addr_len);
1211 if (!addr)
1212 return -ENOMEM;
1213
38f7bd94
LT
1214 /*
1215 * Get the parent directory, calculate the hash for last
1216 * component.
1217 */
71e6be6f 1218 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
12f21c49
KI
1219 if (IS_ERR(dentry)) {
1220 err = PTR_ERR(dentry);
1221 goto out;
1222 }
faf02010 1223
38f7bd94
LT
1224 /*
1225 * All right, let's create it.
1226 */
abf08576 1227 idmap = mnt_idmap(parent.mnt);
71e6be6f 1228 err = security_path_mknod(&parent, dentry, mode, 0);
56c1731b 1229 if (!err)
abf08576 1230 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
c0c3b8d3 1231 if (err)
12f21c49 1232 goto out_path;
fa42d910 1233 err = mutex_lock_interruptible(&u->bindlock);
c0c3b8d3
AV
1234 if (err)
1235 goto out_unlink;
1236 if (u->addr)
1237 goto out_unlock;
fa42d910 1238
e6b4b873 1239 new_hash = unix_bsd_hash(d_backing_inode(dentry));
79b05bea 1240 unix_table_double_lock(net, old_hash, new_hash);
56c1731b
AV
1241 u->path.mnt = mntget(parent.mnt);
1242 u->path.dentry = dget(dentry);
cf2f225e 1243 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1244 unix_table_double_unlock(net, old_hash, new_hash);
51bae889 1245 unix_insert_bsd_socket(sk);
fa42d910 1246 mutex_unlock(&u->bindlock);
56c1731b 1247 done_path_create(&parent, dentry);
fa42d910 1248 return 0;
c0c3b8d3
AV
1249
1250out_unlock:
1251 mutex_unlock(&u->bindlock);
1252 err = -EINVAL;
1253out_unlink:
1254 /* failed after successful mknod? unlink what we'd created... */
abf08576 1255 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
12f21c49 1256out_path:
c0c3b8d3 1257 done_path_create(&parent, dentry);
12f21c49
KI
1258out:
1259 unix_release_addr(addr);
1260 return err == -EEXIST ? -EADDRINUSE : err;
fa42d910
AV
1261}
1262
12f21c49
KI
1263static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1264 int addr_len)
fa42d910 1265{
afd20b92 1266 unsigned int new_hash, old_hash = sk->sk_hash;
fa42d910 1267 struct unix_sock *u = unix_sk(sk);
79b05bea 1268 struct net *net = sock_net(sk);
12f21c49 1269 struct unix_address *addr;
fa42d910
AV
1270 int err;
1271
12f21c49
KI
1272 addr = unix_create_addr(sunaddr, addr_len);
1273 if (!addr)
1274 return -ENOMEM;
1275
fa42d910
AV
1276 err = mutex_lock_interruptible(&u->bindlock);
1277 if (err)
12f21c49 1278 goto out;
fa42d910
AV
1279
1280 if (u->addr) {
12f21c49
KI
1281 err = -EINVAL;
1282 goto out_mutex;
fa42d910
AV
1283 }
1284
e6b4b873 1285 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
79b05bea 1286 unix_table_double_lock(net, old_hash, new_hash);
12f21c49 1287
79b05bea 1288 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
12f21c49
KI
1289 goto out_spin;
1290
cf2f225e 1291 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1292 unix_table_double_unlock(net, old_hash, new_hash);
fa42d910
AV
1293 mutex_unlock(&u->bindlock);
1294 return 0;
12f21c49
KI
1295
1296out_spin:
79b05bea 1297 unix_table_double_unlock(net, old_hash, new_hash);
12f21c49
KI
1298 err = -EADDRINUSE;
1299out_mutex:
1300 mutex_unlock(&u->bindlock);
1301out:
1302 unix_release_addr(addr);
1303 return err;
fa42d910
AV
1304}
1305
1da177e4
LT
1306static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1307{
e27dfcea 1308 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
5c32a3ed 1309 struct sock *sk = sock->sk;
5c32a3ed 1310 int err;
1da177e4 1311
b8a58aa6
KI
1312 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1313 sunaddr->sun_family == AF_UNIX)
f7ed31f4 1314 return unix_autobind(sk);
1da177e4 1315
b8a58aa6
KI
1316 err = unix_validate_addr(sunaddr, addr_len);
1317 if (err)
1318 return err;
1319
12f21c49
KI
1320 if (sunaddr->sun_path[0])
1321 err = unix_bind_bsd(sk, sunaddr, addr_len);
fa42d910 1322 else
12f21c49
KI
1323 err = unix_bind_abstract(sk, sunaddr, addr_len);
1324
1325 return err;
1da177e4
LT
1326}
1327
278a3de5
DM
1328static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1329{
1330 if (unlikely(sk1 == sk2) || !sk2) {
1331 unix_state_lock(sk1);
1332 return;
1333 }
4d322dce
ED
1334 if (sk1 > sk2)
1335 swap(sk1, sk2);
1336
1337 unix_state_lock(sk1);
1338 unix_state_lock_nested(sk2, U_LOCK_SECOND);
278a3de5
DM
1339}
1340
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and
 * additionally unlocks @sk2 when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && sk1 != sk2)
		unix_state_unlock(sk2);
}
1350
1da177e4
LT
1351static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1352 int alen, int flags)
1353{
e27dfcea 1354 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
340c3d33 1355 struct sock *sk = sock->sk;
1da177e4 1356 struct sock *other;
1da177e4
LT
1357 int err;
1358
defbcf2d
MJ
1359 err = -EINVAL;
1360 if (alen < offsetofend(struct sockaddr, sa_family))
1361 goto out;
1362
1da177e4 1363 if (addr->sa_family != AF_UNSPEC) {
b8a58aa6
KI
1364 err = unix_validate_addr(sunaddr, alen);
1365 if (err)
1366 goto out;
1367
859051dd
DDM
1368 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1369 if (err)
1370 goto out;
1371
5e2ff670
AM
1372 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1373 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
f7ed31f4
KI
1374 !unix_sk(sk)->addr) {
1375 err = unix_autobind(sk);
1376 if (err)
1377 goto out;
1378 }
1da177e4 1379
278a3de5 1380restart:
340c3d33 1381 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
aed26f55
KI
1382 if (IS_ERR(other)) {
1383 err = PTR_ERR(other);
1da177e4 1384 goto out;
aed26f55 1385 }
1da177e4 1386
278a3de5
DM
1387 unix_state_double_lock(sk, other);
1388
1389 /* Apparently VFS overslept socket death. Retry. */
1390 if (sock_flag(other, SOCK_DEAD)) {
1391 unix_state_double_unlock(sk, other);
1392 sock_put(other);
1393 goto restart;
1394 }
1da177e4
LT
1395
1396 err = -EPERM;
1397 if (!unix_may_send(sk, other))
1398 goto out_unlock;
1399
1400 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1401 if (err)
1402 goto out_unlock;
1403
dc56ad70 1404 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1da177e4
LT
1405 } else {
1406 /*
1407 * 1003.1g breaking connected state with AF_UNSPEC
1408 */
1409 other = NULL;
278a3de5 1410 unix_state_double_lock(sk, other);
1da177e4
LT
1411 }
1412
1413 /*
1414 * If it was connected, reconnect.
1415 */
1416 if (unix_peer(sk)) {
1417 struct sock *old_peer = unix_peer(sk);
dc56ad70 1418
e27dfcea 1419 unix_peer(sk) = other;
dc56ad70
ED
1420 if (!other)
1421 sk->sk_state = TCP_CLOSE;
7d267278
RW
1422 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1423
278a3de5 1424 unix_state_double_unlock(sk, other);
1da177e4
LT
1425
1426 if (other != old_peer)
1427 unix_dgram_disconnected(sk, old_peer);
1428 sock_put(old_peer);
1429 } else {
e27dfcea 1430 unix_peer(sk) = other;
278a3de5 1431 unix_state_double_unlock(sk, other);
1da177e4 1432 }
83301b53 1433
ac7bfa62 1434 return 0;
1da177e4
LT
1435
1436out_unlock:
278a3de5 1437 unix_state_double_unlock(sk, other);
1da177e4
LT
1438 sock_put(other);
1439out:
1440 return err;
1441}
1442
1443static long unix_wait_for_peer(struct sock *other, long timeo)
48851e9e 1444 __releases(&unix_sk(other)->lock)
1da177e4
LT
1445{
1446 struct unix_sock *u = unix_sk(other);
1447 int sched;
1448 DEFINE_WAIT(wait);
1449
1450 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1451
1452 sched = !sock_flag(other, SOCK_DEAD) &&
1453 !(other->sk_shutdown & RCV_SHUTDOWN) &&
679ed006 1454 unix_recvq_full_lockless(other);
1da177e4 1455
1c92b4e5 1456 unix_state_unlock(other);
1da177e4
LT
1457
1458 if (sched)
1459 timeo = schedule_timeout(timeo);
1460
1461 finish_wait(&u->peer_wait, &wait);
1462 return timeo;
1463}
1464
/*
 * ->connect() handler for stream and seqpacket sockets.
 *
 * Allocates the sock that will become the server-side endpoint (newsk)
 * and a one-byte skb carrying it, finds the listening peer, waits (up to
 * the send timeout) while the listener's queue is full, then wires
 * sk <-> newsk together and queues the skb on the listener so that
 * accept() can pick newsk up.
 *
 * sk->sk_state is sampled with READ_ONCE() before sk's state lock is
 * taken and re-checked under the lock; the store that publishes
 * TCP_ESTABLISHED uses WRITE_ONCE() because sk_state is also read
 * locklessly (here and in unix_accept()).
 *
 * Returns 0 on success.  Errors: those of address validation, the cgroup
 * BPF connect hook, autobind, unix_create1(), peer lookup and the LSM
 * hook; -ENOMEM if the skb cannot be allocated; -ECONNREFUSED if the
 * peer is not listening or has shut down receiving; -EAGAIN if the
 * queue is full and no timeout is left; sock_intr_errno() when
 * interrupted; -EISCONN / -EINVAL if sk is not in TCP_CLOSE.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
	if (err)
		goto out;

	/* Credential passing needs a bound sender: autobind if unbound. */
	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Releases other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	/* sk's state lock is not held yet, so this is a lockless read. */
	st = READ_ONCE(sk->sk_state);

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	/* State changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock.  Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1662
1663static int unix_socketpair(struct socket *socka, struct socket *sockb)
1664{
e27dfcea 1665 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1666
1667 /* Join our sockets back to back */
1668 sock_hold(ska);
1669 sock_hold(skb);
e27dfcea
JK
1670 unix_peer(ska) = skb;
1671 unix_peer(skb) = ska;
109f6e39
EB
1672 init_peercred(ska);
1673 init_peercred(skb);
1da177e4 1674
83301b53
CW
1675 ska->sk_state = TCP_ESTABLISHED;
1676 skb->sk_state = TCP_ESTABLISHED;
1677 socka->state = SS_CONNECTED;
1678 sockb->state = SS_CONNECTED;
1da177e4
LT
1679 return 0;
1680}
1681
90c6bd34
DB
1682static void unix_sock_inherit_flags(const struct socket *old,
1683 struct socket *new)
1684{
1685 if (test_bit(SOCK_PASSCRED, &old->flags))
1686 set_bit(SOCK_PASSCRED, &new->flags);
5e2ff670
AM
1687 if (test_bit(SOCK_PASSPIDFD, &old->flags))
1688 set_bit(SOCK_PASSPIDFD, &new->flags);
90c6bd34
DB
1689 if (test_bit(SOCK_PASSSEC, &old->flags))
1690 set_bit(SOCK_PASSSEC, &new->flags);
1691}
1692
cdfbabfb
DH
1693static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1694 bool kern)
1da177e4
LT
1695{
1696 struct sock *sk = sock->sk;
1697 struct sock *tsk;
1698 struct sk_buff *skb;
1699 int err;
1700
1701 err = -EOPNOTSUPP;
6eba6a37 1702 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1703 goto out;
1704
1705 err = -EINVAL;
1706 if (sk->sk_state != TCP_LISTEN)
1707 goto out;
1708
1709 /* If socket state is TCP_LISTEN it cannot change (for now...),
1710 * so that no locks are necessary.
1711 */
1712
f4b41f06
OH
1713 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1714 &err);
1da177e4
LT
1715 if (!skb) {
1716 /* This means receive shutdown. */
1717 if (err == 0)
1718 err = -EINVAL;
1719 goto out;
1720 }
1721
1722 tsk = skb->sk;
1723 skb_free_datagram(sk, skb);
1724 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1725
1726 /* attach accepted sock to socket */
1c92b4e5 1727 unix_state_lock(tsk);
1da177e4 1728 newsock->state = SS_CONNECTED;
90c6bd34 1729 unix_sock_inherit_flags(sock, newsock);
1da177e4 1730 sock_graft(tsk, newsock);
1c92b4e5 1731 unix_state_unlock(tsk);
1da177e4
LT
1732 return 0;
1733
1734out:
1735 return err;
1736}
1737
1738
9b2c45d4 1739static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1da177e4
LT
1740{
1741 struct sock *sk = sock->sk;
ae3b5641 1742 struct unix_address *addr;
13cfa97b 1743 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1744 int err = 0;
1745
1746 if (peer) {
1747 sk = unix_peer_get(sk);
1748
1749 err = -ENOTCONN;
1750 if (!sk)
1751 goto out;
1752 err = 0;
1753 } else {
1754 sock_hold(sk);
1755 }
1756
ae3b5641
AV
1757 addr = smp_load_acquire(&unix_sk(sk)->addr);
1758 if (!addr) {
1da177e4
LT
1759 sunaddr->sun_family = AF_UNIX;
1760 sunaddr->sun_path[0] = 0;
755662ce 1761 err = offsetof(struct sockaddr_un, sun_path);
1da177e4 1762 } else {
9b2c45d4
DV
1763 err = addr->len;
1764 memcpy(sunaddr, addr->name, addr->len);
859051dd
DDM
1765
1766 if (peer)
1767 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1768 CGROUP_UNIX_GETPEERNAME);
1769 else
1770 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1771 CGROUP_UNIX_GETSOCKNAME);
1da177e4 1772 }
1da177e4
LT
1773 sock_put(sk);
1774out:
1775 return err;
1776}
1777
cbcf0112
MS
1778static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1779{
1780 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1781
1782 /*
1783 * Garbage collection of unix sockets starts by selecting a set of
1784 * candidate sockets which have reference only from being in flight
1785 * (total_refs == inflight_refs). This condition is checked once during
1786 * the candidate collection phase, and candidates are marked as such, so
1787 * that non-candidates can later be ignored. While inflight_refs is
1788 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1789 * is an instantaneous decision.
1790 *
1791 * Once a candidate, however, the socket must not be reinstalled into a
1792 * file descriptor while the garbage collection is in progress.
1793 *
1794 * If the above conditions are met, then the directed graph of
1795 * candidates (*) does not change while unix_gc_lock is held.
1796 *
1797 * Any operations that changes the file count through file descriptors
1798 * (dup, close, sendmsg) does not change the graph since candidates are
1799 * not installed in fds.
1800 *
1801 * Dequeing a candidate via recvmsg would install it into an fd, but
1802 * that takes unix_gc_lock to decrement the inflight count, so it's
1803 * serialized with garbage collection.
1804 *
1805 * MSG_PEEK is special in that it does not change the inflight count,
1806 * yet does install the socket into an fd. The following lock/unlock
1807 * pair is to ensure serialization with garbage collection. It must be
1808 * done between incrementing the file count and installing the file into
1809 * an fd.
1810 *
1811 * If garbage collection starts after the barrier provided by the
1812 * lock/unlock, then it will see the elevated refcount and not mark this
1813 * as a candidate. If a garbage collection is already in progress
1814 * before the file count was incremented, then the lock/unlock pair will
1815 * ensure that garbage collection is finished before progressing to
1816 * installing the fd.
1817 *
1818 * (*) A -> B where B is on the queue of A or B is on the queue of C
1819 * which is on the queue of listening socket A.
1820 */
1821 spin_lock(&unix_gc_lock);
1822 spin_unlock(&unix_gc_lock);
1823}
1824
f78a5fda 1825static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1826{
1827 int err = 0;
16e57262 1828
f78a5fda 1829 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1830 UNIXCB(skb).uid = scm->creds.uid;
1831 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1832 UNIXCB(skb).fp = NULL;
37a9a8df 1833 unix_get_secdata(scm, skb);
7361c36c
EB
1834 if (scm->fp && send_fds)
1835 err = unix_attach_fds(scm, skb);
1836
1837 skb->destructor = unix_destruct_scm;
1838 return err;
1839}
1840
9490f886
HFS
1841static bool unix_passcred_enabled(const struct socket *sock,
1842 const struct sock *other)
1843{
1844 return test_bit(SOCK_PASSCRED, &sock->flags) ||
5e2ff670 1845 test_bit(SOCK_PASSPIDFD, &sock->flags) ||
9490f886 1846 !other->sk_socket ||
5e2ff670
AM
1847 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1848 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
9490f886
HFS
1849}
1850
16e57262
ED
1851/*
1852 * Some apps rely on write() giving SCM_CREDENTIALS
1853 * We include credentials if source or destination socket
1854 * asserted SOCK_PASSCRED.
1855 */
1856static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1857 const struct sock *other)
1858{
6b0ee8c0 1859 if (UNIXCB(skb).pid)
16e57262 1860 return;
9490f886 1861 if (unix_passcred_enabled(sock, other)) {
16e57262 1862 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1863 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1864 }
1865}
1866
9490f886
HFS
1867static bool unix_skb_scm_eq(struct sk_buff *skb,
1868 struct scm_cookie *scm)
1869{
b146cbf2
KC
1870 return UNIXCB(skb).pid == scm->pid &&
1871 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1872 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
9490f886
HFS
1873 unix_secdata_eq(scm, skb);
1874}
1875
3c32da19
KT
1876static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1877{
1878 struct scm_fp_list *fp = UNIXCB(skb).fp;
1879 struct unix_sock *u = unix_sk(sk);
1880
3c32da19 1881 if (unlikely(fp && fp->count))
7782040b 1882 atomic_add(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1883}
1884
1885static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1886{
1887 struct scm_fp_list *fp = UNIXCB(skb).fp;
1888 struct unix_sock *u = unix_sk(sk);
1889
3c32da19 1890 if (unlikely(fp && fp->count))
7782040b 1891 atomic_sub(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1892}
1893
1da177e4
LT
1894/*
1895 * Send AF_UNIX data.
1896 */
1897
1b784140
YX
1898static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1899 size_t len)
1da177e4 1900{
342dfc30 1901 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
340c3d33
KI
1902 struct sock *sk = sock->sk, *other = NULL;
1903 struct unix_sock *u = unix_sk(sk);
7cc05662 1904 struct scm_cookie scm;
340c3d33 1905 struct sk_buff *skb;
eb6a2481 1906 int data_len = 0;
7d267278 1907 int sk_locked;
340c3d33
KI
1908 long timeo;
1909 int err;
1da177e4 1910
5f23b734 1911 wait_for_unix_gc();
7cc05662 1912 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1913 if (err < 0)
1914 return err;
1915
1916 err = -EOPNOTSUPP;
1917 if (msg->msg_flags&MSG_OOB)
1918 goto out;
1919
1920 if (msg->msg_namelen) {
b8a58aa6
KI
1921 err = unix_validate_addr(sunaddr, msg->msg_namelen);
1922 if (err)
1923 goto out;
859051dd
DDM
1924
1925 err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
1926 msg->msg_name,
1927 &msg->msg_namelen,
1928 NULL);
1929 if (err)
1930 goto out;
1da177e4
LT
1931 } else {
1932 sunaddr = NULL;
1933 err = -ENOTCONN;
1934 other = unix_peer_get(sk);
1935 if (!other)
1936 goto out;
1937 }
1938
5e2ff670
AM
1939 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1940 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
f7ed31f4
KI
1941 err = unix_autobind(sk);
1942 if (err)
1943 goto out;
1944 }
1da177e4
LT
1945
1946 err = -EMSGSIZE;
1947 if (len > sk->sk_sndbuf - 32)
1948 goto out;
1949
31ff6aa5 1950 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1951 data_len = min_t(size_t,
1952 len - SKB_MAX_ALLOC,
1953 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1954 data_len = PAGE_ALIGN(data_len);
1955
1956 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1957 }
eb6a2481
ED
1958
1959 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1960 msg->msg_flags & MSG_DONTWAIT, &err,
1961 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1962 if (skb == NULL)
1da177e4
LT
1963 goto out;
1964
7cc05662 1965 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1966 if (err < 0)
7361c36c 1967 goto out_free;
877ce7c1 1968
eb6a2481
ED
1969 skb_put(skb, len - data_len);
1970 skb->data_len = data_len;
1971 skb->len = len;
c0371da6 1972 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1973 if (err)
1974 goto out_free;
1975
1976 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1977
1978restart:
1979 if (!other) {
1980 err = -ECONNRESET;
1981 if (sunaddr == NULL)
1982 goto out_free;
1983
340c3d33 1984 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
d2d8c9fd 1985 sk->sk_type);
aed26f55
KI
1986 if (IS_ERR(other)) {
1987 err = PTR_ERR(other);
1988 other = NULL;
1da177e4 1989 goto out_free;
aed26f55 1990 }
1da177e4
LT
1991 }
1992
d6ae3bae
AC
1993 if (sk_filter(other, skb) < 0) {
1994 /* Toss the packet but do not return any error to the sender */
1995 err = len;
1996 goto out_free;
1997 }
1998
7d267278 1999 sk_locked = 0;
1c92b4e5 2000 unix_state_lock(other);
7d267278 2001restart_locked:
1da177e4
LT
2002 err = -EPERM;
2003 if (!unix_may_send(sk, other))
2004 goto out_unlock;
2005
7d267278 2006 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1da177e4
LT
2007 /*
2008 * Check with 1003.1g - what should
2009 * datagram error
2010 */
1c92b4e5 2011 unix_state_unlock(other);
1da177e4
LT
2012 sock_put(other);
2013
7d267278
RW
2014 if (!sk_locked)
2015 unix_state_lock(sk);
2016
1da177e4 2017 err = 0;
3ff8bff7
KT
2018 if (sk->sk_type == SOCK_SEQPACKET) {
2019 /* We are here only when racing with unix_release_sock()
2020 * is clearing @other. Never change state to TCP_CLOSE
2021 * unlike SOCK_DGRAM wants.
2022 */
2023 unix_state_unlock(sk);
2024 err = -EPIPE;
2025 } else if (unix_peer(sk) == other) {
e27dfcea 2026 unix_peer(sk) = NULL;
7d267278
RW
2027 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2028
3ff8bff7 2029 sk->sk_state = TCP_CLOSE;
1c92b4e5 2030 unix_state_unlock(sk);
1da177e4
LT
2031
2032 unix_dgram_disconnected(sk, other);
2033 sock_put(other);
2034 err = -ECONNREFUSED;
2035 } else {
1c92b4e5 2036 unix_state_unlock(sk);
1da177e4
LT
2037 }
2038
2039 other = NULL;
2040 if (err)
2041 goto out_free;
2042 goto restart;
2043 }
2044
2045 err = -EPIPE;
2046 if (other->sk_shutdown & RCV_SHUTDOWN)
2047 goto out_unlock;
2048
2049 if (sk->sk_type != SOCK_SEQPACKET) {
2050 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2051 if (err)
2052 goto out_unlock;
2053 }
2054
a5527dda
RW
2055 /* other == sk && unix_peer(other) != sk if
2056 * - unix_peer(sk) == NULL, destination address bound to sk
2057 * - unix_peer(sk) == sk by time of get but disconnected before lock
2058 */
2059 if (other != sk &&
86b18aaa
QC
2060 unlikely(unix_peer(other) != sk &&
2061 unix_recvq_full_lockless(other))) {
7d267278
RW
2062 if (timeo) {
2063 timeo = unix_wait_for_peer(other, timeo);
2064
2065 err = sock_intr_errno(timeo);
2066 if (signal_pending(current))
2067 goto out_free;
2068
2069 goto restart;
1da177e4
LT
2070 }
2071
7d267278
RW
2072 if (!sk_locked) {
2073 unix_state_unlock(other);
2074 unix_state_double_lock(sk, other);
2075 }
1da177e4 2076
7d267278
RW
2077 if (unix_peer(sk) != other ||
2078 unix_dgram_peer_wake_me(sk, other)) {
2079 err = -EAGAIN;
2080 sk_locked = 1;
2081 goto out_unlock;
2082 }
1da177e4 2083
7d267278
RW
2084 if (!sk_locked) {
2085 sk_locked = 1;
2086 goto restart_locked;
2087 }
1da177e4
LT
2088 }
2089
7d267278
RW
2090 if (unlikely(sk_locked))
2091 unix_state_unlock(sk);
2092
3f66116e
AC
2093 if (sock_flag(other, SOCK_RCVTSTAMP))
2094 __net_timestamp(skb);
16e57262 2095 maybe_add_creds(skb, sock, other);
3c32da19 2096 scm_stat_add(other, skb);
7782040b 2097 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2098 unix_state_unlock(other);
676d2369 2099 other->sk_data_ready(other);
1da177e4 2100 sock_put(other);
7cc05662 2101 scm_destroy(&scm);
1da177e4
LT
2102 return len;
2103
2104out_unlock:
7d267278
RW
2105 if (sk_locked)
2106 unix_state_unlock(sk);
1c92b4e5 2107 unix_state_unlock(other);
1da177e4
LT
2108out_free:
2109 kfree_skb(skb);
2110out:
2111 if (other)
2112 sock_put(other);
7cc05662 2113 scm_destroy(&scm);
1da177e4
LT
2114 return err;
2115}
2116
/* Stream sockets send paged skbs.  The paged part of one skb is capped at
 * 32768 bytes rounded up to a whole power-of-two number of pages, so it is
 * never smaller than one full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 2121
4edf21aa 2122#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2aab4b96
ED
2123static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2124 struct scm_cookie *scm, bool fds_sent)
314001f0
RS
2125{
2126 struct unix_sock *ousk = unix_sk(other);
2127 struct sk_buff *skb;
2128 int err = 0;
2129
2130 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2131
2132 if (!skb)
2133 return err;
2134
2aab4b96
ED
2135 err = unix_scm_to_skb(scm, skb, !fds_sent);
2136 if (err < 0) {
2137 kfree_skb(skb);
2138 return err;
2139 }
314001f0 2140 skb_put(skb, 1);
314001f0
RS
2141 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2142
2143 if (err) {
2144 kfree_skb(skb);
2145 return err;
2146 }
2147
2148 unix_state_lock(other);
19eed721
RS
2149
2150 if (sock_flag(other, SOCK_DEAD) ||
2151 (other->sk_shutdown & RCV_SHUTDOWN)) {
2152 unix_state_unlock(other);
2153 kfree_skb(skb);
2154 return -EPIPE;
2155 }
2156
314001f0
RS
2157 maybe_add_creds(skb, sock, other);
2158 skb_get(skb);
2159
2160 if (ousk->oob_skb)
19eed721 2161 consume_skb(ousk->oob_skb);
314001f0 2162
e82025c6 2163 WRITE_ONCE(ousk->oob_skb, skb);
314001f0
RS
2164
2165 scm_stat_add(other, skb);
2166 skb_queue_tail(&other->sk_receive_queue, skb);
2167 sk_send_sigurg(other);
2168 unix_state_unlock(other);
2169 other->sk_data_ready(other);
2170
2171 return err;
2172}
2173#endif
2174
1b784140
YX
2175static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2176 size_t len)
1da177e4 2177{
1da177e4
LT
2178 struct sock *sk = sock->sk;
2179 struct sock *other = NULL;
6eba6a37 2180 int err, size;
f78a5fda 2181 struct sk_buff *skb;
e27dfcea 2182 int sent = 0;
7cc05662 2183 struct scm_cookie scm;
8ba69ba6 2184 bool fds_sent = false;
e370a723 2185 int data_len;
1da177e4 2186
5f23b734 2187 wait_for_unix_gc();
7cc05662 2188 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2189 if (err < 0)
2190 return err;
2191
2192 err = -EOPNOTSUPP;
314001f0 2193 if (msg->msg_flags & MSG_OOB) {
4edf21aa 2194#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0
RS
2195 if (len)
2196 len--;
2197 else
2198#endif
2199 goto out_err;
2200 }
1da177e4
LT
2201
2202 if (msg->msg_namelen) {
2203 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2204 goto out_err;
2205 } else {
1da177e4 2206 err = -ENOTCONN;
830a1e5c 2207 other = unix_peer(sk);
1da177e4
LT
2208 if (!other)
2209 goto out_err;
2210 }
2211
2212 if (sk->sk_shutdown & SEND_SHUTDOWN)
2213 goto pipe_err;
2214
6eba6a37 2215 while (sent < len) {
e370a723 2216 size = len - sent;
1da177e4 2217
a0dbf5f8
DH
2218 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2219 skb = sock_alloc_send_pskb(sk, 0, 0,
2220 msg->msg_flags & MSG_DONTWAIT,
2221 &err, 0);
2222 } else {
2223 /* Keep two messages in the pipe so it schedules better */
2224 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 2225
a0dbf5f8
DH
2226 /* allow fallback to order-0 allocations */
2227 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 2228
a0dbf5f8 2229 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 2230
a0dbf5f8 2231 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
31ff6aa5 2232
a0dbf5f8
DH
2233 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2234 msg->msg_flags & MSG_DONTWAIT, &err,
2235 get_order(UNIX_SKB_FRAGS_SZ));
2236 }
e370a723 2237 if (!skb)
1da177e4
LT
2238 goto out_err;
2239
f78a5fda 2240 /* Only send the fds in the first buffer */
7cc05662 2241 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 2242 if (err < 0) {
7361c36c 2243 kfree_skb(skb);
f78a5fda 2244 goto out_err;
6209344f 2245 }
7361c36c 2246 fds_sent = true;
1da177e4 2247
a0dbf5f8
DH
2248 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2249 err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2250 sk->sk_allocation);
2251 if (err < 0) {
2252 kfree_skb(skb);
2253 goto out_err;
2254 }
2255 size = err;
2256 refcount_add(size, &sk->sk_wmem_alloc);
2257 } else {
2258 skb_put(skb, size - data_len);
2259 skb->data_len = data_len;
2260 skb->len = size;
2261 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2262 if (err) {
2263 kfree_skb(skb);
2264 goto out_err;
2265 }
1da177e4
LT
2266 }
2267
1c92b4e5 2268 unix_state_lock(other);
1da177e4
LT
2269
2270 if (sock_flag(other, SOCK_DEAD) ||
2271 (other->sk_shutdown & RCV_SHUTDOWN))
2272 goto pipe_err_free;
2273
16e57262 2274 maybe_add_creds(skb, sock, other);
3c32da19 2275 scm_stat_add(other, skb);
7782040b 2276 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2277 unix_state_unlock(other);
676d2369 2278 other->sk_data_ready(other);
e27dfcea 2279 sent += size;
1da177e4 2280 }
1da177e4 2281
4edf21aa 2282#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2283 if (msg->msg_flags & MSG_OOB) {
2aab4b96 2284 err = queue_oob(sock, msg, other, &scm, fds_sent);
314001f0
RS
2285 if (err)
2286 goto out_err;
2287 sent++;
2288 }
2289#endif
2290
7cc05662 2291 scm_destroy(&scm);
1da177e4
LT
2292
2293 return sent;
2294
2295pipe_err_free:
1c92b4e5 2296 unix_state_unlock(other);
1da177e4
LT
2297 kfree_skb(skb);
2298pipe_err:
6eba6a37
ED
2299 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2300 send_sig(SIGPIPE, current, 0);
1da177e4
LT
2301 err = -EPIPE;
2302out_err:
7cc05662 2303 scm_destroy(&scm);
1da177e4
LT
2304 return sent ? : err;
2305}
2306
1b784140
YX
2307static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2308 size_t len)
1da177e4
LT
2309{
2310 int err;
2311 struct sock *sk = sock->sk;
ac7bfa62 2312
1da177e4
LT
2313 err = sock_error(sk);
2314 if (err)
2315 return err;
2316
2317 if (sk->sk_state != TCP_ESTABLISHED)
2318 return -ENOTCONN;
2319
2320 if (msg->msg_namelen)
2321 msg->msg_namelen = 0;
2322
1b784140 2323 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2324}
ac7bfa62 2325
1b784140
YX
2326static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2327 size_t size, int flags)
a05d2ad1
EB
2328{
2329 struct sock *sk = sock->sk;
2330
2331 if (sk->sk_state != TCP_ESTABLISHED)
2332 return -ENOTCONN;
2333
1b784140 2334 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2335}
2336
1da177e4
LT
2337static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2338{
ae3b5641 2339 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2340
ae3b5641
AV
2341 if (addr) {
2342 msg->msg_namelen = addr->len;
2343 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2344 }
2345}
2346
9825d866
CW
2347int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2348 int flags)
1da177e4 2349{
7cc05662 2350 struct scm_cookie scm;
9825d866 2351 struct socket *sock = sk->sk_socket;
1da177e4 2352 struct unix_sock *u = unix_sk(sk);
64874280
RW
2353 struct sk_buff *skb, *last;
2354 long timeo;
fd69c399 2355 int skip;
1da177e4
LT
2356 int err;
2357
2358 err = -EOPNOTSUPP;
2359 if (flags&MSG_OOB)
2360 goto out;
2361
64874280 2362 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2363
64874280 2364 do {
6e1ce3c3 2365 mutex_lock(&u->iolock);
f55bb7f9 2366
64874280 2367 skip = sk_peek_offset(sk, flags);
b50b0580 2368 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
e427cad6
PA
2369 &skip, &err, &last);
2370 if (skb) {
2371 if (!(flags & MSG_PEEK))
2372 scm_stat_del(sk, skb);
64874280 2373 break;
e427cad6 2374 }
64874280 2375
6e1ce3c3 2376 mutex_unlock(&u->iolock);
64874280
RW
2377
2378 if (err != -EAGAIN)
2379 break;
2380 } while (timeo &&
b50b0580
SD
2381 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2382 &err, &timeo, last));
64874280 2383
6e1ce3c3 2384 if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2385 unix_state_lock(sk);
2386 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2387 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2388 (sk->sk_shutdown & RCV_SHUTDOWN))
2389 err = 0;
2390 unix_state_unlock(sk);
64874280 2391 goto out;
0a112258 2392 }
1da177e4 2393
77b75f4d
RW
2394 if (wq_has_sleeper(&u->peer_wait))
2395 wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2396 EPOLLOUT | EPOLLWRNORM |
2397 EPOLLWRBAND);
1da177e4 2398
859051dd 2399 if (msg->msg_name) {
1da177e4
LT
2400 unix_copy_addr(msg, skb->sk);
2401
859051dd
DDM
2402 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2403 msg->msg_name,
2404 &msg->msg_namelen);
2405 }
2406
f55bb7f9
PE
2407 if (size > skb->len - skip)
2408 size = skb->len - skip;
2409 else if (size < skb->len - skip)
1da177e4
LT
2410 msg->msg_flags |= MSG_TRUNC;
2411
51f3d02b 2412 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2413 if (err)
2414 goto out_free;
2415
3f66116e
AC
2416 if (sock_flag(sk, SOCK_RCVTSTAMP))
2417 __sock_recv_timestamp(msg, sk, skb);
2418
7cc05662
CH
2419 memset(&scm, 0, sizeof(scm));
2420
2421 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2422 unix_set_secdata(&scm, skb);
1da177e4 2423
6eba6a37 2424 if (!(flags & MSG_PEEK)) {
1da177e4 2425 if (UNIXCB(skb).fp)
7cc05662 2426 unix_detach_fds(&scm, skb);
f55bb7f9
PE
2427
2428 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2429 } else {
1da177e4
LT
2430 /* It is questionable: on PEEK we could:
2431 - do not return fds - good, but too simple 8)
2432 - return fds, and do not return them on read (old strategy,
2433 apparently wrong)
2434 - clone fds (I chose it for now, it is the most universal
2435 solution)
ac7bfa62
YH
2436
2437 POSIX 1003.1g does not actually define this clearly
2438 at all. POSIX 1003.1g doesn't define a lot of things
2439 clearly however!
2440
1da177e4 2441 */
f55bb7f9
PE
2442
2443 sk_peek_offset_fwd(sk, size);
2444
1da177e4 2445 if (UNIXCB(skb).fp)
cbcf0112 2446 unix_peek_fds(&scm, skb);
1da177e4 2447 }
9f6f9af7 2448 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2449
a9c49cc2 2450 scm_recv_unix(sock, msg, &scm, flags);
1da177e4
LT
2451
2452out_free:
6eba6a37 2453 skb_free_datagram(sk, skb);
6e1ce3c3 2454 mutex_unlock(&u->iolock);
1da177e4
LT
2455out:
2456 return err;
2457}
29df44fa 2458
9825d866
CW
2459static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2460 int flags)
2461{
2462 struct sock *sk = sock->sk;
2463
2464#ifdef CONFIG_BPF_SYSCALL
94531cfc
JW
2465 const struct proto *prot = READ_ONCE(sk->sk_prot);
2466
2467 if (prot != &unix_dgram_proto)
ec095263 2468 return prot->recvmsg(sk, msg, size, flags, NULL);
9825d866
CW
2469#endif
2470 return __unix_dgram_recvmsg(sk, msg, size, flags);
2471}
2472
965b57b4 2473static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
29df44fa 2474{
d6e3b27c
PY
2475 struct unix_sock *u = unix_sk(sk);
2476 struct sk_buff *skb;
78fa0d61 2477 int err;
29df44fa 2478
d6e3b27c
PY
2479 mutex_lock(&u->iolock);
2480 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2481 mutex_unlock(&u->iolock);
2482 if (!skb)
2483 return err;
29df44fa 2484
78fa0d61 2485 return recv_actor(sk, skb);
29df44fa 2486}
1da177e4
LT
2487
2488/*
79f632c7 2489 * Sleep until more data has arrived. But check for races..
1da177e4 2490 */
79f632c7 2491static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2492 struct sk_buff *last, unsigned int last_len,
2493 bool freezable)
1da177e4 2494{
f5d39b02 2495 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2b514574 2496 struct sk_buff *tail;
1da177e4
LT
2497 DEFINE_WAIT(wait);
2498
1c92b4e5 2499 unix_state_lock(sk);
1da177e4
LT
2500
2501 for (;;) {
f5d39b02 2502 prepare_to_wait(sk_sleep(sk), &wait, state);
1da177e4 2503
2b514574
HFS
2504 tail = skb_peek_tail(&sk->sk_receive_queue);
2505 if (tail != last ||
2506 (tail && tail->len != last_len) ||
1da177e4
LT
2507 sk->sk_err ||
2508 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2509 signal_pending(current) ||
2510 !timeo)
2511 break;
2512
9cd3e072 2513 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2514 unix_state_unlock(sk);
f5d39b02 2515 timeo = schedule_timeout(timeo);
1c92b4e5 2516 unix_state_lock(sk);
b48732e4
MS
2517
2518 if (sock_flag(sk, SOCK_DEAD))
2519 break;
2520
9cd3e072 2521 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2522 }
2523
aa395145 2524 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2525 unix_state_unlock(sk);
1da177e4
LT
2526 return timeo;
2527}
2528
e370a723
ED
2529static unsigned int unix_skb_len(const struct sk_buff *skb)
2530{
2531 return skb->len - UNIXCB(skb).consumed;
2532}
2533
2b514574
HFS
2534struct unix_stream_read_state {
2535 int (*recv_actor)(struct sk_buff *, int, int,
2536 struct unix_stream_read_state *);
2537 struct socket *socket;
2538 struct msghdr *msg;
2539 struct pipe_inode_info *pipe;
2540 size_t size;
2541 int flags;
2542 unsigned int splice_flags;
2543};
2544
314001f0
RS
2545#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2546static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2547{
2548 struct socket *sock = state->socket;
2549 struct sock *sk = sock->sk;
2550 struct unix_sock *u = unix_sk(sk);
2551 int chunk = 1;
876c14ad 2552 struct sk_buff *oob_skb;
314001f0 2553
876c14ad
RS
2554 mutex_lock(&u->iolock);
2555 unix_state_lock(sk);
2556
2557 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2558 unix_state_unlock(sk);
2559 mutex_unlock(&u->iolock);
314001f0 2560 return -EINVAL;
876c14ad 2561 }
314001f0 2562
876c14ad 2563 oob_skb = u->oob_skb;
314001f0 2564
e82025c6
KI
2565 if (!(state->flags & MSG_PEEK))
2566 WRITE_ONCE(u->oob_skb, NULL);
4b7b4926
ED
2567 else
2568 skb_get(oob_skb);
876c14ad
RS
2569 unix_state_unlock(sk);
2570
2571 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2572
4b7b4926 2573 if (!(state->flags & MSG_PEEK))
876c14ad 2574 UNIXCB(oob_skb).consumed += 1;
4b7b4926
ED
2575
2576 consume_skb(oob_skb);
876c14ad
RS
2577
2578 mutex_unlock(&u->iolock);
2579
2580 if (chunk < 0)
2581 return -EFAULT;
2582
314001f0
RS
2583 state->msg->msg_flags |= MSG_OOB;
2584 return 1;
2585}
2586
2587static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2588 int flags, int copied)
2589{
2590 struct unix_sock *u = unix_sk(sk);
2591
2592 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2593 skb_unlink(skb, &sk->sk_receive_queue);
2594 consume_skb(skb);
2595 skb = NULL;
2596 } else {
2597 if (skb == u->oob_skb) {
2598 if (copied) {
2599 skb = NULL;
2600 } else if (sock_flag(sk, SOCK_URGINLINE)) {
2601 if (!(flags & MSG_PEEK)) {
e82025c6 2602 WRITE_ONCE(u->oob_skb, NULL);
314001f0
RS
2603 consume_skb(skb);
2604 }
2605 } else if (!(flags & MSG_PEEK)) {
2606 skb_unlink(skb, &sk->sk_receive_queue);
2607 consume_skb(skb);
2608 skb = skb_peek(&sk->sk_receive_queue);
2609 }
2610 }
2611 }
2612 return skb;
2613}
2614#endif
2615
965b57b4 2616static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
77462de1
JW
2617{
2618 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2619 return -ENOTCONN;
2620
965b57b4 2621 return unix_read_skb(sk, recv_actor);
77462de1
JW
2622}
2623
06a77b07
WC
2624static int unix_stream_read_generic(struct unix_stream_read_state *state,
2625 bool freezable)
1da177e4 2626{
7cc05662 2627 struct scm_cookie scm;
2b514574 2628 struct socket *sock = state->socket;
1da177e4
LT
2629 struct sock *sk = sock->sk;
2630 struct unix_sock *u = unix_sk(sk);
1da177e4 2631 int copied = 0;
2b514574 2632 int flags = state->flags;
de144391 2633 int noblock = flags & MSG_DONTWAIT;
2b514574 2634 bool check_creds = false;
1da177e4
LT
2635 int target;
2636 int err = 0;
2637 long timeo;
fc0d7536 2638 int skip;
2b514574
HFS
2639 size_t size = state->size;
2640 unsigned int last_len;
1da177e4 2641
1b92ee3d
RW
2642 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2643 err = -EINVAL;
1da177e4 2644 goto out;
1b92ee3d 2645 }
1da177e4 2646
1b92ee3d
RW
2647 if (unlikely(flags & MSG_OOB)) {
2648 err = -EOPNOTSUPP;
314001f0 2649#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2650 err = unix_stream_recv_urg(state);
314001f0 2651#endif
1da177e4 2652 goto out;
1b92ee3d 2653 }
1da177e4 2654
2b514574 2655 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2656 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2657
2b514574
HFS
2658 memset(&scm, 0, sizeof(scm));
2659
1da177e4
LT
2660 /* Lock the socket to prevent queue disordering
2661 * while sleeps in memcpy_tomsg
2662 */
6e1ce3c3 2663 mutex_lock(&u->iolock);
1da177e4 2664
a0917e0b 2665 skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2666
6eba6a37 2667 do {
1da177e4 2668 int chunk;
73ed5d25 2669 bool drop_skb;
79f632c7 2670 struct sk_buff *skb, *last;
1da177e4 2671
18eceb81 2672redo:
3c0d2f37 2673 unix_state_lock(sk);
b48732e4
MS
2674 if (sock_flag(sk, SOCK_DEAD)) {
2675 err = -ECONNRESET;
2676 goto unlock;
2677 }
79f632c7 2678 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2679 last_len = last ? last->len : 0;
314001f0
RS
2680
2681#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2682 if (skb) {
2683 skb = manage_oob(skb, sk, flags, copied);
2684 if (!skb) {
2685 unix_state_unlock(sk);
2686 if (copied)
2687 break;
2688 goto redo;
2689 }
2690 }
2691#endif
fc0d7536 2692again:
6eba6a37 2693 if (skb == NULL) {
1da177e4 2694 if (copied >= target)
3c0d2f37 2695 goto unlock;
1da177e4
LT
2696
2697 /*
2698 * POSIX 1003.1g mandates this order.
2699 */
ac7bfa62 2700
6eba6a37
ED
2701 err = sock_error(sk);
2702 if (err)
3c0d2f37 2703 goto unlock;
1da177e4 2704 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2705 goto unlock;
2706
2707 unix_state_unlock(sk);
1b92ee3d
RW
2708 if (!timeo) {
2709 err = -EAGAIN;
1da177e4 2710 break;
1b92ee3d
RW
2711 }
2712
6e1ce3c3 2713 mutex_unlock(&u->iolock);
1da177e4 2714
2b514574 2715 timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2716 last_len, freezable);
1da177e4 2717
3822b5c2 2718 if (signal_pending(current)) {
1da177e4 2719 err = sock_intr_errno(timeo);
fa0dc04d 2720 scm_destroy(&scm);
1da177e4
LT
2721 goto out;
2722 }
b3ca9b02 2723
6e1ce3c3 2724 mutex_lock(&u->iolock);
18eceb81 2725 goto redo;
2b514574 2726unlock:
3c0d2f37
MS
2727 unix_state_unlock(sk);
2728 break;
1da177e4 2729 }
fc0d7536 2730
e370a723
ED
2731 while (skip >= unix_skb_len(skb)) {
2732 skip -= unix_skb_len(skb);
79f632c7 2733 last = skb;
2b514574 2734 last_len = skb->len;
fc0d7536 2735 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2736 if (!skb)
2737 goto again;
fc0d7536
PE
2738 }
2739
3c0d2f37 2740 unix_state_unlock(sk);
1da177e4
LT
2741
2742 if (check_creds) {
2743 /* Never glue messages from different writers */
9490f886 2744 if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2745 break;
5e2ff670
AM
2746 } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2747 test_bit(SOCK_PASSPIDFD, &sock->flags)) {
1da177e4 2748 /* Copy credentials */
7cc05662 2749 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2750 unix_set_secdata(&scm, skb);
2b514574 2751 check_creds = true;
1da177e4
LT
2752 }
2753
2754 /* Copy address just once */
2b514574
HFS
2755 if (state->msg && state->msg->msg_name) {
2756 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2757 state->msg->msg_name);
2758 unix_copy_addr(state->msg, skb->sk);
859051dd
DDM
2759
2760 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2761 state->msg->msg_name,
2762 &state->msg->msg_namelen);
2763
1da177e4
LT
2764 sunaddr = NULL;
2765 }
2766
e370a723 2767 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
73ed5d25 2768 skb_get(skb);
2b514574 2769 chunk = state->recv_actor(skb, skip, chunk, state);
73ed5d25
HFS
2770 drop_skb = !unix_skb_len(skb);
2771 /* skb is only safe to use if !drop_skb */
2772 consume_skb(skb);
2b514574 2773 if (chunk < 0) {
1da177e4
LT
2774 if (copied == 0)
2775 copied = -EFAULT;
2776 break;
2777 }
2778 copied += chunk;
2779 size -= chunk;
2780
73ed5d25
HFS
2781 if (drop_skb) {
2782 /* the skb was touched by a concurrent reader;
2783 * we should not expect anything from this skb
2784 * anymore and assume it invalid - we can be
2785 * sure it was dropped from the socket queue
2786 *
2787 * let's report a short read
2788 */
2789 err = 0;
2790 break;
2791 }
2792
1da177e4 2793 /* Mark read part of skb as used */
6eba6a37 2794 if (!(flags & MSG_PEEK)) {
e370a723 2795 UNIXCB(skb).consumed += chunk;
1da177e4 2796
fc0d7536
PE
2797 sk_peek_offset_bwd(sk, chunk);
2798
3c32da19 2799 if (UNIXCB(skb).fp) {
3c32da19 2800 scm_stat_del(sk, skb);
7cc05662 2801 unix_detach_fds(&scm, skb);
3c32da19 2802 }
1da177e4 2803
e370a723 2804 if (unix_skb_len(skb))
1da177e4 2805 break;
1da177e4 2806
6f01fd6e 2807 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2808 consume_skb(skb);
1da177e4 2809
7cc05662 2810 if (scm.fp)
1da177e4 2811 break;
6eba6a37 2812 } else {
1da177e4
LT
2813 /* It is questionable, see note in unix_dgram_recvmsg.
2814 */
2815 if (UNIXCB(skb).fp)
cbcf0112 2816 unix_peek_fds(&scm, skb);
1da177e4 2817
e9193d60 2818 sk_peek_offset_fwd(sk, chunk);
fc0d7536 2819
9f389e35
AC
2820 if (UNIXCB(skb).fp)
2821 break;
2822
e9193d60 2823 skip = 0;
9f389e35
AC
2824 last = skb;
2825 last_len = skb->len;
2826 unix_state_lock(sk);
2827 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2828 if (skb)
2829 goto again;
2830 unix_state_unlock(sk);
1da177e4
LT
2831 break;
2832 }
2833 } while (size);
2834
6e1ce3c3 2835 mutex_unlock(&u->iolock);
9d797ee2 2836 if (state->msg)
a9c49cc2 2837 scm_recv_unix(sock, state->msg, &scm, flags);
2b514574
HFS
2838 else
2839 scm_destroy(&scm);
1da177e4
LT
2840out:
2841 return copied ? : err;
2842}
2843
2b514574
HFS
2844static int unix_stream_read_actor(struct sk_buff *skb,
2845 int skip, int chunk,
2846 struct unix_stream_read_state *state)
2847{
2848 int ret;
2849
2850 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2851 state->msg, chunk);
2852 return ret ?: chunk;
2853}
2854
94531cfc
JW
2855int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2856 size_t size, int flags)
2857{
2858 struct unix_stream_read_state state = {
2859 .recv_actor = unix_stream_read_actor,
2860 .socket = sk->sk_socket,
2861 .msg = msg,
2862 .size = size,
2863 .flags = flags
2864 };
2865
2866 return unix_stream_read_generic(&state, true);
2867}
2868
2b514574
HFS
2869static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2870 size_t size, int flags)
2871{
2872 struct unix_stream_read_state state = {
2873 .recv_actor = unix_stream_read_actor,
2874 .socket = sock,
2875 .msg = msg,
2876 .size = size,
2877 .flags = flags
2878 };
2879
94531cfc
JW
2880#ifdef CONFIG_BPF_SYSCALL
2881 struct sock *sk = sock->sk;
2882 const struct proto *prot = READ_ONCE(sk->sk_prot);
2883
2884 if (prot != &unix_stream_proto)
ec095263 2885 return prot->recvmsg(sk, msg, size, flags, NULL);
94531cfc 2886#endif
06a77b07 2887 return unix_stream_read_generic(&state, true);
2b514574
HFS
2888}
2889
2b514574
HFS
2890static int unix_stream_splice_actor(struct sk_buff *skb,
2891 int skip, int chunk,
2892 struct unix_stream_read_state *state)
2893{
2894 return skb_splice_bits(skb, state->socket->sk,
2895 UNIXCB(skb).consumed + skip,
25869262 2896 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2897}
2898
2899static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2900 struct pipe_inode_info *pipe,
2901 size_t size, unsigned int flags)
2902{
2903 struct unix_stream_read_state state = {
2904 .recv_actor = unix_stream_splice_actor,
2905 .socket = sock,
2906 .pipe = pipe,
2907 .size = size,
2908 .splice_flags = flags,
2909 };
2910
2911 if (unlikely(*ppos))
2912 return -ESPIPE;
2913
2914 if (sock->file->f_flags & O_NONBLOCK ||
2915 flags & SPLICE_F_NONBLOCK)
2916 state.flags = MSG_DONTWAIT;
2917
06a77b07 2918 return unix_stream_read_generic(&state, false);
2b514574
HFS
2919}
2920
1da177e4
LT
2921static int unix_shutdown(struct socket *sock, int mode)
2922{
2923 struct sock *sk = sock->sk;
2924 struct sock *other;
2925
fc61b928
XW
2926 if (mode < SHUT_RD || mode > SHUT_RDWR)
2927 return -EINVAL;
2928 /* This maps:
2929 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2930 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2931 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2932 */
2933 ++mode;
7180a031
AC
2934
2935 unix_state_lock(sk);
e1d09c2c 2936 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
7180a031
AC
2937 other = unix_peer(sk);
2938 if (other)
2939 sock_hold(other);
2940 unix_state_unlock(sk);
2941 sk->sk_state_change(sk);
2942
2943 if (other &&
2944 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2945
2946 int peer_mode = 0;
94531cfc 2947 const struct proto *prot = READ_ONCE(other->sk_prot);
7180a031 2948
d359902d
JW
2949 if (prot->unhash)
2950 prot->unhash(other);
7180a031
AC
2951 if (mode&RCV_SHUTDOWN)
2952 peer_mode |= SEND_SHUTDOWN;
2953 if (mode&SEND_SHUTDOWN)
2954 peer_mode |= RCV_SHUTDOWN;
2955 unix_state_lock(other);
e1d09c2c 2956 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
7180a031
AC
2957 unix_state_unlock(other);
2958 other->sk_state_change(other);
d0c6416b 2959 if (peer_mode == SHUTDOWN_MASK)
7180a031 2960 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
d0c6416b 2961 else if (peer_mode & RCV_SHUTDOWN)
7180a031 2962 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2963 }
7180a031
AC
2964 if (other)
2965 sock_put(other);
2966
1da177e4
LT
2967 return 0;
2968}
2969
885ee74d
PE
2970long unix_inq_len(struct sock *sk)
2971{
2972 struct sk_buff *skb;
2973 long amount = 0;
2974
2975 if (sk->sk_state == TCP_LISTEN)
2976 return -EINVAL;
2977
2978 spin_lock(&sk->sk_receive_queue.lock);
2979 if (sk->sk_type == SOCK_STREAM ||
2980 sk->sk_type == SOCK_SEQPACKET) {
2981 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2982 amount += unix_skb_len(skb);
885ee74d
PE
2983 } else {
2984 skb = skb_peek(&sk->sk_receive_queue);
2985 if (skb)
2986 amount = skb->len;
2987 }
2988 spin_unlock(&sk->sk_receive_queue.lock);
2989
2990 return amount;
2991}
2992EXPORT_SYMBOL_GPL(unix_inq_len);
2993
/* Report the value of sk_wmem_alloc_get() for @sk as the output queue length. */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
2998EXPORT_SYMBOL_GPL(unix_outq_len);
2999
ba94f308
AV
3000static int unix_open_file(struct sock *sk)
3001{
3002 struct path path;
3003 struct file *f;
3004 int fd;
3005
3006 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3007 return -EPERM;
3008
ae3b5641
AV
3009 if (!smp_load_acquire(&unix_sk(sk)->addr))
3010 return -ENOENT;
3011
ba94f308 3012 path = unix_sk(sk)->path;
ae3b5641 3013 if (!path.dentry)
ba94f308 3014 return -ENOENT;
ba94f308
AV
3015
3016 path_get(&path);
ba94f308
AV
3017
3018 fd = get_unused_fd_flags(O_CLOEXEC);
3019 if (fd < 0)
3020 goto out;
3021
3022 f = dentry_open(&path, O_PATH, current_cred());
3023 if (IS_ERR(f)) {
3024 put_unused_fd(fd);
3025 fd = PTR_ERR(f);
3026 goto out;
3027 }
3028
3029 fd_install(fd, f);
3030out:
3031 path_put(&path);
3032
3033 return fd;
3034}
3035
1da177e4
LT
3036static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3037{
3038 struct sock *sk = sock->sk;
e27dfcea 3039 long amount = 0;
1da177e4
LT
3040 int err;
3041
6eba6a37
ED
3042 switch (cmd) {
3043 case SIOCOUTQ:
885ee74d 3044 amount = unix_outq_len(sk);
6eba6a37
ED
3045 err = put_user(amount, (int __user *)arg);
3046 break;
3047 case SIOCINQ:
885ee74d
PE
3048 amount = unix_inq_len(sk);
3049 if (amount < 0)
3050 err = amount;
3051 else
1da177e4 3052 err = put_user(amount, (int __user *)arg);
885ee74d 3053 break;
ba94f308
AV
3054 case SIOCUNIXFILE:
3055 err = unix_open_file(sk);
3056 break;
314001f0
RS
3057#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3058 case SIOCATMARK:
3059 {
3060 struct sk_buff *skb;
314001f0
RS
3061 int answ = 0;
3062
3063 skb = skb_peek(&sk->sk_receive_queue);
e82025c6 3064 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
314001f0
RS
3065 answ = 1;
3066 err = put_user(answ, (int __user *)arg);
3067 }
3068 break;
3069#endif
6eba6a37
ED
3070 default:
3071 err = -ENOIOCTLCMD;
3072 break;
1da177e4
LT
3073 }
3074 return err;
3075}
3076
5f6beb9e
AB
#ifdef CONFIG_COMPAT
/*
 * unix_compat_ioctl - compat ioctl entry point.
 *
 * Converts @arg with compat_ptr() and forwards everything to unix_ioctl();
 * the return value is unix_ioctl()'s.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
#endif
3083
a11e1d43 3084static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
3085{
3086 struct sock *sk = sock->sk;
a11e1d43 3087 __poll_t mask;
e1d09c2c 3088 u8 shutdown;
a11e1d43 3089
89ab066d 3090 sock_poll_wait(file, sock, wait);
a11e1d43 3091 mask = 0;
e1d09c2c 3092 shutdown = READ_ONCE(sk->sk_shutdown);
1da177e4
LT
3093
3094 /* exceptional events? */
cc04410a 3095 if (READ_ONCE(sk->sk_err))
a9a08845 3096 mask |= EPOLLERR;
e1d09c2c 3097 if (shutdown == SHUTDOWN_MASK)
a9a08845 3098 mask |= EPOLLHUP;
e1d09c2c 3099 if (shutdown & RCV_SHUTDOWN)
a9a08845 3100 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3101
3102 /* readable? */
3ef7cf57 3103 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3104 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3105 if (sk_is_readable(sk))
3106 mask |= EPOLLIN | EPOLLRDNORM;
d9a232d4
KI
3107#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3108 if (READ_ONCE(unix_sk(sk)->oob_skb))
3109 mask |= EPOLLPRI;
3110#endif
1da177e4
LT
3111
3112 /* Connection-based need to check for termination and startup */
6eba6a37
ED
3113 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3114 sk->sk_state == TCP_CLOSE)
a9a08845 3115 mask |= EPOLLHUP;
1da177e4
LT
3116
3117 /*
3118 * we set writable also when the other side has shut down the
3119 * connection. This prevents stuck sockets.
3120 */
3121 if (unix_writable(sk))
a9a08845 3122 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
3123
3124 return mask;
3125}
3126
a11e1d43
LT
3127static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3128 poll_table *wait)
3c73419c 3129{
ec0d215f 3130 struct sock *sk = sock->sk, *other;
a11e1d43
LT
3131 unsigned int writable;
3132 __poll_t mask;
e1d09c2c 3133 u8 shutdown;
a11e1d43 3134
89ab066d 3135 sock_poll_wait(file, sock, wait);
a11e1d43 3136 mask = 0;
e1d09c2c 3137 shutdown = READ_ONCE(sk->sk_shutdown);
3c73419c
RW
3138
3139 /* exceptional events? */
cc04410a
ED
3140 if (READ_ONCE(sk->sk_err) ||
3141 !skb_queue_empty_lockless(&sk->sk_error_queue))
a9a08845
LT
3142 mask |= EPOLLERR |
3143 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 3144
e1d09c2c 3145 if (shutdown & RCV_SHUTDOWN)
a9a08845 3146 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
e1d09c2c 3147 if (shutdown == SHUTDOWN_MASK)
a9a08845 3148 mask |= EPOLLHUP;
3c73419c
RW
3149
3150 /* readable? */
3ef7cf57 3151 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3152 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3153 if (sk_is_readable(sk))
3154 mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
3155
3156 /* Connection-based need to check for termination and startup */
3157 if (sk->sk_type == SOCK_SEQPACKET) {
3158 if (sk->sk_state == TCP_CLOSE)
a9a08845 3159 mask |= EPOLLHUP;
3c73419c
RW
3160 /* connection hasn't started yet? */
3161 if (sk->sk_state == TCP_SYN_SENT)
3162 return mask;
3163 }
3164
973a34aa 3165 /* No write status requested, avoid expensive OUT tests. */
a11e1d43 3166 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
3167 return mask;
3168
ec0d215f 3169 writable = unix_writable(sk);
7d267278
RW
3170 if (writable) {
3171 unix_state_lock(sk);
3172
3173 other = unix_peer(sk);
3174 if (other && unix_peer(other) != sk &&
04f08eb4 3175 unix_recvq_full_lockless(other) &&
7d267278
RW
3176 unix_dgram_peer_wake_me(sk, other))
3177 writable = 0;
3178
3179 unix_state_unlock(sk);
ec0d215f
RW
3180 }
3181
3182 if (writable)
a9a08845 3183 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 3184 else
9cd3e072 3185 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 3186
3c73419c
RW
3187 return mask;
3188}
1da177e4
LT
3189
3190#ifdef CONFIG_PROC_FS
a53eb3fe 3191
7123aaa3
ED
/*
 * A seq_file position packs two values into one unsigned long:
 * the hash bucket index in the high bits and a 1-based offset within that
 * bucket in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE		(BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Bucket index stored in position @x. */
#define get_bucket(x)		((x) >> (BUCKET_SPACE))
/* Offset within the bucket stored in position @x (low BUCKET_SPACE bits). */
#define get_offset(x)		((x) & ~(~0UL << (BUCKET_SPACE)))
/* Build a position from bucket @b and in-bucket offset @o. */
#define set_bucket_offset(b, o)	(((b) << (BUCKET_SPACE)) | (o))
a53eb3fe 3197
7123aaa3 3198static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 3199{
7123aaa3
ED
3200 unsigned long offset = get_offset(*pos);
3201 unsigned long bucket = get_bucket(*pos);
7123aaa3 3202 unsigned long count = 0;
cf2f225e 3203 struct sock *sk;
1da177e4 3204
cf2f225e
KI
3205 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3206 sk; sk = sk_next(sk)) {
7123aaa3
ED
3207 if (++count == offset)
3208 break;
3209 }
3210
3211 return sk;
3212}
3213
4408d55a 3214static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
7123aaa3 3215{
afd20b92 3216 unsigned long bucket = get_bucket(*pos);
79b05bea 3217 struct net *net = seq_file_net(seq);
4408d55a 3218 struct sock *sk;
7123aaa3 3219
f302d180 3220 while (bucket < UNIX_HASH_SIZE) {
79b05bea 3221 spin_lock(&net->unx.table.locks[bucket]);
4408d55a 3222
7123aaa3
ED
3223 sk = unix_from_bucket(seq, pos);
3224 if (sk)
3225 return sk;
3226
79b05bea 3227 spin_unlock(&net->unx.table.locks[bucket]);
4408d55a
KI
3228
3229 *pos = set_bucket_offset(++bucket, 1);
3230 }
7123aaa3 3231
1da177e4
LT
3232 return NULL;
3233}
3234
4408d55a
KI
3235static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3236 loff_t *pos)
3237{
3238 unsigned long bucket = get_bucket(*pos);
3239
cf2f225e
KI
3240 sk = sk_next(sk);
3241 if (sk)
3242 return sk;
3243
4408d55a 3244
cf2f225e 3245 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
4408d55a
KI
3246
3247 *pos = set_bucket_offset(++bucket, 1);
3248
3249 return unix_get_first(seq, pos);
3250}
3251
1da177e4
LT
3252static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3253{
7123aaa3
ED
3254 if (!*pos)
3255 return SEQ_START_TOKEN;
3256
4408d55a 3257 return unix_get_first(seq, pos);
1da177e4
LT
3258}
3259
3260static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3261{
3262 ++*pos;
4408d55a
KI
3263
3264 if (v == SEQ_START_TOKEN)
3265 return unix_get_first(seq, pos);
3266
3267 return unix_get_next(seq, v, pos);
1da177e4
LT
3268}
3269
3270static void unix_seq_stop(struct seq_file *seq, void *v)
3271{
afd20b92
KI
3272 struct sock *sk = v;
3273
2f7ca90a 3274 if (sk)
79b05bea 3275 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
1da177e4
LT
3276}
3277
3278static int unix_seq_show(struct seq_file *seq, void *v)
3279{
ac7bfa62 3280
b9f3124f 3281 if (v == SEQ_START_TOKEN)
1da177e4
LT
3282 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3283 "Inode Path\n");
3284 else {
3285 struct sock *s = v;
3286 struct unix_sock *u = unix_sk(s);
1c92b4e5 3287 unix_state_lock(s);
1da177e4 3288
71338aa7 3289 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4 3290 s,
41c6d650 3291 refcount_read(&s->sk_refcnt),
1da177e4
LT
3292 0,
3293 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3294 s->sk_type,
3295 s->sk_socket ?
3296 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3297 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3298 sock_i_ino(s));
3299
2f7ca90a 3300 if (u->addr) { // under a hash table lock here
1da177e4
LT
3301 int i, len;
3302 seq_putc(seq, ' ');
3303
3304 i = 0;
755662ce
KI
3305 len = u->addr->len -
3306 offsetof(struct sockaddr_un, sun_path);
5ce7ab49 3307 if (u->addr->name->sun_path[0]) {
1da177e4 3308 len--;
5ce7ab49 3309 } else {
1da177e4
LT
3310 seq_putc(seq, '@');
3311 i++;
3312 }
3313 for ( ; i < len; i++)
e7947ea7
IB
3314 seq_putc(seq, u->addr->name->sun_path[i] ?:
3315 '@');
1da177e4 3316 }
1c92b4e5 3317 unix_state_unlock(s);
1da177e4
LT
3318 seq_putc(seq, '\n');
3319 }
3320
3321 return 0;
3322}
3323
56b3d975 3324static const struct seq_operations unix_seq_ops = {
1da177e4
LT
3325 .start = unix_seq_start,
3326 .next = unix_seq_next,
3327 .stop = unix_seq_stop,
3328 .show = unix_seq_show,
3329};
2c860a43 3330
3a04927f 3331#ifdef CONFIG_BPF_SYSCALL
855d8e77
KI
3332struct bpf_unix_iter_state {
3333 struct seq_net_private p;
3334 unsigned int cur_sk;
3335 unsigned int end_sk;
3336 unsigned int max_sk;
3337 struct sock **batch;
3338 bool st_bucket_done;
3339};
3340
2c860a43
KI
3341struct bpf_iter__unix {
3342 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3343 __bpf_md_ptr(struct unix_sock *, unix_sk);
3344 uid_t uid __aligned(8);
3345};
3346
3347static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3348 struct unix_sock *unix_sk, uid_t uid)
3349{
3350 struct bpf_iter__unix ctx;
3351
3352 meta->seq_num--; /* skip SEQ_START_TOKEN */
3353 ctx.meta = meta;
3354 ctx.unix_sk = unix_sk;
3355 ctx.uid = uid;
3356 return bpf_iter_run_prog(prog, &ctx);
3357}
3358
855d8e77
KI
3359static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3360
3361{
3362 struct bpf_unix_iter_state *iter = seq->private;
3363 unsigned int expected = 1;
3364 struct sock *sk;
3365
3366 sock_hold(start_sk);
3367 iter->batch[iter->end_sk++] = start_sk;
3368
3369 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
855d8e77
KI
3370 if (iter->end_sk < iter->max_sk) {
3371 sock_hold(sk);
3372 iter->batch[iter->end_sk++] = sk;
3373 }
3374
3375 expected++;
3376 }
3377
cf2f225e 3378 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
855d8e77
KI
3379
3380 return expected;
3381}
3382
3383static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3384{
3385 while (iter->cur_sk < iter->end_sk)
3386 sock_put(iter->batch[iter->cur_sk++]);
3387}
3388
3389static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3390 unsigned int new_batch_sz)
3391{
3392 struct sock **new_batch;
3393
3394 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3395 GFP_USER | __GFP_NOWARN);
3396 if (!new_batch)
3397 return -ENOMEM;
3398
3399 bpf_iter_unix_put_batch(iter);
3400 kvfree(iter->batch);
3401 iter->batch = new_batch;
3402 iter->max_sk = new_batch_sz;
3403
3404 return 0;
3405}
3406
3407static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3408 loff_t *pos)
3409{
3410 struct bpf_unix_iter_state *iter = seq->private;
3411 unsigned int expected;
3412 bool resized = false;
3413 struct sock *sk;
3414
3415 if (iter->st_bucket_done)
3416 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3417
3418again:
3419 /* Get a new batch */
3420 iter->cur_sk = 0;
3421 iter->end_sk = 0;
3422
3423 sk = unix_get_first(seq, pos);
3424 if (!sk)
3425 return NULL; /* Done */
3426
3427 expected = bpf_iter_unix_hold_batch(seq, sk);
3428
3429 if (iter->end_sk == expected) {
3430 iter->st_bucket_done = true;
3431 return sk;
3432 }
3433
3434 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3435 resized = true;
3436 goto again;
3437 }
3438
3439 return sk;
3440}
3441
3442static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3443{
3444 if (!*pos)
3445 return SEQ_START_TOKEN;
3446
3447 /* bpf iter does not support lseek, so it always
3448 * continue from where it was stop()-ped.
3449 */
3450 return bpf_iter_unix_batch(seq, pos);
3451}
3452
3453static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3454{
3455 struct bpf_unix_iter_state *iter = seq->private;
3456 struct sock *sk;
3457
3458 /* Whenever seq_next() is called, the iter->cur_sk is
3459 * done with seq_show(), so advance to the next sk in
3460 * the batch.
3461 */
3462 if (iter->cur_sk < iter->end_sk)
3463 sock_put(iter->batch[iter->cur_sk++]);
3464
3465 ++*pos;
3466
3467 if (iter->cur_sk < iter->end_sk)
3468 sk = iter->batch[iter->cur_sk];
3469 else
3470 sk = bpf_iter_unix_batch(seq, pos);
3471
3472 return sk;
3473}
3474
2c860a43
KI
3475static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3476{
3477 struct bpf_iter_meta meta;
3478 struct bpf_prog *prog;
3479 struct sock *sk = v;
3480 uid_t uid;
855d8e77
KI
3481 bool slow;
3482 int ret;
2c860a43
KI
3483
3484 if (v == SEQ_START_TOKEN)
3485 return 0;
3486
855d8e77
KI
3487 slow = lock_sock_fast(sk);
3488
3489 if (unlikely(sk_unhashed(sk))) {
3490 ret = SEQ_SKIP;
3491 goto unlock;
3492 }
3493
2c860a43
KI
3494 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3495 meta.seq = seq;
3496 prog = bpf_iter_get_info(&meta, false);
855d8e77
KI
3497 ret = unix_prog_seq_show(prog, &meta, v, uid);
3498unlock:
3499 unlock_sock_fast(sk, slow);
3500 return ret;
2c860a43
KI
3501}
3502
3503static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3504{
855d8e77 3505 struct bpf_unix_iter_state *iter = seq->private;
2c860a43
KI
3506 struct bpf_iter_meta meta;
3507 struct bpf_prog *prog;
3508
3509 if (!v) {
3510 meta.seq = seq;
3511 prog = bpf_iter_get_info(&meta, true);
3512 if (prog)
3513 (void)unix_prog_seq_show(prog, &meta, v, 0);
3514 }
3515
855d8e77
KI
3516 if (iter->cur_sk < iter->end_sk)
3517 bpf_iter_unix_put_batch(iter);
2c860a43
KI
3518}
3519
3520static const struct seq_operations bpf_iter_unix_seq_ops = {
855d8e77
KI
3521 .start = bpf_iter_unix_seq_start,
3522 .next = bpf_iter_unix_seq_next,
2c860a43
KI
3523 .stop = bpf_iter_unix_seq_stop,
3524 .show = bpf_iter_unix_seq_show,
3525};
3526#endif
1da177e4
LT
3527#endif
3528
ec1b4cf7 3529static const struct net_proto_family unix_family_ops = {
1da177e4
LT
3530 .family = PF_UNIX,
3531 .create = unix_create,
3532 .owner = THIS_MODULE,
3533};
3534
097e66c5 3535
2c8c1e72 3536static int __net_init unix_net_init(struct net *net)
097e66c5 3537{
b6e81138 3538 int i;
097e66c5 3539
a0a53c8b 3540 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
3541 if (unix_sysctl_register(net))
3542 goto out;
d392e497 3543
097e66c5 3544#ifdef CONFIG_PROC_FS
c3506372 3545 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
b6e81138
KI
3546 sizeof(struct seq_net_private)))
3547 goto err_sysctl;
3548#endif
3549
3550 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3551 sizeof(spinlock_t), GFP_KERNEL);
3552 if (!net->unx.table.locks)
3553 goto err_proc;
3554
3555 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3556 sizeof(struct hlist_head),
3557 GFP_KERNEL);
3558 if (!net->unx.table.buckets)
3559 goto free_locks;
3560
3561 for (i = 0; i < UNIX_HASH_SIZE; i++) {
3562 spin_lock_init(&net->unx.table.locks[i]);
3563 INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
1597fbc0 3564 }
b6e81138
KI
3565
3566 return 0;
3567
3568free_locks:
3569 kvfree(net->unx.table.locks);
3570err_proc:
3571#ifdef CONFIG_PROC_FS
3572 remove_proc_entry("unix", net->proc_net);
3573err_sysctl:
097e66c5 3574#endif
b6e81138 3575 unix_sysctl_unregister(net);
097e66c5 3576out:
b6e81138 3577 return -ENOMEM;
097e66c5
DL
3578}
3579
2c8c1e72 3580static void __net_exit unix_net_exit(struct net *net)
097e66c5 3581{
b6e81138
KI
3582 kvfree(net->unx.table.buckets);
3583 kvfree(net->unx.table.locks);
1597fbc0 3584 unix_sysctl_unregister(net);
ece31ffd 3585 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
3586}
3587
3588static struct pernet_operations unix_net_ops = {
3589 .init = unix_net_init,
3590 .exit = unix_net_exit,
3591};
3592
3a04927f 3593#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
2c860a43
KI
3594DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3595 struct unix_sock *unix_sk, uid_t uid)
3596
855d8e77
KI
3597#define INIT_BATCH_SZ 16
3598
3599static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3600{
3601 struct bpf_unix_iter_state *iter = priv_data;
3602 int err;
3603
3604 err = bpf_iter_init_seq_net(priv_data, aux);
3605 if (err)
3606 return err;
3607
3608 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3609 if (err) {
3610 bpf_iter_fini_seq_net(priv_data);
3611 return err;
3612 }
3613
3614 return 0;
3615}
3616
3617static void bpf_iter_fini_unix(void *priv_data)
3618{
3619 struct bpf_unix_iter_state *iter = priv_data;
3620
3621 bpf_iter_fini_seq_net(priv_data);
3622 kvfree(iter->batch);
3623}
3624
2c860a43
KI
3625static const struct bpf_iter_seq_info unix_seq_info = {
3626 .seq_ops = &bpf_iter_unix_seq_ops,
855d8e77
KI
3627 .init_seq_private = bpf_iter_init_unix,
3628 .fini_seq_private = bpf_iter_fini_unix,
3629 .seq_priv_size = sizeof(struct bpf_unix_iter_state),
2c860a43
KI
3630};
3631
eb7d8f1d
KI
3632static const struct bpf_func_proto *
3633bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3634 const struct bpf_prog *prog)
3635{
3636 switch (func_id) {
3637 case BPF_FUNC_setsockopt:
3638 return &bpf_sk_setsockopt_proto;
3639 case BPF_FUNC_getsockopt:
3640 return &bpf_sk_getsockopt_proto;
3641 default:
3642 return NULL;
3643 }
3644}
3645
2c860a43
KI
3646static struct bpf_iter_reg unix_reg_info = {
3647 .target = "unix",
3648 .ctx_arg_info_size = 1,
3649 .ctx_arg_info = {
3650 { offsetof(struct bpf_iter__unix, unix_sk),
3651 PTR_TO_BTF_ID_OR_NULL },
3652 },
eb7d8f1d 3653 .get_func_proto = bpf_iter_unix_get_func_proto,
2c860a43
KI
3654 .seq_info = &unix_seq_info,
3655};
3656
3657static void __init bpf_iter_register(void)
3658{
3659 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3660 if (bpf_iter_reg_target(&unix_reg_info))
3661 pr_warn("Warning: could not register bpf iterator unix\n");
3662}
3663#endif
3664
1da177e4
LT
3665static int __init af_unix_init(void)
3666{
51bae889 3667 int i, rc = -1;
1da177e4 3668
c593642c 3669 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
1da177e4 3670
51bae889
KI
3671 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3672 spin_lock_init(&bsd_socket_locks[i]);
3673 INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3674 }
3675
94531cfc
JW
3676 rc = proto_register(&unix_dgram_proto, 1);
3677 if (rc != 0) {
3678 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3679 goto out;
3680 }
3681
3682 rc = proto_register(&unix_stream_proto, 1);
ac7bfa62 3683 if (rc != 0) {
5cc208be 3684 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
73e341e0 3685 proto_unregister(&unix_dgram_proto);
1da177e4
LT
3686 goto out;
3687 }
3688
3689 sock_register(&unix_family_ops);
097e66c5 3690 register_pernet_subsys(&unix_net_ops);
c6382918 3691 unix_bpf_build_proto();
2c860a43 3692
3a04927f 3693#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
2c860a43
KI
3694 bpf_iter_register();
3695#endif
3696
1da177e4
LT
3697out:
3698 return rc;
3699}
3700
3a04927f 3701/* Later than subsys_initcall() because we depend on stuff initialised there */
3d366960 3702fs_initcall(af_unix_init);