Merge tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm...
[linux-block.git] / net / unix / af_unix.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
113aa838 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4 6 *
1da177e4
LT
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * is been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
1da177e4
LT
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
77
5cc208be 78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
1da177e4 80#include <linux/module.h>
1da177e4 81#include <linux/kernel.h>
1da177e4 82#include <linux/signal.h>
3f07c014 83#include <linux/sched/signal.h>
1da177e4
LT
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
b6459415 92#include <linux/filter.h>
1da177e4
LT
93#include <linux/termios.h>
94#include <linux/sockios.h>
95#include <linux/net.h>
96#include <linux/in.h>
97#include <linux/fs.h>
98#include <linux/slab.h>
7c0f6ba6 99#include <linux/uaccess.h>
1da177e4
LT
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
457c4cbc 102#include <net/net_namespace.h>
1da177e4 103#include <net/sock.h>
c752f073 104#include <net/tcp_states.h>
1da177e4
LT
105#include <net/af_unix.h>
106#include <linux/proc_fs.h>
107#include <linux/seq_file.h>
108#include <net/scm.h>
109#include <linux/init.h>
110#include <linux/poll.h>
1da177e4
LT
111#include <linux/rtnetlink.h>
112#include <linux/mount.h>
113#include <net/checksum.h>
114#include <linux/security.h>
509f15b9 115#include <linux/splice.h>
2b15af6f 116#include <linux/freezer.h>
ba94f308 117#include <linux/file.h>
2c860a43 118#include <linux/btf_ids.h>
1da177e4 119
f4e65870
JA
120#include "scm.h"
121
518de9b3 122static atomic_long_t unix_nr_socks;
51bae889
KI
123static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
124static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
1da177e4 125
/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */
1da177e4 130
f452be49 131static unsigned int unix_unbound_hash(struct sock *sk)
7123aaa3 132{
f452be49 133 unsigned long hash = (unsigned long)sk;
7123aaa3
ED
134
135 hash ^= hash >> 16;
136 hash ^= hash >> 8;
f452be49
KI
137 hash ^= sk->sk_type;
138
cf21b355 139 return hash & UNIX_HASH_MOD;
f452be49
KI
140}
141
142static unsigned int unix_bsd_hash(struct inode *i)
143{
f302d180 144 return i->i_ino & UNIX_HASH_MOD;
f452be49
KI
145}
146
147static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148 int addr_len, int type)
149{
150 __wsum csum = csum_partial(sunaddr, addr_len, 0);
151 unsigned int hash;
152
153 hash = (__force unsigned int)csum_fold(csum);
154 hash ^= hash >> 8;
155 hash ^= type;
156
cf21b355 157 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
7123aaa3
ED
158}
159
79b05bea
KI
160static void unix_table_double_lock(struct net *net,
161 unsigned int hash1, unsigned int hash2)
afd20b92 162{
cf21b355
KI
163 if (hash1 == hash2) {
164 spin_lock(&net->unx.table.locks[hash1]);
165 return;
166 }
167
afd20b92
KI
168 if (hash1 > hash2)
169 swap(hash1, hash2);
170
79b05bea
KI
171 spin_lock(&net->unx.table.locks[hash1]);
172 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
afd20b92
KI
173}
174
79b05bea
KI
175static void unix_table_double_unlock(struct net *net,
176 unsigned int hash1, unsigned int hash2)
afd20b92 177{
cf21b355
KI
178 if (hash1 == hash2) {
179 spin_unlock(&net->unx.table.locks[hash1]);
180 return;
181 }
182
79b05bea
KI
183 spin_unlock(&net->unx.table.locks[hash1]);
184 spin_unlock(&net->unx.table.locks[hash2]);
afd20b92
KI
185}
186
877ce7c1 187#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 188static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 189{
37a9a8df 190 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
191}
192
193static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194{
37a9a8df
SS
195 scm->secid = UNIXCB(skb).secid;
196}
197
198static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199{
200 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
201}
202#else
dc49c1f9 203static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
204{ }
205
206static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207{ }
37a9a8df
SS
208
209static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
210{
211 return true;
212}
877ce7c1
CZ
213#endif /* CONFIG_SECURITY_NETWORK */
214
/* Lvalue accessor for the peer pointer stored in the unix_sock of @sk. */
#define unix_peer(sk) (unix_sk(sk)->peer)
216
/* Non-zero when the peer of @osk is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
221
222static inline int unix_may_send(struct sock *sk, struct sock *osk)
223{
6eba6a37 224 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
225}
226
86b18aaa 227static inline int unix_recvq_full(const struct sock *sk)
3c73419c
RW
228{
229 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
230}
231
86b18aaa
QC
232static inline int unix_recvq_full_lockless(const struct sock *sk)
233{
234 return skb_queue_len_lockless(&sk->sk_receive_queue) >
235 READ_ONCE(sk->sk_max_ack_backlog);
236}
237
/*
 * Return the peer of @s with an extra reference taken via sock_hold(), or
 * NULL when @s has no peer.  The peer pointer is read and the reference is
 * taken while holding the state lock of @s.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4 250
12f21c49
KI
251static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
252 int addr_len)
253{
254 struct unix_address *addr;
255
256 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
257 if (!addr)
258 return NULL;
259
260 refcount_set(&addr->refcnt, 1);
261 addr->len = addr_len;
262 memcpy(addr->name, sunaddr, addr_len);
263
264 return addr;
265}
266
1da177e4
LT
267static inline void unix_release_addr(struct unix_address *addr)
268{
8c9814b9 269 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
270 kfree(addr);
271}
272
/*
 *	Check unix socket name:
 *		- should be not zero length.
 *	        - if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	This helper itself only checks the length and the address family:
 *	@addr_len must cover at least one byte of sun_path and must not
 *	exceed sizeof(struct sockaddr_un), and sun_family must be AF_UNIX.
 *
 *	Returns 0 when the address is acceptable, -EINVAL otherwise.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	/* A negative addr_len is converted to a huge unsigned value by the
	 * comparison with sizeof() and is therefore rejected as well.
	 */
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
291
/*
 * NUL-terminate a pathname address in place by writing a zero byte at
 * offset @addr_len from the start of @sunaddr.
 */
static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.
	 */
	((char *)sunaddr)[addr_len] = 0;
}
304
/*
 * Unlink @sk from its hash chain via sk_del_node_init().  No lock is taken
 * here; unix_remove_socket() is the variant that takes the chain lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
309
cf2f225e 310static void __unix_insert_socket(struct net *net, struct sock *sk)
1da177e4 311{
dd29c67d 312 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
cf2f225e 313 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
1da177e4
LT
314}
315
cf2f225e
KI
316static void __unix_set_addr_hash(struct net *net, struct sock *sk,
317 struct unix_address *addr, unsigned int hash)
185ab886
AV
318{
319 __unix_remove_socket(sk);
320 smp_store_release(&unix_sk(sk)->addr, addr);
e6b4b873
KI
321
322 sk->sk_hash = hash;
cf2f225e 323 __unix_insert_socket(net, sk);
185ab886
AV
324}
325
79b05bea 326static void unix_remove_socket(struct net *net, struct sock *sk)
1da177e4 327{
79b05bea 328 spin_lock(&net->unx.table.locks[sk->sk_hash]);
1da177e4 329 __unix_remove_socket(sk);
79b05bea 330 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
1da177e4
LT
331}
332
79b05bea 333static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
1da177e4 334{
79b05bea 335 spin_lock(&net->unx.table.locks[sk->sk_hash]);
cf2f225e 336 __unix_insert_socket(net, sk);
79b05bea 337 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
1da177e4
LT
338}
339
51bae889
KI
340static void unix_insert_bsd_socket(struct sock *sk)
341{
342 spin_lock(&bsd_socket_locks[sk->sk_hash]);
343 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
344 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
345}
346
347static void unix_remove_bsd_socket(struct sock *sk)
348{
349 if (!hlist_unhashed(&sk->sk_bind_node)) {
350 spin_lock(&bsd_socket_locks[sk->sk_hash]);
351 __sk_del_bind_node(sk);
352 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
353
354 sk_node_init(&sk->sk_bind_node);
355 }
356}
357
097e66c5
DL
358static struct sock *__unix_find_socket_byname(struct net *net,
359 struct sockaddr_un *sunname,
be752283 360 int len, unsigned int hash)
1da177e4
LT
361{
362 struct sock *s;
1da177e4 363
cf2f225e 364 sk_for_each(s, &net->unx.table.buckets[hash]) {
1da177e4
LT
365 struct unix_sock *u = unix_sk(s);
366
367 if (u->addr->len == len &&
368 !memcmp(u->addr->name, sunname, len))
262ce0af 369 return s;
1da177e4 370 }
262ce0af 371 return NULL;
1da177e4
LT
372}
373
097e66c5
DL
374static inline struct sock *unix_find_socket_byname(struct net *net,
375 struct sockaddr_un *sunname,
be752283 376 int len, unsigned int hash)
1da177e4
LT
377{
378 struct sock *s;
379
79b05bea 380 spin_lock(&net->unx.table.locks[hash]);
be752283 381 s = __unix_find_socket_byname(net, sunname, len, hash);
1da177e4
LT
382 if (s)
383 sock_hold(s);
79b05bea 384 spin_unlock(&net->unx.table.locks[hash]);
1da177e4
LT
385 return s;
386}
387
51bae889 388static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4 389{
f452be49 390 unsigned int hash = unix_bsd_hash(i);
1da177e4 391 struct sock *s;
1da177e4 392
51bae889
KI
393 spin_lock(&bsd_socket_locks[hash]);
394 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
40ffe67d 395 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 396
beef5121 397 if (dentry && d_backing_inode(dentry) == i) {
1da177e4 398 sock_hold(s);
51bae889 399 spin_unlock(&bsd_socket_locks[hash]);
afd20b92 400 return s;
1da177e4
LT
401 }
402 }
51bae889 403 spin_unlock(&bsd_socket_locks[hash]);
afd20b92 404 return NULL;
1da177e4
LT
405}
406
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
431
ac6424b9 432static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
433 void *key)
434{
435 struct unix_sock *u;
436 wait_queue_head_t *u_sleep;
437
438 u = container_of(q, struct unix_sock, peer_wake);
439
440 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
441 q);
442 u->peer_wake.private = NULL;
443
444 /* relaying can only happen while the wq still exists */
445 u_sleep = sk_sleep(&u->sk);
446 if (u_sleep)
3ad6f93e 447 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
7d267278
RW
448
449 return 0;
450}
451
452static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
453{
454 struct unix_sock *u, *u_other;
455 int rc;
456
457 u = unix_sk(sk);
458 u_other = unix_sk(other);
459 rc = 0;
460 spin_lock(&u_other->peer_wait.lock);
461
462 if (!u->peer_wake.private) {
463 u->peer_wake.private = other;
464 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
465
466 rc = 1;
467 }
468
469 spin_unlock(&u_other->peer_wait.lock);
470 return rc;
471}
472
473static void unix_dgram_peer_wake_disconnect(struct sock *sk,
474 struct sock *other)
475{
476 struct unix_sock *u, *u_other;
477
478 u = unix_sk(sk);
479 u_other = unix_sk(other);
480 spin_lock(&u_other->peer_wait.lock);
481
482 if (u->peer_wake.private == other) {
483 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
484 u->peer_wake.private = NULL;
485 }
486
487 spin_unlock(&u_other->peer_wait.lock);
488}
489
490static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
491 struct sock *other)
492{
493 unix_dgram_peer_wake_disconnect(sk, other);
494 wake_up_interruptible_poll(sk_sleep(sk),
a9a08845
LT
495 EPOLLOUT |
496 EPOLLWRNORM |
497 EPOLLWRBAND);
7d267278
RW
498}
499
500/* preconditions:
501 * - unix_peer(sk) == other
502 * - association is stable
503 */
504static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
505{
506 int connected;
507
508 connected = unix_dgram_peer_wake_connect(sk, other);
509
51f7e951
JB
510 /* If other is SOCK_DEAD, we want to make sure we signal
511 * POLLOUT, such that a subsequent write() can get a
512 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
513 * to other and its full, we will hang waiting for POLLOUT.
514 */
662a8094 515 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
7d267278
RW
516 return 1;
517
518 if (connected)
519 unix_dgram_peer_wake_disconnect(sk, other);
520
521 return 0;
522}
523
1586a587 524static int unix_writable(const struct sock *sk)
1da177e4 525{
1586a587 526 return sk->sk_state != TCP_LISTEN &&
14afee4b 527 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
528}
529
530static void unix_write_space(struct sock *sk)
531{
43815482
ED
532 struct socket_wq *wq;
533
534 rcu_read_lock();
1da177e4 535 if (unix_writable(sk)) {
43815482 536 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 537 if (skwq_has_sleeper(wq))
67426b75 538 wake_up_interruptible_sync_poll(&wq->wait,
a9a08845 539 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
8d8ad9d7 540 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 541 }
43815482 542 rcu_read_unlock();
1da177e4
LT
543}
544
545/* When dgram socket disconnects (or changes its peer), we clear its receive
546 * queue of packets arrived from previous peer. First, it allows to do
547 * flow control based only on wmem_alloc; second, sk connected to peer
548 * may receive messages only from that peer. */
549static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
550{
b03efcfb 551 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
552 skb_queue_purge(&sk->sk_receive_queue);
553 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
554
555 /* If one link of bidirectional dgram pipe is disconnected,
556 * we signal error. Messages are lost. Do not make this,
557 * when peer was not connected to us.
558 */
559 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
cc04410a 560 WRITE_ONCE(other->sk_err, ECONNRESET);
e3ae2365 561 sk_error_report(other);
1da177e4
LT
562 }
563 }
dc56ad70 564 other->sk_state = TCP_CLOSE;
1da177e4
LT
565}
566
567static void unix_sock_destructor(struct sock *sk)
568{
569 struct unix_sock *u = unix_sk(sk);
570
571 skb_queue_purge(&sk->sk_receive_queue);
572
dd29c67d
ED
573 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
574 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
575 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
1da177e4 576 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 577 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
578 return;
579 }
580
581 if (u->addr)
582 unix_release_addr(u->addr);
583
518de9b3 584 atomic_long_dec(&unix_nr_socks);
a8076d8d 585 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1da177e4 586#ifdef UNIX_REFCNT_DEBUG
5cc208be 587 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 588 atomic_long_read(&unix_nr_socks));
1da177e4
LT
589#endif
590}
591
ded34e0f 592static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
593{
594 struct unix_sock *u = unix_sk(sk);
1da177e4
LT
595 struct sock *skpair;
596 struct sk_buff *skb;
79b05bea 597 struct path path;
1da177e4
LT
598 int state;
599
79b05bea 600 unix_remove_socket(sock_net(sk), sk);
51bae889 601 unix_remove_bsd_socket(sk);
1da177e4
LT
602
603 /* Clear state */
1c92b4e5 604 unix_state_lock(sk);
1da177e4 605 sock_orphan(sk);
e1d09c2c 606 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
40ffe67d
AV
607 path = u->path;
608 u->path.dentry = NULL;
609 u->path.mnt = NULL;
1da177e4
LT
610 state = sk->sk_state;
611 sk->sk_state = TCP_CLOSE;
a494bd64
ED
612
613 skpair = unix_peer(sk);
614 unix_peer(sk) = NULL;
615
1c92b4e5 616 unix_state_unlock(sk);
1da177e4 617
7a62ed61
KI
618#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
619 if (u->oob_skb) {
620 kfree_skb(u->oob_skb);
621 u->oob_skb = NULL;
622 }
623#endif
624
1da177e4
LT
625 wake_up_interruptible_all(&u->peer_wait);
626
e27dfcea 627 if (skpair != NULL) {
1da177e4 628 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 629 unix_state_lock(skpair);
1da177e4 630 /* No more writes */
e1d09c2c 631 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
1da177e4 632 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
cc04410a 633 WRITE_ONCE(skpair->sk_err, ECONNRESET);
1c92b4e5 634 unix_state_unlock(skpair);
1da177e4 635 skpair->sk_state_change(skpair);
8d8ad9d7 636 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 637 }
7d267278
RW
638
639 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4 640 sock_put(skpair); /* It may now die */
1da177e4
LT
641 }
642
643 /* Try to flush out this socket. Throw out buffers at least */
644
645 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 646 if (state == TCP_LISTEN)
1da177e4
LT
647 unix_release_sock(skb->sk, 1);
648 /* passed fds are erased in the kfree_skb hook */
73ed5d25 649 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
650 kfree_skb(skb);
651 }
652
40ffe67d
AV
653 if (path.dentry)
654 path_put(&path);
1da177e4
LT
655
656 sock_put(sk);
657
658 /* ---- Socket is dead now and most probably destroyed ---- */
659
660 /*
e04dae84 661 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
662 * ECONNRESET and we die on the spot. In Linux we behave
663 * like files and pipes do and wait for the last
664 * dereference.
665 *
666 * Can't we simply set sock->err?
667 *
668 * What the above comment does talk about? --ANK(980817)
669 */
670
9305cfa4 671 if (unix_tot_inflight)
ac7bfa62 672 unix_gc(); /* Garbage collect fds */
1da177e4
LT
673}
674
109f6e39
EB
675static void init_peercred(struct sock *sk)
676{
35306eb2
ED
677 const struct cred *old_cred;
678 struct pid *old_pid;
679
680 spin_lock(&sk->sk_peer_lock);
681 old_pid = sk->sk_peer_pid;
682 old_cred = sk->sk_peer_cred;
109f6e39
EB
683 sk->sk_peer_pid = get_pid(task_tgid(current));
684 sk->sk_peer_cred = get_current_cred();
35306eb2
ED
685 spin_unlock(&sk->sk_peer_lock);
686
687 put_pid(old_pid);
688 put_cred(old_cred);
109f6e39
EB
689}
690
691static void copy_peercred(struct sock *sk, struct sock *peersk)
692{
35306eb2
ED
693 const struct cred *old_cred;
694 struct pid *old_pid;
695
696 if (sk < peersk) {
697 spin_lock(&sk->sk_peer_lock);
698 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
699 } else {
700 spin_lock(&peersk->sk_peer_lock);
701 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
702 }
703 old_pid = sk->sk_peer_pid;
704 old_cred = sk->sk_peer_cred;
109f6e39
EB
705 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
706 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
35306eb2
ED
707
708 spin_unlock(&sk->sk_peer_lock);
709 spin_unlock(&peersk->sk_peer_lock);
710
711 put_pid(old_pid);
712 put_cred(old_cred);
109f6e39
EB
713}
714
1da177e4
LT
715static int unix_listen(struct socket *sock, int backlog)
716{
717 int err;
718 struct sock *sk = sock->sk;
719 struct unix_sock *u = unix_sk(sk);
720
721 err = -EOPNOTSUPP;
6eba6a37
ED
722 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
723 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
724 err = -EINVAL;
725 if (!u->addr)
6eba6a37 726 goto out; /* No listens on an unbound socket */
1c92b4e5 727 unix_state_lock(sk);
1da177e4
LT
728 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
729 goto out_unlock;
730 if (backlog > sk->sk_max_ack_backlog)
731 wake_up_interruptible_all(&u->peer_wait);
732 sk->sk_max_ack_backlog = backlog;
733 sk->sk_state = TCP_LISTEN;
734 /* set credentials so connect can copy them */
109f6e39 735 init_peercred(sk);
1da177e4
LT
736 err = 0;
737
738out_unlock:
1c92b4e5 739 unix_state_unlock(sk);
1da177e4
LT
740out:
741 return err;
742}
743
744static int unix_release(struct socket *);
745static int unix_bind(struct socket *, struct sockaddr *, int);
746static int unix_stream_connect(struct socket *, struct sockaddr *,
747 int addr_len, int flags);
748static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 749static int unix_accept(struct socket *, struct socket *, int, bool);
9b2c45d4 750static int unix_getname(struct socket *, struct sockaddr *, int);
a11e1d43
LT
751static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
752static __poll_t unix_dgram_poll(struct file *, struct socket *,
753 poll_table *);
1da177e4 754static int unix_ioctl(struct socket *, unsigned int, unsigned long);
5f6beb9e
AB
755#ifdef CONFIG_COMPAT
756static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
757#endif
1da177e4 758static int unix_shutdown(struct socket *, int);
1b784140
YX
759static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
760static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
761static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
762 size_t size, int flags);
2b514574
HFS
763static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
764 struct pipe_inode_info *, size_t size,
765 unsigned int flags);
1b784140
YX
766static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
767static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
965b57b4
CW
768static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
769static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
1da177e4
LT
770static int unix_dgram_connect(struct socket *, struct sockaddr *,
771 int, int);
1b784140
YX
772static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
773static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
774 int);
1da177e4 775
12663bfc 776static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
777{
778 struct unix_sock *u = unix_sk(sk);
779
6e1ce3c3 780 if (mutex_lock_interruptible(&u->iolock))
12663bfc
SL
781 return -EINTR;
782
f55bb7f9 783 sk->sk_peek_off = val;
6e1ce3c3 784 mutex_unlock(&u->iolock);
12663bfc
SL
785
786 return 0;
f55bb7f9
PE
787}
788
#ifdef CONFIG_PROC_FS
/*
 * Sum scm_stat.nr_fds over the sockets that own the skbs queued on the
 * receive queue of @sk.  The queue is walked under its spinlock.
 */
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

/*
 * show_fdinfo handler: print a "scm_fds:" line for the socket.
 *
 * Datagram sockets and established sockets report their own
 * scm_stat.nr_fds; listening sockets report the total gathered by
 * unix_count_nr_fds(); any other state reports 0.  Nothing is printed when
 * the socket has no sock attached.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif
f55bb7f9 834
90ddc4f0 835static const struct proto_ops unix_stream_ops = {
1da177e4
LT
836 .family = PF_UNIX,
837 .owner = THIS_MODULE,
838 .release = unix_release,
839 .bind = unix_bind,
840 .connect = unix_stream_connect,
841 .socketpair = unix_socketpair,
842 .accept = unix_accept,
843 .getname = unix_getname,
a11e1d43 844 .poll = unix_poll,
1da177e4 845 .ioctl = unix_ioctl,
5f6beb9e
AB
846#ifdef CONFIG_COMPAT
847 .compat_ioctl = unix_compat_ioctl,
848#endif
1da177e4
LT
849 .listen = unix_listen,
850 .shutdown = unix_shutdown,
1da177e4
LT
851 .sendmsg = unix_stream_sendmsg,
852 .recvmsg = unix_stream_recvmsg,
965b57b4 853 .read_skb = unix_stream_read_skb,
1da177e4 854 .mmap = sock_no_mmap,
869e7c62 855 .sendpage = unix_stream_sendpage,
2b514574 856 .splice_read = unix_stream_splice_read,
fc0d7536 857 .set_peek_off = unix_set_peek_off,
3c32da19 858 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
859};
860
90ddc4f0 861static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
862 .family = PF_UNIX,
863 .owner = THIS_MODULE,
864 .release = unix_release,
865 .bind = unix_bind,
866 .connect = unix_dgram_connect,
867 .socketpair = unix_socketpair,
868 .accept = sock_no_accept,
869 .getname = unix_getname,
a11e1d43 870 .poll = unix_dgram_poll,
1da177e4 871 .ioctl = unix_ioctl,
5f6beb9e
AB
872#ifdef CONFIG_COMPAT
873 .compat_ioctl = unix_compat_ioctl,
874#endif
1da177e4
LT
875 .listen = sock_no_listen,
876 .shutdown = unix_shutdown,
1da177e4 877 .sendmsg = unix_dgram_sendmsg,
965b57b4 878 .read_skb = unix_read_skb,
1da177e4
LT
879 .recvmsg = unix_dgram_recvmsg,
880 .mmap = sock_no_mmap,
881 .sendpage = sock_no_sendpage,
f55bb7f9 882 .set_peek_off = unix_set_peek_off,
3c32da19 883 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
884};
885
90ddc4f0 886static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
887 .family = PF_UNIX,
888 .owner = THIS_MODULE,
889 .release = unix_release,
890 .bind = unix_bind,
891 .connect = unix_stream_connect,
892 .socketpair = unix_socketpair,
893 .accept = unix_accept,
894 .getname = unix_getname,
a11e1d43 895 .poll = unix_dgram_poll,
1da177e4 896 .ioctl = unix_ioctl,
5f6beb9e
AB
897#ifdef CONFIG_COMPAT
898 .compat_ioctl = unix_compat_ioctl,
899#endif
1da177e4
LT
900 .listen = unix_listen,
901 .shutdown = unix_shutdown,
1da177e4 902 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 903 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
904 .mmap = sock_no_mmap,
905 .sendpage = sock_no_sendpage,
f55bb7f9 906 .set_peek_off = unix_set_peek_off,
3c32da19 907 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
908};
909
/* Intentionally empty ->close() implementation for the unix proto structs. */
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}
916
/* Intentionally empty ->unhash() implementation for unix_stream_proto. */
static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}
923
924struct proto unix_dgram_proto = {
0edf0824 925 .name = "UNIX",
248969ae 926 .owner = THIS_MODULE,
248969ae 927 .obj_size = sizeof(struct unix_sock),
c7272e15 928 .close = unix_close,
c6382918 929#ifdef CONFIG_BPF_SYSCALL
94531cfc 930 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
c6382918 931#endif
1da177e4
LT
932};
933
94531cfc
JW
934struct proto unix_stream_proto = {
935 .name = "UNIX-STREAM",
248969ae 936 .owner = THIS_MODULE,
248969ae 937 .obj_size = sizeof(struct unix_sock),
c7272e15 938 .close = unix_close,
94531cfc 939 .unhash = unix_unhash,
c6382918 940#ifdef CONFIG_BPF_SYSCALL
94531cfc 941 .psock_update_sk_prot = unix_stream_bpf_update_proto,
c6382918 942#endif
1da177e4
LT
943};
944
94531cfc 945static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1da177e4 946{
1da177e4 947 struct unix_sock *u;
f4bd73b5
KI
948 struct sock *sk;
949 int err;
1da177e4 950
518de9b3 951 atomic_long_inc(&unix_nr_socks);
f4bd73b5
KI
952 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
953 err = -ENFILE;
954 goto err;
955 }
1da177e4 956
94531cfc
JW
957 if (type == SOCK_STREAM)
958 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
959 else /*dgram and seqpacket */
960 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
961
f4bd73b5
KI
962 if (!sk) {
963 err = -ENOMEM;
964 goto err;
965 }
1da177e4 966
6eba6a37 967 sock_init_data(sock, sk);
1da177e4 968
e6b4b873 969 sk->sk_hash = unix_unbound_hash(sk);
3aa9799e 970 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 971 sk->sk_write_space = unix_write_space;
a0a53c8b 972 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
973 sk->sk_destruct = unix_sock_destructor;
974 u = unix_sk(sk);
40ffe67d
AV
975 u->path.dentry = NULL;
976 u->path.mnt = NULL;
fd19f329 977 spin_lock_init(&u->lock);
516e0cc5 978 atomic_long_set(&u->inflight, 0);
1fd05ba5 979 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
980 mutex_init(&u->iolock); /* single task reading lock */
981 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 982 init_waitqueue_head(&u->peer_wait);
7d267278 983 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
3c32da19 984 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
79b05bea 985 unix_insert_unbound_socket(net, sk);
f4bd73b5 986
340c3d33 987 sock_prot_inuse_add(net, sk->sk_prot, 1);
f4bd73b5 988
1da177e4 989 return sk;
f4bd73b5
KI
990
991err:
992 atomic_long_dec(&unix_nr_socks);
993 return ERR_PTR(err);
1da177e4
LT
994}
995
3f378b68
EP
996static int unix_create(struct net *net, struct socket *sock, int protocol,
997 int kern)
1da177e4 998{
f4bd73b5
KI
999 struct sock *sk;
1000
1da177e4
LT
1001 if (protocol && protocol != PF_UNIX)
1002 return -EPROTONOSUPPORT;
1003
1004 sock->state = SS_UNCONNECTED;
1005
1006 switch (sock->type) {
1007 case SOCK_STREAM:
1008 sock->ops = &unix_stream_ops;
1009 break;
1010 /*
1011 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1012 * nothing uses it.
1013 */
1014 case SOCK_RAW:
e27dfcea 1015 sock->type = SOCK_DGRAM;
df561f66 1016 fallthrough;
1da177e4
LT
1017 case SOCK_DGRAM:
1018 sock->ops = &unix_dgram_ops;
1019 break;
1020 case SOCK_SEQPACKET:
1021 sock->ops = &unix_seqpacket_ops;
1022 break;
1023 default:
1024 return -ESOCKTNOSUPPORT;
1025 }
1026
f4bd73b5
KI
1027 sk = unix_create1(net, sock, kern, sock->type);
1028 if (IS_ERR(sk))
1029 return PTR_ERR(sk);
1030
1031 return 0;
1da177e4
LT
1032}
1033
1034static int unix_release(struct socket *sock)
1035{
1036 struct sock *sk = sock->sk;
1037
1038 if (!sk)
1039 return 0;
1040
c7272e15 1041 sk->sk_prot->close(sk, 0);
ded34e0f 1042 unix_release_sock(sk, 0);
1da177e4
LT
1043 sock->sk = NULL;
1044
ded34e0f 1045 return 0;
1da177e4
LT
1046}
1047
51bae889
KI
/*
 * Look up the socket bound to the filesystem path in @sunaddr.
 *
 * The path is resolved (following symlinks), the caller must have write
 * permission on it, the inode must be a socket, and a sock must be
 * registered for that inode; the sock's type must equal @type.  On a match
 * the path's atime is touched.
 *
 * Returns the sock found by unix_find_socket_byinode(), or an ERR_PTR():
 * the kern_path()/path_permission() error, -ECONNREFUSED when the inode is
 * not a socket or has no sock, -EPROTOTYPE on a type mismatch (the sock is
 * dropped with sock_put() in that case).
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);

	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto out;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto out_path;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto out_path;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto out_path;

	err = -EPROTOTYPE;
	if (sk->sk_type != type)
		goto out_sock;

	touch_atime(&path);
	path_put(&path);

	return sk;

out_sock:
	sock_put(sk);
out_path:
	path_put(&path);
out:
	return ERR_PTR(err);
}
1091
/*
 * Look up a socket by name in the hash table, using the abstract-name
 * hash of (@sunaddr, @addr_len, @type).
 *
 * Returns the sock, or ERR_PTR(-ECONNREFUSED) when no socket has that
 * name.  If the sock found has a path dentry, its atime is touched.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	if (unix_sk(sk)->path.dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}
1110
1111static struct sock *unix_find_other(struct net *net,
1112 struct sockaddr_un *sunaddr,
d2d8c9fd 1113 int addr_len, int type)
fa39ef0e
KI
1114{
1115 struct sock *sk;
1116
1117 if (sunaddr->sun_path[0])
51bae889 1118 sk = unix_find_bsd(sunaddr, addr_len, type);
fa39ef0e 1119 else
d2d8c9fd 1120 sk = unix_find_abstract(net, sunaddr, addr_len, type);
fa39ef0e
KI
1121
1122 return sk;
1123}
1124
f7ed31f4 1125static int unix_autobind(struct sock *sk)
1da177e4 1126{
afd20b92 1127 unsigned int new_hash, old_hash = sk->sk_hash;
1da177e4 1128 struct unix_sock *u = unix_sk(sk);
79b05bea 1129 struct net *net = sock_net(sk);
6eba6a37 1130 struct unix_address *addr;
9acbc584 1131 u32 lastnum, ordernum;
f7ed31f4 1132 int err;
1da177e4 1133
6e1ce3c3 1134 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
1135 if (err)
1136 return err;
1da177e4 1137
1da177e4
LT
1138 if (u->addr)
1139 goto out;
1140
1141 err = -ENOMEM;
755662ce
KI
1142 addr = kzalloc(sizeof(*addr) +
1143 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1da177e4
LT
1144 if (!addr)
1145 goto out;
1146
9acbc584 1147 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1da177e4 1148 addr->name->sun_family = AF_UNIX;
8c9814b9 1149 refcount_set(&addr->refcnt, 1);
1da177e4 1150
a251c17a 1151 ordernum = get_random_u32();
9acbc584 1152 lastnum = ordernum & 0xFFFFF;
1da177e4 1153retry:
9acbc584
KI
1154 ordernum = (ordernum + 1) & 0xFFFFF;
1155 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1da177e4 1156
e6b4b873 1157 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
79b05bea 1158 unix_table_double_lock(net, old_hash, new_hash);
1da177e4 1159
79b05bea
KI
1160 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1161 unix_table_double_unlock(net, old_hash, new_hash);
afd20b92 1162
9acbc584 1163 /* __unix_find_socket_byname() may take long time if many names
8df73ff9
TH
1164 * are already in use.
1165 */
1166 cond_resched();
9acbc584
KI
1167
1168 if (ordernum == lastnum) {
1169 /* Give up if all names seems to be in use. */
8df73ff9 1170 err = -ENOSPC;
9acbc584 1171 unix_release_addr(addr);
8df73ff9
TH
1172 goto out;
1173 }
9acbc584 1174
1da177e4
LT
1175 goto retry;
1176 }
1da177e4 1177
cf2f225e 1178 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1179 unix_table_double_unlock(net, old_hash, new_hash);
1da177e4
LT
1180 err = 0;
1181
6e1ce3c3 1182out: mutex_unlock(&u->bindlock);
1da177e4
LT
1183 return err;
1184}
1185
12f21c49
KI
/*
 * Bind @sk to the filesystem pathname in @sunaddr.
 *
 * Steps, in order:
 *  - build a unix_address from the path (length recomputed from strlen());
 *  - kern_path_create() + security_path_mknod() + vfs_mknod() create the
 *    socket inode with mode S_IFSOCK | (socket inode mode & ~umask);
 *  - under u->bindlock, and only if the socket has no address yet, take
 *    references on the mount and dentry, store them in u->path and move
 *    the socket to the hash bucket derived from the new inode.
 *
 * Returns 0 on success.  On failure returns -ENOMEM, the path/mknod/lock
 * error, or -EINVAL if the socket is already bound; -EEXIST is reported to
 * the caller as -EADDRINUSE.  If the failure happens after the node was
 * created, the node is unlinked again.
 */
1186static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1187 int addr_len)
faf02010 1188{
71e6be6f
AV
1189 umode_t mode = S_IFSOCK |
1190 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
afd20b92 1191 unsigned int new_hash, old_hash = sk->sk_hash;
12f21c49 1192 struct unix_sock *u = unix_sk(sk);
79b05bea 1193 struct net *net = sock_net(sk);
abf08576 1194 struct mnt_idmap *idmap;
12f21c49 1195 struct unix_address *addr;
38f7bd94 1196 struct dentry *dentry;
12f21c49 1197 struct path parent;
71e6be6f
AV
1198 int err;
1199
12f21c49
KI
1200 unix_mkname_bsd(sunaddr, addr_len);
1201 addr_len = strlen(sunaddr->sun_path) +
1202 offsetof(struct sockaddr_un, sun_path) + 1;
1203
1204 addr = unix_create_addr(sunaddr, addr_len);
1205 if (!addr)
1206 return -ENOMEM;
1207
38f7bd94
LT
1208 /*
1209 * Get the parent directory, calculate the hash for last
1210 * component.
1211 */
71e6be6f 1212 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
12f21c49
KI
1213 if (IS_ERR(dentry)) {
1214 err = PTR_ERR(dentry);
1215 goto out;
1216 }
faf02010 1217
38f7bd94
LT
1218 /*
1219 * All right, let's create it.
1220 */
abf08576 1221 idmap = mnt_idmap(parent.mnt);
71e6be6f 1222 err = security_path_mknod(&parent, dentry, mode, 0);
56c1731b 1223 if (!err)
abf08576 1224 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
c0c3b8d3 1225 if (err)
12f21c49 1226 goto out_path;
fa42d910 1227 err = mutex_lock_interruptible(&u->bindlock);
c0c3b8d3
AV
1228 if (err)
1229 goto out_unlink;
/* Already bound: out_unlock sets err to -EINVAL and unlinks the new node. */
1230 if (u->addr)
1231 goto out_unlock;
fa42d910 1232
e6b4b873 1233 new_hash = unix_bsd_hash(d_backing_inode(dentry));
79b05bea 1234 unix_table_double_lock(net, old_hash, new_hash);
56c1731b
AV
1235 u->path.mnt = mntget(parent.mnt);
1236 u->path.dentry = dget(dentry);
cf2f225e 1237 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1238 unix_table_double_unlock(net, old_hash, new_hash);
51bae889 1239 unix_insert_bsd_socket(sk);
fa42d910 1240 mutex_unlock(&u->bindlock);
56c1731b 1241 done_path_create(&parent, dentry);
fa42d910 1242 return 0;
c0c3b8d3
AV
1243
1244out_unlock:
1245 mutex_unlock(&u->bindlock);
1246 err = -EINVAL;
1247out_unlink:
1248 /* failed after successful mknod? unlink what we'd created... */
abf08576 1249 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
12f21c49 1250out_path:
c0c3b8d3 1251 done_path_create(&parent, dentry);
12f21c49
KI
1252out:
1253 unix_release_addr(addr);
/* An existing path is reported to the caller as "address in use". */
1254 return err == -EEXIST ? -EADDRINUSE : err;
fa42d910
AV
1255}
1256
12f21c49
KI
1257static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1258 int addr_len)
fa42d910 1259{
afd20b92 1260 unsigned int new_hash, old_hash = sk->sk_hash;
fa42d910 1261 struct unix_sock *u = unix_sk(sk);
79b05bea 1262 struct net *net = sock_net(sk);
12f21c49 1263 struct unix_address *addr;
fa42d910
AV
1264 int err;
1265
12f21c49
KI
1266 addr = unix_create_addr(sunaddr, addr_len);
1267 if (!addr)
1268 return -ENOMEM;
1269
fa42d910
AV
1270 err = mutex_lock_interruptible(&u->bindlock);
1271 if (err)
12f21c49 1272 goto out;
fa42d910
AV
1273
1274 if (u->addr) {
12f21c49
KI
1275 err = -EINVAL;
1276 goto out_mutex;
fa42d910
AV
1277 }
1278
e6b4b873 1279 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
79b05bea 1280 unix_table_double_lock(net, old_hash, new_hash);
12f21c49 1281
79b05bea 1282 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
12f21c49
KI
1283 goto out_spin;
1284
cf2f225e 1285 __unix_set_addr_hash(net, sk, addr, new_hash);
79b05bea 1286 unix_table_double_unlock(net, old_hash, new_hash);
fa42d910
AV
1287 mutex_unlock(&u->bindlock);
1288 return 0;
12f21c49
KI
1289
1290out_spin:
79b05bea 1291 unix_table_double_unlock(net, old_hash, new_hash);
12f21c49
KI
1292 err = -EADDRINUSE;
1293out_mutex:
1294 mutex_unlock(&u->bindlock);
1295out:
1296 unix_release_addr(addr);
1297 return err;
fa42d910
AV
1298}
1299
1da177e4
LT
1300static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1301{
e27dfcea 1302 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
5c32a3ed 1303 struct sock *sk = sock->sk;
5c32a3ed 1304 int err;
1da177e4 1305
b8a58aa6
KI
1306 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1307 sunaddr->sun_family == AF_UNIX)
f7ed31f4 1308 return unix_autobind(sk);
1da177e4 1309
b8a58aa6
KI
1310 err = unix_validate_addr(sunaddr, addr_len);
1311 if (err)
1312 return err;
1313
12f21c49
KI
1314 if (sunaddr->sun_path[0])
1315 err = unix_bind_bsd(sk, sunaddr, addr_len);
fa42d910 1316 else
12f21c49
KI
1317 err = unix_bind_abstract(sk, sunaddr, addr_len);
1318
1319 return err;
1da177e4
LT
1320}
1321
278a3de5
DM
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * If @sk2 is NULL or the same sock as @sk1, only @sk1 is locked.
 * Otherwise the sock with the lower address is locked first and the other
 * one with the nested variant, so every caller uses the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1336
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and also
 * @sk2 when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
1346
1da177e4
LT
/*
 * ->connect() handler for datagram sockets.
 *
 * @alen must at least cover sa_family, otherwise -EINVAL is returned.
 *
 * For a family other than AF_UNSPEC: the address is validated, the socket
 * is autobound first if SOCK_PASSCRED is set and it has no address, and
 * the target is looked up.  With both state locks held the target must not
 * be dead (the lookup is retried if it is), unix_may_send() must allow the
 * pairing (-EPERM otherwise) and the security hook must agree; then both
 * socks are marked TCP_ESTABLISHED.
 *
 * For AF_UNSPEC the new peer is NULL, i.e. the socket is disconnected; only
 * @sk's state lock is taken.
 *
 * If a previous peer existed it is replaced, the socket state becomes
 * TCP_CLOSE when disconnecting, and the reference on the old peer is
 * dropped after unlocking.
 *
 * Returns 0 on success or a negative errno.
 */
1347static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1348 int alen, int flags)
1349{
e27dfcea 1350 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
340c3d33 1351 struct sock *sk = sock->sk;
1da177e4 1352 struct sock *other;
1da177e4
LT
1353 int err;
1354
defbcf2d
MJ
1355 err = -EINVAL;
1356 if (alen < offsetofend(struct sockaddr, sa_family))
1357 goto out;
1358
1da177e4 1359 if (addr->sa_family != AF_UNSPEC) {
b8a58aa6
KI
1360 err = unix_validate_addr(sunaddr, alen);
1361 if (err)
1362 goto out;
1363
1da177e4 1364 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
f7ed31f4
KI
1365 !unix_sk(sk)->addr) {
1366 err = unix_autobind(sk);
1367 if (err)
1368 goto out;
1369 }
1da177e4 1370
278a3de5 1371restart:
340c3d33 1372 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
aed26f55
KI
1373 if (IS_ERR(other)) {
1374 err = PTR_ERR(other);
1da177e4 1375 goto out;
aed26f55 1376 }
1da177e4 1377
278a3de5
DM
1378 unix_state_double_lock(sk, other);
1379
1380 /* Apparently VFS overslept socket death. Retry. */
1381 if (sock_flag(other, SOCK_DEAD)) {
1382 unix_state_double_unlock(sk, other);
1383 sock_put(other);
1384 goto restart;
1385 }
1da177e4
LT
1386
1387 err = -EPERM;
1388 if (!unix_may_send(sk, other))
1389 goto out_unlock;
1390
1391 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1392 if (err)
1393 goto out_unlock;
1394
dc56ad70 1395 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1da177e4
LT
1396 } else {
1397 /*
1398 * 1003.1g breaking connected state with AF_UNSPEC
1399 */
1400 other = NULL;
278a3de5 1401 unix_state_double_lock(sk, other);
1da177e4
LT
1402 }
1403
1404 /*
1405 * If it was connected, reconnect.
1406 */
1407 if (unix_peer(sk)) {
1408 struct sock *old_peer = unix_peer(sk);
dc56ad70 1409
e27dfcea 1410 unix_peer(sk) = other;
dc56ad70
ED
1411 if (!other)
1412 sk->sk_state = TCP_CLOSE;
7d267278
RW
1413 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1414
278a3de5 1415 unix_state_double_unlock(sk, other);
1da177e4
LT
1416
/* Only run the disconnect handling when the peer actually changed. */
1417 if (other != old_peer)
1418 unix_dgram_disconnected(sk, old_peer);
1419 sock_put(old_peer);
1420 } else {
e27dfcea 1421 unix_peer(sk) = other;
278a3de5 1422 unix_state_double_unlock(sk, other);
1da177e4 1423 }
83301b53 1424
ac7bfa62 1425 return 0;
1da177e4
LT
1426
1427out_unlock:
278a3de5 1428 unix_state_double_unlock(sk, other);
1da177e4
LT
1429 sock_put(other);
1430out:
1431 return err;
1432}
1433
1434static long unix_wait_for_peer(struct sock *other, long timeo)
48851e9e 1435 __releases(&unix_sk(other)->lock)
1da177e4
LT
1436{
1437 struct unix_sock *u = unix_sk(other);
1438 int sched;
1439 DEFINE_WAIT(wait);
1440
1441 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1442
1443 sched = !sock_flag(other, SOCK_DEAD) &&
1444 !(other->sk_shutdown & RCV_SHUTDOWN) &&
679ed006 1445 unix_recvq_full_lockless(other);
1da177e4 1446
1c92b4e5 1447 unix_state_unlock(other);
1da177e4
LT
1448
1449 if (sched)
1450 timeo = schedule_timeout(timeo);
1451
1452 finish_wait(&u->peer_wait, &wait);
1453 return timeo;
1454}
1455
/*
 * ->connect() handler shared by stream and seqpacket sockets (both ops
 * tables in this file point here).
 *
 * Outline:
 *  - validate the address; autobind first if SOCK_PASSCRED is set and the
 *    socket has no address;
 *  - pre-allocate the sock that will become the server side of the
 *    connection (newsk) and a one-byte skb charged to it;
 *  - look up the listener and lock it; retry the lookup if it is dead,
 *    fail with -ECONNREFUSED if it is not in TCP_LISTEN or has
 *    RCV_SHUTDOWN set;
 *  - if the listener's receive queue is full, fail with -EAGAIN when the
 *    send timeout is zero, otherwise wait and start over (a pending signal
 *    ends the wait with sock_intr_errno());
 *  - check our own state (TCP_CLOSE required; -EISCONN if established,
 *    -EINVAL otherwise), take our state lock nested inside the listener's
 *    and re-check that the state did not change;
 *  - after the security hook agrees, cross-link sk and newsk as peers,
 *    copy the listener's address/path to newsk, queue the skb on the
 *    listener's receive queue and signal it.  unix_accept() later picks
 *    the new sock up from skb->sk.
 *
 * Returns 0 on success or a negative errno.  The error exit frees the skb,
 * releases newsk and drops the reference on the listener if one is held.
 */
1456static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1457 int addr_len, int flags)
1458{
e27dfcea 1459 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
340c3d33 1460 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1da177e4 1461 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
340c3d33 1462 struct net *net = sock_net(sk);
1da177e4 1463 struct sk_buff *skb = NULL;
1da177e4 1464 long timeo;
340c3d33
KI
1465 int err;
1466 int st;
1da177e4 1467
b8a58aa6
KI
1468 err = unix_validate_addr(sunaddr, addr_len);
1469 if (err)
1470 goto out;
1471
f7ed31f4
KI
1472 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
1473 err = unix_autobind(sk);
1474 if (err)
1475 goto out;
1476 }
1da177e4
LT
1477
1478 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1479
1480 /* First of all allocate resources.
1481 If we will make it after state is locked,
1482 we will have to recheck all again in any case.
1483 */
1484
1da177e4 1485 /* create new sock for complete connection */
340c3d33 1486 newsk = unix_create1(net, NULL, 0, sock->type);
f4bd73b5
KI
1487 if (IS_ERR(newsk)) {
1488 err = PTR_ERR(newsk);
1489 newsk = NULL;
1da177e4 1490 goto out;
f4bd73b5
KI
1491 }
1492
1493 err = -ENOMEM;
1da177e4
LT
1494
1495 /* Allocate skb for sending to listening sock */
1496 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1497 if (skb == NULL)
1498 goto out;
1499
1500restart:
1501 /* Find listening sock. */
d2d8c9fd 1502 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
aed26f55
KI
1503 if (IS_ERR(other)) {
1504 err = PTR_ERR(other);
1505 other = NULL;
1da177e4 1506 goto out;
aed26f55 1507 }
1da177e4
LT
1508
1509 /* Latch state of peer */
1c92b4e5 1510 unix_state_lock(other);
1da177e4
LT
1511
1512 /* Apparently VFS overslept socket death. Retry. */
1513 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1514 unix_state_unlock(other);
1da177e4
LT
1515 sock_put(other);
1516 goto restart;
1517 }
1518
1519 err = -ECONNREFUSED;
1520 if (other->sk_state != TCP_LISTEN)
1521 goto out_unlock;
77238f2b
TS
1522 if (other->sk_shutdown & RCV_SHUTDOWN)
1523 goto out_unlock;
1da177e4 1524
/* Listener backlog is full: fail at once without a timeout, else wait. */
3c73419c 1525 if (unix_recvq_full(other)) {
1da177e4
LT
1526 err = -EAGAIN;
1527 if (!timeo)
1528 goto out_unlock;
1529
/* unix_wait_for_peer() releases other's state lock. */
1530 timeo = unix_wait_for_peer(other, timeo);
1531
1532 err = sock_intr_errno(timeo);
1533 if (signal_pending(current))
1534 goto out;
1535 sock_put(other);
1536 goto restart;
ac7bfa62 1537 }
1da177e4
LT
1538
1539 /* Latch our state.
1540
e5537bfc 1541 It is a tricky place. We need to grab our state lock and cannot
1da177e4
LT
1542 drop lock on peer. It is dangerous because deadlock is
1543 possible. Connect to self case and simultaneous
1544 attempt to connect are eliminated by checking socket
1545 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1546 check this before attempt to grab lock.
1547
1548 Well, and we have to recheck the state after socket locked.
1549 */
1550 st = sk->sk_state;
1551
1552 switch (st) {
1553 case TCP_CLOSE:
1554 /* This is ok... continue with connect */
1555 break;
1556 case TCP_ESTABLISHED:
1557 /* Socket is already connected */
1558 err = -EISCONN;
1559 goto out_unlock;
1560 default:
1561 err = -EINVAL;
1562 goto out_unlock;
1563 }
1564
1c92b4e5 1565 unix_state_lock_nested(sk);
1da177e4
LT
1566
1567 if (sk->sk_state != st) {
1c92b4e5
DM
1568 unix_state_unlock(sk);
1569 unix_state_unlock(other);
1da177e4
LT
1570 sock_put(other);
1571 goto restart;
1572 }
1573
3610cda5 1574 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1575 if (err) {
1c92b4e5 1576 unix_state_unlock(sk);
1da177e4
LT
1577 goto out_unlock;
1578 }
1579
1580 /* The way is open! Quickly set all the necessary fields... */
1581
1582 sock_hold(sk);
1583 unix_peer(newsk) = sk;
1584 newsk->sk_state = TCP_ESTABLISHED;
1585 newsk->sk_type = sk->sk_type;
109f6e39 1586 init_peercred(newsk);
1da177e4 1587 newu = unix_sk(newsk);
eaefd110 1588 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1589 otheru = unix_sk(other);
1590
ae3b5641
AV
1591 /* copy address information from listening to new sock
1592 *
1593 * The contents of *(otheru->addr) and otheru->path
1594 * are seen fully set up here, since we have found
2f7ca90a
KI
1595 * otheru in hash under its lock. Insertion into the
1596 * hash chain we'd found it in had been done in an
1597 * earlier critical area protected by the chain's lock,
ae3b5641
AV
1598 * the same one where we'd set *(otheru->addr) contents,
1599 * as well as otheru->path and otheru->addr itself.
1600 *
1601 * Using smp_store_release() here to set newu->addr
1602 * is enough to make those stores, as well as stores
1603 * to newu->path visible to anyone who gets newu->addr
1604 * by smp_load_acquire(). IOW, the same warranties
1605 * as for unix_sock instances bound in unix_bind() or
1606 * in unix_autobind().
1607 */
40ffe67d
AV
1608 if (otheru->path.dentry) {
1609 path_get(&otheru->path);
1610 newu->path = otheru->path;
1da177e4 1611 }
ae3b5641
AV
1612 refcount_inc(&otheru->addr->refcnt);
1613 smp_store_release(&newu->addr, otheru->addr);
1da177e4
LT
1614
1615 /* Set credentials */
109f6e39 1616 copy_peercred(sk, other);
1da177e4 1617
1da177e4
LT
1618 sock->state = SS_CONNECTED;
1619 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1620 sock_hold(newsk);
1621
4e857c58 1622 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
830a1e5c 1623 unix_peer(sk) = newsk;
1da177e4 1624
1c92b4e5 1625 unix_state_unlock(sk);
1da177e4 1626
4e03d073 1627 /* take ten and send info to listening sock */
1da177e4
LT
1628 spin_lock(&other->sk_receive_queue.lock);
1629 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1630 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1631 unix_state_unlock(other);
676d2369 1632 other->sk_data_ready(other);
1da177e4
LT
1633 sock_put(other);
1634 return 0;
1635
1636out_unlock:
1637 if (other)
1c92b4e5 1638 unix_state_unlock(other);
1da177e4
LT
1639
1640out:
40d44446 1641 kfree_skb(skb);
1da177e4
LT
1642 if (newsk)
1643 unix_release_sock(newsk, 0);
1644 if (other)
1645 sock_put(other);
1646 return err;
1647}
1648
1649static int unix_socketpair(struct socket *socka, struct socket *sockb)
1650{
e27dfcea 1651 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1652
1653 /* Join our sockets back to back */
1654 sock_hold(ska);
1655 sock_hold(skb);
e27dfcea
JK
1656 unix_peer(ska) = skb;
1657 unix_peer(skb) = ska;
109f6e39
EB
1658 init_peercred(ska);
1659 init_peercred(skb);
1da177e4 1660
83301b53
CW
1661 ska->sk_state = TCP_ESTABLISHED;
1662 skb->sk_state = TCP_ESTABLISHED;
1663 socka->state = SS_CONNECTED;
1664 sockb->state = SS_CONNECTED;
1da177e4
LT
1665 return 0;
1666}
1667
90c6bd34
DB
1668static void unix_sock_inherit_flags(const struct socket *old,
1669 struct socket *new)
1670{
1671 if (test_bit(SOCK_PASSCRED, &old->flags))
1672 set_bit(SOCK_PASSCRED, &new->flags);
1673 if (test_bit(SOCK_PASSSEC, &old->flags))
1674 set_bit(SOCK_PASSSEC, &new->flags);
1675}
1676
cdfbabfb
DH
1677static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1678 bool kern)
1da177e4
LT
1679{
1680 struct sock *sk = sock->sk;
1681 struct sock *tsk;
1682 struct sk_buff *skb;
1683 int err;
1684
1685 err = -EOPNOTSUPP;
6eba6a37 1686 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1687 goto out;
1688
1689 err = -EINVAL;
1690 if (sk->sk_state != TCP_LISTEN)
1691 goto out;
1692
1693 /* If socket state is TCP_LISTEN it cannot change (for now...),
1694 * so that no locks are necessary.
1695 */
1696
f4b41f06
OH
1697 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1698 &err);
1da177e4
LT
1699 if (!skb) {
1700 /* This means receive shutdown. */
1701 if (err == 0)
1702 err = -EINVAL;
1703 goto out;
1704 }
1705
1706 tsk = skb->sk;
1707 skb_free_datagram(sk, skb);
1708 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1709
1710 /* attach accepted sock to socket */
1c92b4e5 1711 unix_state_lock(tsk);
1da177e4 1712 newsock->state = SS_CONNECTED;
90c6bd34 1713 unix_sock_inherit_flags(sock, newsock);
1da177e4 1714 sock_graft(tsk, newsock);
1c92b4e5 1715 unix_state_unlock(tsk);
1da177e4
LT
1716 return 0;
1717
1718out:
1719 return err;
1720}
1721
1722
9b2c45d4 1723static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1da177e4
LT
1724{
1725 struct sock *sk = sock->sk;
ae3b5641 1726 struct unix_address *addr;
13cfa97b 1727 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1728 int err = 0;
1729
1730 if (peer) {
1731 sk = unix_peer_get(sk);
1732
1733 err = -ENOTCONN;
1734 if (!sk)
1735 goto out;
1736 err = 0;
1737 } else {
1738 sock_hold(sk);
1739 }
1740
ae3b5641
AV
1741 addr = smp_load_acquire(&unix_sk(sk)->addr);
1742 if (!addr) {
1da177e4
LT
1743 sunaddr->sun_family = AF_UNIX;
1744 sunaddr->sun_path[0] = 0;
755662ce 1745 err = offsetof(struct sockaddr_un, sun_path);
1da177e4 1746 } else {
9b2c45d4
DV
1747 err = addr->len;
1748 memcpy(sunaddr, addr->name, addr->len);
1da177e4 1749 }
1da177e4
LT
1750 sock_put(sk);
1751out:
1752 return err;
1753}
1754
cbcf0112
MS
/*
 * Duplicate the fd list attached to @skb into @scm->fp (used for peeking),
 * then take and immediately drop unix_gc_lock as a barrier against the
 * garbage collector; the rationale is in the comment inside the function.
 */
1755static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1756{
1757 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1758
1759 /*
1760 * Garbage collection of unix sockets starts by selecting a set of
1761 * candidate sockets which have reference only from being in flight
1762 * (total_refs == inflight_refs). This condition is checked once during
1763 * the candidate collection phase, and candidates are marked as such, so
1764 * that non-candidates can later be ignored. While inflight_refs is
1765 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1766 * is an instantaneous decision.
1767 *
1768 * Once a candidate, however, the socket must not be reinstalled into a
1769 * file descriptor while the garbage collection is in progress.
1770 *
1771 * If the above conditions are met, then the directed graph of
1772 * candidates (*) does not change while unix_gc_lock is held.
1773 *
1774 * Any operation that changes the file count through file descriptors
1775 * (dup, close, sendmsg) does not change the graph since candidates are
1776 * not installed in fds.
1777 *
1778 * Dequeuing a candidate via recvmsg would install it into an fd, but
1779 * that takes unix_gc_lock to decrement the inflight count, so it's
1780 * serialized with garbage collection.
1781 *
1782 * MSG_PEEK is special in that it does not change the inflight count,
1783 * yet does install the socket into an fd. The following lock/unlock
1784 * pair is to ensure serialization with garbage collection. It must be
1785 * done between incrementing the file count and installing the file into
1786 * an fd.
1787 *
1788 * If garbage collection starts after the barrier provided by the
1789 * lock/unlock, then it will see the elevated refcount and not mark this
1790 * as a candidate. If a garbage collection is already in progress
1791 * before the file count was incremented, then the lock/unlock pair will
1792 * ensure that garbage collection is finished before progressing to
1793 * installing the fd.
1794 *
1795 * (*) A -> B where B is on the queue of A or B is on the queue of C
1796 * which is on the queue of listening socket A.
1797 */
1798 spin_lock(&unix_gc_lock);
1799 spin_unlock(&unix_gc_lock);
1800}
1801
f78a5fda 1802static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1803{
1804 int err = 0;
16e57262 1805
f78a5fda 1806 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1807 UNIXCB(skb).uid = scm->creds.uid;
1808 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1809 UNIXCB(skb).fp = NULL;
37a9a8df 1810 unix_get_secdata(scm, skb);
7361c36c
EB
1811 if (scm->fp && send_fds)
1812 err = unix_attach_fds(scm, skb);
1813
1814 skb->destructor = unix_destruct_scm;
1815 return err;
1816}
1817
9490f886
HFS
1818static bool unix_passcred_enabled(const struct socket *sock,
1819 const struct sock *other)
1820{
1821 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1822 !other->sk_socket ||
1823 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1824}
1825
16e57262
ED
1826/*
1827 * Some apps rely on write() giving SCM_CREDENTIALS
1828 * We include credentials if source or destination socket
1829 * asserted SOCK_PASSCRED.
1830 */
1831static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1832 const struct sock *other)
1833{
6b0ee8c0 1834 if (UNIXCB(skb).pid)
16e57262 1835 return;
9490f886 1836 if (unix_passcred_enabled(sock, other)) {
16e57262 1837 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1838 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1839 }
1840}
1841
9490f886
HFS
1842static int maybe_init_creds(struct scm_cookie *scm,
1843 struct socket *socket,
1844 const struct sock *other)
1845{
1846 int err;
1847 struct msghdr msg = { .msg_controllen = 0 };
1848
1849 err = scm_send(socket, &msg, scm, false);
1850 if (err)
1851 return err;
1852
1853 if (unix_passcred_enabled(socket, other)) {
1854 scm->pid = get_pid(task_tgid(current));
1855 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1856 }
1857 return err;
1858}
1859
1860static bool unix_skb_scm_eq(struct sk_buff *skb,
1861 struct scm_cookie *scm)
1862{
b146cbf2
KC
1863 return UNIXCB(skb).pid == scm->pid &&
1864 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1865 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
9490f886
HFS
1866 unix_secdata_eq(scm, skb);
1867}
1868
3c32da19
KT
1869static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1870{
1871 struct scm_fp_list *fp = UNIXCB(skb).fp;
1872 struct unix_sock *u = unix_sk(sk);
1873
3c32da19 1874 if (unlikely(fp && fp->count))
7782040b 1875 atomic_add(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1876}
1877
1878static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1879{
1880 struct scm_fp_list *fp = UNIXCB(skb).fp;
1881 struct unix_sock *u = unix_sk(sk);
1882
3c32da19 1883 if (unlikely(fp && fp->count))
7782040b 1884 atomic_sub(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1885}
1886
1da177e4
LT
1887/*
1888 * Send AF_UNIX data.
1889 */
1890
1b784140
YX
1891static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1892 size_t len)
1da177e4 1893{
342dfc30 1894 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
340c3d33
KI
1895 struct sock *sk = sock->sk, *other = NULL;
1896 struct unix_sock *u = unix_sk(sk);
7cc05662 1897 struct scm_cookie scm;
340c3d33 1898 struct sk_buff *skb;
eb6a2481 1899 int data_len = 0;
7d267278 1900 int sk_locked;
340c3d33
KI
1901 long timeo;
1902 int err;
1da177e4 1903
5f23b734 1904 wait_for_unix_gc();
7cc05662 1905 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1906 if (err < 0)
1907 return err;
1908
1909 err = -EOPNOTSUPP;
1910 if (msg->msg_flags&MSG_OOB)
1911 goto out;
1912
1913 if (msg->msg_namelen) {
b8a58aa6
KI
1914 err = unix_validate_addr(sunaddr, msg->msg_namelen);
1915 if (err)
1916 goto out;
1da177e4
LT
1917 } else {
1918 sunaddr = NULL;
1919 err = -ENOTCONN;
1920 other = unix_peer_get(sk);
1921 if (!other)
1922 goto out;
1923 }
1924
f7ed31f4
KI
1925 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
1926 err = unix_autobind(sk);
1927 if (err)
1928 goto out;
1929 }
1da177e4
LT
1930
1931 err = -EMSGSIZE;
1932 if (len > sk->sk_sndbuf - 32)
1933 goto out;
1934
31ff6aa5 1935 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1936 data_len = min_t(size_t,
1937 len - SKB_MAX_ALLOC,
1938 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1939 data_len = PAGE_ALIGN(data_len);
1940
1941 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1942 }
eb6a2481
ED
1943
1944 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1945 msg->msg_flags & MSG_DONTWAIT, &err,
1946 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1947 if (skb == NULL)
1da177e4
LT
1948 goto out;
1949
7cc05662 1950 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1951 if (err < 0)
7361c36c 1952 goto out_free;
877ce7c1 1953
eb6a2481
ED
1954 skb_put(skb, len - data_len);
1955 skb->data_len = data_len;
1956 skb->len = len;
c0371da6 1957 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1958 if (err)
1959 goto out_free;
1960
1961 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1962
1963restart:
1964 if (!other) {
1965 err = -ECONNRESET;
1966 if (sunaddr == NULL)
1967 goto out_free;
1968
340c3d33 1969 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
d2d8c9fd 1970 sk->sk_type);
aed26f55
KI
1971 if (IS_ERR(other)) {
1972 err = PTR_ERR(other);
1973 other = NULL;
1da177e4 1974 goto out_free;
aed26f55 1975 }
1da177e4
LT
1976 }
1977
d6ae3bae
AC
1978 if (sk_filter(other, skb) < 0) {
1979 /* Toss the packet but do not return any error to the sender */
1980 err = len;
1981 goto out_free;
1982 }
1983
7d267278 1984 sk_locked = 0;
1c92b4e5 1985 unix_state_lock(other);
7d267278 1986restart_locked:
1da177e4
LT
1987 err = -EPERM;
1988 if (!unix_may_send(sk, other))
1989 goto out_unlock;
1990
7d267278 1991 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1da177e4
LT
1992 /*
1993 * Check with 1003.1g - what should
1994 * datagram error
1995 */
1c92b4e5 1996 unix_state_unlock(other);
1da177e4
LT
1997 sock_put(other);
1998
7d267278
RW
1999 if (!sk_locked)
2000 unix_state_lock(sk);
2001
1da177e4 2002 err = 0;
3ff8bff7
KT
2003 if (sk->sk_type == SOCK_SEQPACKET) {
2004 /* We are here only when racing with unix_release_sock()
2005 * is clearing @other. Never change state to TCP_CLOSE
2006 * unlike SOCK_DGRAM wants.
2007 */
2008 unix_state_unlock(sk);
2009 err = -EPIPE;
2010 } else if (unix_peer(sk) == other) {
e27dfcea 2011 unix_peer(sk) = NULL;
7d267278
RW
2012 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2013
3ff8bff7 2014 sk->sk_state = TCP_CLOSE;
1c92b4e5 2015 unix_state_unlock(sk);
1da177e4
LT
2016
2017 unix_dgram_disconnected(sk, other);
2018 sock_put(other);
2019 err = -ECONNREFUSED;
2020 } else {
1c92b4e5 2021 unix_state_unlock(sk);
1da177e4
LT
2022 }
2023
2024 other = NULL;
2025 if (err)
2026 goto out_free;
2027 goto restart;
2028 }
2029
2030 err = -EPIPE;
2031 if (other->sk_shutdown & RCV_SHUTDOWN)
2032 goto out_unlock;
2033
2034 if (sk->sk_type != SOCK_SEQPACKET) {
2035 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2036 if (err)
2037 goto out_unlock;
2038 }
2039
a5527dda
RW
2040 /* other == sk && unix_peer(other) != sk if
2041 * - unix_peer(sk) == NULL, destination address bound to sk
2042 * - unix_peer(sk) == sk by time of get but disconnected before lock
2043 */
2044 if (other != sk &&
86b18aaa
QC
2045 unlikely(unix_peer(other) != sk &&
2046 unix_recvq_full_lockless(other))) {
7d267278
RW
2047 if (timeo) {
2048 timeo = unix_wait_for_peer(other, timeo);
2049
2050 err = sock_intr_errno(timeo);
2051 if (signal_pending(current))
2052 goto out_free;
2053
2054 goto restart;
1da177e4
LT
2055 }
2056
7d267278
RW
2057 if (!sk_locked) {
2058 unix_state_unlock(other);
2059 unix_state_double_lock(sk, other);
2060 }
1da177e4 2061
7d267278
RW
2062 if (unix_peer(sk) != other ||
2063 unix_dgram_peer_wake_me(sk, other)) {
2064 err = -EAGAIN;
2065 sk_locked = 1;
2066 goto out_unlock;
2067 }
1da177e4 2068
7d267278
RW
2069 if (!sk_locked) {
2070 sk_locked = 1;
2071 goto restart_locked;
2072 }
1da177e4
LT
2073 }
2074
7d267278
RW
2075 if (unlikely(sk_locked))
2076 unix_state_unlock(sk);
2077
3f66116e
AC
2078 if (sock_flag(other, SOCK_RCVTSTAMP))
2079 __net_timestamp(skb);
16e57262 2080 maybe_add_creds(skb, sock, other);
3c32da19 2081 scm_stat_add(other, skb);
7782040b 2082 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2083 unix_state_unlock(other);
676d2369 2084 other->sk_data_ready(other);
1da177e4 2085 sock_put(other);
7cc05662 2086 scm_destroy(&scm);
1da177e4
LT
2087 return len;
2088
2089out_unlock:
7d267278
RW
2090 if (sk_locked)
2091 unix_state_unlock(sk);
1c92b4e5 2092 unix_state_unlock(other);
1da177e4
LT
2093out_free:
2094 kfree_skb(skb);
2095out:
2096 if (other)
2097 sock_put(other);
7cc05662 2098 scm_destroy(&scm);
1da177e4
LT
2099 return err;
2100}
2101
e370a723 2102/* We use paged skbs for stream sockets. The paged part of one skb is
 * sized to hold 32768 bytes; get_order() rounds that up to a whole
 * power-of-two number of pages, so it is never smaller than one full page.
 */
2105#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 2106
4edf21aa 2107#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2aab4b96
ED
2108static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2109 struct scm_cookie *scm, bool fds_sent)
314001f0
RS
2110{
2111 struct unix_sock *ousk = unix_sk(other);
2112 struct sk_buff *skb;
2113 int err = 0;
2114
2115 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2116
2117 if (!skb)
2118 return err;
2119
2aab4b96
ED
2120 err = unix_scm_to_skb(scm, skb, !fds_sent);
2121 if (err < 0) {
2122 kfree_skb(skb);
2123 return err;
2124 }
314001f0 2125 skb_put(skb, 1);
314001f0
RS
2126 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2127
2128 if (err) {
2129 kfree_skb(skb);
2130 return err;
2131 }
2132
2133 unix_state_lock(other);
19eed721
RS
2134
2135 if (sock_flag(other, SOCK_DEAD) ||
2136 (other->sk_shutdown & RCV_SHUTDOWN)) {
2137 unix_state_unlock(other);
2138 kfree_skb(skb);
2139 return -EPIPE;
2140 }
2141
314001f0
RS
2142 maybe_add_creds(skb, sock, other);
2143 skb_get(skb);
2144
2145 if (ousk->oob_skb)
19eed721 2146 consume_skb(ousk->oob_skb);
314001f0 2147
e82025c6 2148 WRITE_ONCE(ousk->oob_skb, skb);
314001f0
RS
2149
2150 scm_stat_add(other, skb);
2151 skb_queue_tail(&other->sk_receive_queue, skb);
2152 sk_send_sigurg(other);
2153 unix_state_unlock(other);
2154 other->sk_data_ready(other);
2155
2156 return err;
2157}
2158#endif
2159
1b784140
YX
2160static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2161 size_t len)
1da177e4 2162{
1da177e4
LT
2163 struct sock *sk = sock->sk;
2164 struct sock *other = NULL;
6eba6a37 2165 int err, size;
f78a5fda 2166 struct sk_buff *skb;
e27dfcea 2167 int sent = 0;
7cc05662 2168 struct scm_cookie scm;
8ba69ba6 2169 bool fds_sent = false;
e370a723 2170 int data_len;
1da177e4 2171
5f23b734 2172 wait_for_unix_gc();
7cc05662 2173 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2174 if (err < 0)
2175 return err;
2176
2177 err = -EOPNOTSUPP;
314001f0 2178 if (msg->msg_flags & MSG_OOB) {
4edf21aa 2179#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0
RS
2180 if (len)
2181 len--;
2182 else
2183#endif
2184 goto out_err;
2185 }
1da177e4
LT
2186
2187 if (msg->msg_namelen) {
2188 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2189 goto out_err;
2190 } else {
1da177e4 2191 err = -ENOTCONN;
830a1e5c 2192 other = unix_peer(sk);
1da177e4
LT
2193 if (!other)
2194 goto out_err;
2195 }
2196
2197 if (sk->sk_shutdown & SEND_SHUTDOWN)
2198 goto pipe_err;
2199
6eba6a37 2200 while (sent < len) {
e370a723 2201 size = len - sent;
1da177e4
LT
2202
2203 /* Keep two messages in the pipe so it schedules better */
e370a723 2204 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 2205
e370a723
ED
2206 /* allow fallback to order-0 allocations */
2207 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 2208
e370a723 2209 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 2210
31ff6aa5
KT
2211 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2212
e370a723 2213 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
2214 msg->msg_flags & MSG_DONTWAIT, &err,
2215 get_order(UNIX_SKB_FRAGS_SZ));
e370a723 2216 if (!skb)
1da177e4
LT
2217 goto out_err;
2218
f78a5fda 2219 /* Only send the fds in the first buffer */
7cc05662 2220 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 2221 if (err < 0) {
7361c36c 2222 kfree_skb(skb);
f78a5fda 2223 goto out_err;
6209344f 2224 }
7361c36c 2225 fds_sent = true;
1da177e4 2226
e370a723
ED
2227 skb_put(skb, size - data_len);
2228 skb->data_len = data_len;
2229 skb->len = size;
c0371da6 2230 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 2231 if (err) {
1da177e4 2232 kfree_skb(skb);
f78a5fda 2233 goto out_err;
1da177e4
LT
2234 }
2235
1c92b4e5 2236 unix_state_lock(other);
1da177e4
LT
2237
2238 if (sock_flag(other, SOCK_DEAD) ||
2239 (other->sk_shutdown & RCV_SHUTDOWN))
2240 goto pipe_err_free;
2241
16e57262 2242 maybe_add_creds(skb, sock, other);
3c32da19 2243 scm_stat_add(other, skb);
7782040b 2244 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2245 unix_state_unlock(other);
676d2369 2246 other->sk_data_ready(other);
e27dfcea 2247 sent += size;
1da177e4 2248 }
1da177e4 2249
4edf21aa 2250#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2251 if (msg->msg_flags & MSG_OOB) {
2aab4b96 2252 err = queue_oob(sock, msg, other, &scm, fds_sent);
314001f0
RS
2253 if (err)
2254 goto out_err;
2255 sent++;
2256 }
2257#endif
2258
7cc05662 2259 scm_destroy(&scm);
1da177e4
LT
2260
2261 return sent;
2262
2263pipe_err_free:
1c92b4e5 2264 unix_state_unlock(other);
1da177e4
LT
2265 kfree_skb(skb);
2266pipe_err:
6eba6a37
ED
2267 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2268 send_sig(SIGPIPE, current, 0);
1da177e4
LT
2269 err = -EPIPE;
2270out_err:
7cc05662 2271 scm_destroy(&scm);
1da177e4
LT
2272 return sent ? : err;
2273}
2274
869e7c62
HFS
2275static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2276 int offset, size_t size, int flags)
2277{
9490f886
HFS
2278 int err;
2279 bool send_sigpipe = false;
2280 bool init_scm = true;
2281 struct scm_cookie scm;
869e7c62
HFS
2282 struct sock *other, *sk = socket->sk;
2283 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2284
2285 if (flags & MSG_OOB)
2286 return -EOPNOTSUPP;
2287
2288 other = unix_peer(sk);
2289 if (!other || sk->sk_state != TCP_ESTABLISHED)
2290 return -ENOTCONN;
2291
2292 if (false) {
2293alloc_skb:
2294 unix_state_unlock(other);
6e1ce3c3 2295 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2296 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2297 &err, 0);
2298 if (!newskb)
9490f886 2299 goto err;
869e7c62
HFS
2300 }
2301
6e1ce3c3 2302 /* we must acquire iolock as we modify already present
869e7c62
HFS
2303 * skbs in the sk_receive_queue and mess with skb->len
2304 */
6e1ce3c3 2305 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
869e7c62
HFS
2306 if (err) {
2307 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
869e7c62
HFS
2308 goto err;
2309 }
2310
2311 if (sk->sk_shutdown & SEND_SHUTDOWN) {
2312 err = -EPIPE;
9490f886 2313 send_sigpipe = true;
869e7c62
HFS
2314 goto err_unlock;
2315 }
2316
2317 unix_state_lock(other);
2318
2319 if (sock_flag(other, SOCK_DEAD) ||
2320 other->sk_shutdown & RCV_SHUTDOWN) {
2321 err = -EPIPE;
9490f886 2322 send_sigpipe = true;
869e7c62
HFS
2323 goto err_state_unlock;
2324 }
2325
9490f886
HFS
2326 if (init_scm) {
2327 err = maybe_init_creds(&scm, socket, other);
2328 if (err)
2329 goto err_state_unlock;
2330 init_scm = false;
2331 }
2332
869e7c62
HFS
2333 skb = skb_peek_tail(&other->sk_receive_queue);
2334 if (tail && tail == skb) {
2335 skb = newskb;
9490f886
HFS
2336 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2337 if (newskb) {
869e7c62 2338 skb = newskb;
9490f886
HFS
2339 } else {
2340 tail = skb;
869e7c62 2341 goto alloc_skb;
9490f886 2342 }
869e7c62
HFS
2343 } else if (newskb) {
2344 /* this is fast path, we don't necessarily need to
2345 * call to kfree_skb even though with newskb == NULL
2346 * this - does no harm
2347 */
2348 consume_skb(newskb);
8844f972 2349 newskb = NULL;
869e7c62
HFS
2350 }
2351
2352 if (skb_append_pagefrags(skb, page, offset, size)) {
2353 tail = skb;
2354 goto alloc_skb;
2355 }
2356
2357 skb->len += size;
2358 skb->data_len += size;
2359 skb->truesize += size;
14afee4b 2360 refcount_add(size, &sk->sk_wmem_alloc);
869e7c62 2361
a3a116e0 2362 if (newskb) {
9490f886
HFS
2363 err = unix_scm_to_skb(&scm, skb, false);
2364 if (err)
2365 goto err_state_unlock;
a3a116e0 2366 spin_lock(&other->sk_receive_queue.lock);
869e7c62 2367 __skb_queue_tail(&other->sk_receive_queue, newskb);
a3a116e0
HFS
2368 spin_unlock(&other->sk_receive_queue.lock);
2369 }
869e7c62
HFS
2370
2371 unix_state_unlock(other);
6e1ce3c3 2372 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2373
2374 other->sk_data_ready(other);
9490f886 2375 scm_destroy(&scm);
869e7c62
HFS
2376 return size;
2377
2378err_state_unlock:
2379 unix_state_unlock(other);
2380err_unlock:
6e1ce3c3 2381 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2382err:
2383 kfree_skb(newskb);
2384 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2385 send_sig(SIGPIPE, current, 0);
9490f886
HFS
2386 if (!init_scm)
2387 scm_destroy(&scm);
869e7c62
HFS
2388 return err;
2389}
2390
1b784140
YX
2391static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2392 size_t len)
1da177e4
LT
2393{
2394 int err;
2395 struct sock *sk = sock->sk;
ac7bfa62 2396
1da177e4
LT
2397 err = sock_error(sk);
2398 if (err)
2399 return err;
2400
2401 if (sk->sk_state != TCP_ESTABLISHED)
2402 return -ENOTCONN;
2403
2404 if (msg->msg_namelen)
2405 msg->msg_namelen = 0;
2406
1b784140 2407 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2408}
ac7bfa62 2409
1b784140
YX
2410static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2411 size_t size, int flags)
a05d2ad1
EB
2412{
2413 struct sock *sk = sock->sk;
2414
2415 if (sk->sk_state != TCP_ESTABLISHED)
2416 return -ENOTCONN;
2417
1b784140 2418 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2419}
2420
1da177e4
LT
2421static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2422{
ae3b5641 2423 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2424
ae3b5641
AV
2425 if (addr) {
2426 msg->msg_namelen = addr->len;
2427 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2428 }
2429}
2430
9825d866
CW
2431int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2432 int flags)
1da177e4 2433{
7cc05662 2434 struct scm_cookie scm;
9825d866 2435 struct socket *sock = sk->sk_socket;
1da177e4 2436 struct unix_sock *u = unix_sk(sk);
64874280
RW
2437 struct sk_buff *skb, *last;
2438 long timeo;
fd69c399 2439 int skip;
1da177e4
LT
2440 int err;
2441
2442 err = -EOPNOTSUPP;
2443 if (flags&MSG_OOB)
2444 goto out;
2445
64874280 2446 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2447
64874280 2448 do {
6e1ce3c3 2449 mutex_lock(&u->iolock);
f55bb7f9 2450
64874280 2451 skip = sk_peek_offset(sk, flags);
b50b0580 2452 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
e427cad6
PA
2453 &skip, &err, &last);
2454 if (skb) {
2455 if (!(flags & MSG_PEEK))
2456 scm_stat_del(sk, skb);
64874280 2457 break;
e427cad6 2458 }
64874280 2459
6e1ce3c3 2460 mutex_unlock(&u->iolock);
64874280
RW
2461
2462 if (err != -EAGAIN)
2463 break;
2464 } while (timeo &&
b50b0580
SD
2465 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2466 &err, &timeo, last));
64874280 2467
6e1ce3c3 2468 if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2469 unix_state_lock(sk);
2470 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2471 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2472 (sk->sk_shutdown & RCV_SHUTDOWN))
2473 err = 0;
2474 unix_state_unlock(sk);
64874280 2475 goto out;
0a112258 2476 }
1da177e4 2477
77b75f4d
RW
2478 if (wq_has_sleeper(&u->peer_wait))
2479 wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2480 EPOLLOUT | EPOLLWRNORM |
2481 EPOLLWRBAND);
1da177e4
LT
2482
2483 if (msg->msg_name)
2484 unix_copy_addr(msg, skb->sk);
2485
f55bb7f9
PE
2486 if (size > skb->len - skip)
2487 size = skb->len - skip;
2488 else if (size < skb->len - skip)
1da177e4
LT
2489 msg->msg_flags |= MSG_TRUNC;
2490
51f3d02b 2491 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2492 if (err)
2493 goto out_free;
2494
3f66116e
AC
2495 if (sock_flag(sk, SOCK_RCVTSTAMP))
2496 __sock_recv_timestamp(msg, sk, skb);
2497
7cc05662
CH
2498 memset(&scm, 0, sizeof(scm));
2499
2500 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2501 unix_set_secdata(&scm, skb);
1da177e4 2502
6eba6a37 2503 if (!(flags & MSG_PEEK)) {
1da177e4 2504 if (UNIXCB(skb).fp)
7cc05662 2505 unix_detach_fds(&scm, skb);
f55bb7f9
PE
2506
2507 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2508 } else {
1da177e4
LT
2509 /* It is questionable: on PEEK we could:
2510 - do not return fds - good, but too simple 8)
2511 - return fds, and do not return them on read (old strategy,
2512 apparently wrong)
2513 - clone fds (I chose it for now, it is the most universal
2514 solution)
ac7bfa62
YH
2515
2516 POSIX 1003.1g does not actually define this clearly
2517 at all. POSIX 1003.1g doesn't define a lot of things
2518 clearly however!
2519
1da177e4 2520 */
f55bb7f9
PE
2521
2522 sk_peek_offset_fwd(sk, size);
2523
1da177e4 2524 if (UNIXCB(skb).fp)
cbcf0112 2525 unix_peek_fds(&scm, skb);
1da177e4 2526 }
9f6f9af7 2527 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2528
7cc05662 2529 scm_recv(sock, msg, &scm, flags);
1da177e4
LT
2530
2531out_free:
6eba6a37 2532 skb_free_datagram(sk, skb);
6e1ce3c3 2533 mutex_unlock(&u->iolock);
1da177e4
LT
2534out:
2535 return err;
2536}
29df44fa 2537
9825d866
CW
2538static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2539 int flags)
2540{
2541 struct sock *sk = sock->sk;
2542
2543#ifdef CONFIG_BPF_SYSCALL
94531cfc
JW
2544 const struct proto *prot = READ_ONCE(sk->sk_prot);
2545
2546 if (prot != &unix_dgram_proto)
ec095263 2547 return prot->recvmsg(sk, msg, size, flags, NULL);
9825d866
CW
2548#endif
2549 return __unix_dgram_recvmsg(sk, msg, size, flags);
2550}
2551
965b57b4 2552static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
29df44fa 2553{
d6e3b27c
PY
2554 struct unix_sock *u = unix_sk(sk);
2555 struct sk_buff *skb;
2556 int err, copied;
29df44fa 2557
d6e3b27c
PY
2558 mutex_lock(&u->iolock);
2559 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2560 mutex_unlock(&u->iolock);
2561 if (!skb)
2562 return err;
29df44fa 2563
d6e3b27c
PY
2564 copied = recv_actor(sk, skb);
2565 kfree_skb(skb);
29df44fa
CW
2566
2567 return copied;
2568}
1da177e4
LT
2569
2570/*
79f632c7 2571 * Sleep until more data has arrived. But check for races..
1da177e4 2572 */
79f632c7 2573static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2574 struct sk_buff *last, unsigned int last_len,
2575 bool freezable)
1da177e4 2576{
f5d39b02 2577 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2b514574 2578 struct sk_buff *tail;
1da177e4
LT
2579 DEFINE_WAIT(wait);
2580
1c92b4e5 2581 unix_state_lock(sk);
1da177e4
LT
2582
2583 for (;;) {
f5d39b02 2584 prepare_to_wait(sk_sleep(sk), &wait, state);
1da177e4 2585
2b514574
HFS
2586 tail = skb_peek_tail(&sk->sk_receive_queue);
2587 if (tail != last ||
2588 (tail && tail->len != last_len) ||
1da177e4
LT
2589 sk->sk_err ||
2590 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2591 signal_pending(current) ||
2592 !timeo)
2593 break;
2594
9cd3e072 2595 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2596 unix_state_unlock(sk);
f5d39b02 2597 timeo = schedule_timeout(timeo);
1c92b4e5 2598 unix_state_lock(sk);
b48732e4
MS
2599
2600 if (sock_flag(sk, SOCK_DEAD))
2601 break;
2602
9cd3e072 2603 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2604 }
2605
aa395145 2606 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2607 unix_state_unlock(sk);
1da177e4
LT
2608 return timeo;
2609}
2610
e370a723
ED
2611static unsigned int unix_skb_len(const struct sk_buff *skb)
2612{
2613 return skb->len - UNIXCB(skb).consumed;
2614}
2615
2b514574
HFS
/* Per-call context for the stream receive and splice-read paths; it is
 * handed to unix_stream_read_generic() and passed on to the recv_actor.
 */
2616struct unix_stream_read_state {
/* Called as recv_actor(skb, skip, chunk, state): moves up to chunk bytes
 * out of skb, starting skip bytes past the data already consumed. The
 * callers treat a negative return as failure.
 */
2617 int (*recv_actor)(struct sk_buff *, int, int,
2618 struct unix_stream_read_state *);
2619 struct socket *socket; /* socket being read from */
2620 struct msghdr *msg; /* destination message; not set by the splice path */
2621 struct pipe_inode_info *pipe; /* destination pipe, set by the splice path */
2622 size_t size; /* number of bytes requested */
2623 int flags; /* MSG_* receive flags */
2624 unsigned int splice_flags; /* forwarded to skb_splice_bits() */
2625};
2626
314001f0
RS
2627#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2628static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2629{
2630 struct socket *sock = state->socket;
2631 struct sock *sk = sock->sk;
2632 struct unix_sock *u = unix_sk(sk);
2633 int chunk = 1;
876c14ad 2634 struct sk_buff *oob_skb;
314001f0 2635
876c14ad
RS
2636 mutex_lock(&u->iolock);
2637 unix_state_lock(sk);
2638
2639 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2640 unix_state_unlock(sk);
2641 mutex_unlock(&u->iolock);
314001f0 2642 return -EINVAL;
876c14ad 2643 }
314001f0 2644
876c14ad 2645 oob_skb = u->oob_skb;
314001f0 2646
e82025c6
KI
2647 if (!(state->flags & MSG_PEEK))
2648 WRITE_ONCE(u->oob_skb, NULL);
876c14ad
RS
2649
2650 unix_state_unlock(sk);
2651
2652 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2653
2654 if (!(state->flags & MSG_PEEK)) {
2655 UNIXCB(oob_skb).consumed += 1;
2656 kfree_skb(oob_skb);
2657 }
2658
2659 mutex_unlock(&u->iolock);
2660
2661 if (chunk < 0)
2662 return -EFAULT;
2663
314001f0
RS
2664 state->msg->msg_flags |= MSG_OOB;
2665 return 1;
2666}
2667
2668static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2669 int flags, int copied)
2670{
2671 struct unix_sock *u = unix_sk(sk);
2672
2673 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2674 skb_unlink(skb, &sk->sk_receive_queue);
2675 consume_skb(skb);
2676 skb = NULL;
2677 } else {
2678 if (skb == u->oob_skb) {
2679 if (copied) {
2680 skb = NULL;
2681 } else if (sock_flag(sk, SOCK_URGINLINE)) {
2682 if (!(flags & MSG_PEEK)) {
e82025c6 2683 WRITE_ONCE(u->oob_skb, NULL);
314001f0
RS
2684 consume_skb(skb);
2685 }
2686 } else if (!(flags & MSG_PEEK)) {
2687 skb_unlink(skb, &sk->sk_receive_queue);
2688 consume_skb(skb);
2689 skb = skb_peek(&sk->sk_receive_queue);
2690 }
2691 }
2692 }
2693 return skb;
2694}
2695#endif
2696
965b57b4 2697static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
77462de1
JW
2698{
2699 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2700 return -ENOTCONN;
2701
965b57b4 2702 return unix_read_skb(sk, recv_actor);
77462de1
JW
2703}
2704
06a77b07
WC
2705static int unix_stream_read_generic(struct unix_stream_read_state *state,
2706 bool freezable)
1da177e4 2707{
7cc05662 2708 struct scm_cookie scm;
2b514574 2709 struct socket *sock = state->socket;
1da177e4
LT
2710 struct sock *sk = sock->sk;
2711 struct unix_sock *u = unix_sk(sk);
1da177e4 2712 int copied = 0;
2b514574 2713 int flags = state->flags;
de144391 2714 int noblock = flags & MSG_DONTWAIT;
2b514574 2715 bool check_creds = false;
1da177e4
LT
2716 int target;
2717 int err = 0;
2718 long timeo;
fc0d7536 2719 int skip;
2b514574
HFS
2720 size_t size = state->size;
2721 unsigned int last_len;
1da177e4 2722
1b92ee3d
RW
2723 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2724 err = -EINVAL;
1da177e4 2725 goto out;
1b92ee3d 2726 }
1da177e4 2727
1b92ee3d
RW
2728 if (unlikely(flags & MSG_OOB)) {
2729 err = -EOPNOTSUPP;
314001f0 2730#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2731 err = unix_stream_recv_urg(state);
314001f0 2732#endif
1da177e4 2733 goto out;
1b92ee3d 2734 }
1da177e4 2735
2b514574 2736 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2737 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2738
2b514574
HFS
2739 memset(&scm, 0, sizeof(scm));
2740
1da177e4
LT
2741 /* Lock the socket to prevent queue disordering
2742 * while sleeps in memcpy_tomsg
2743 */
6e1ce3c3 2744 mutex_lock(&u->iolock);
1da177e4 2745
a0917e0b 2746 skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2747
6eba6a37 2748 do {
1da177e4 2749 int chunk;
73ed5d25 2750 bool drop_skb;
79f632c7 2751 struct sk_buff *skb, *last;
1da177e4 2752
18eceb81 2753redo:
3c0d2f37 2754 unix_state_lock(sk);
b48732e4
MS
2755 if (sock_flag(sk, SOCK_DEAD)) {
2756 err = -ECONNRESET;
2757 goto unlock;
2758 }
79f632c7 2759 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2760 last_len = last ? last->len : 0;
314001f0
RS
2761
2762#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2763 if (skb) {
2764 skb = manage_oob(skb, sk, flags, copied);
2765 if (!skb) {
2766 unix_state_unlock(sk);
2767 if (copied)
2768 break;
2769 goto redo;
2770 }
2771 }
2772#endif
fc0d7536 2773again:
6eba6a37 2774 if (skb == NULL) {
1da177e4 2775 if (copied >= target)
3c0d2f37 2776 goto unlock;
1da177e4
LT
2777
2778 /*
2779 * POSIX 1003.1g mandates this order.
2780 */
ac7bfa62 2781
6eba6a37
ED
2782 err = sock_error(sk);
2783 if (err)
3c0d2f37 2784 goto unlock;
1da177e4 2785 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2786 goto unlock;
2787
2788 unix_state_unlock(sk);
1b92ee3d
RW
2789 if (!timeo) {
2790 err = -EAGAIN;
1da177e4 2791 break;
1b92ee3d
RW
2792 }
2793
6e1ce3c3 2794 mutex_unlock(&u->iolock);
1da177e4 2795
2b514574 2796 timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2797 last_len, freezable);
1da177e4 2798
3822b5c2 2799 if (signal_pending(current)) {
1da177e4 2800 err = sock_intr_errno(timeo);
fa0dc04d 2801 scm_destroy(&scm);
1da177e4
LT
2802 goto out;
2803 }
b3ca9b02 2804
6e1ce3c3 2805 mutex_lock(&u->iolock);
18eceb81 2806 goto redo;
2b514574 2807unlock:
3c0d2f37
MS
2808 unix_state_unlock(sk);
2809 break;
1da177e4 2810 }
fc0d7536 2811
e370a723
ED
2812 while (skip >= unix_skb_len(skb)) {
2813 skip -= unix_skb_len(skb);
79f632c7 2814 last = skb;
2b514574 2815 last_len = skb->len;
fc0d7536 2816 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2817 if (!skb)
2818 goto again;
fc0d7536
PE
2819 }
2820
3c0d2f37 2821 unix_state_unlock(sk);
1da177e4
LT
2822
2823 if (check_creds) {
2824 /* Never glue messages from different writers */
9490f886 2825 if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2826 break;
0e82e7f6 2827 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2828 /* Copy credentials */
7cc05662 2829 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2830 unix_set_secdata(&scm, skb);
2b514574 2831 check_creds = true;
1da177e4
LT
2832 }
2833
2834 /* Copy address just once */
2b514574
HFS
2835 if (state->msg && state->msg->msg_name) {
2836 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2837 state->msg->msg_name);
2838 unix_copy_addr(state->msg, skb->sk);
1da177e4
LT
2839 sunaddr = NULL;
2840 }
2841
e370a723 2842 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
73ed5d25 2843 skb_get(skb);
2b514574 2844 chunk = state->recv_actor(skb, skip, chunk, state);
73ed5d25
HFS
2845 drop_skb = !unix_skb_len(skb);
2846 /* skb is only safe to use if !drop_skb */
2847 consume_skb(skb);
2b514574 2848 if (chunk < 0) {
1da177e4
LT
2849 if (copied == 0)
2850 copied = -EFAULT;
2851 break;
2852 }
2853 copied += chunk;
2854 size -= chunk;
2855
73ed5d25
HFS
2856 if (drop_skb) {
2857 /* the skb was touched by a concurrent reader;
2858 * we should not expect anything from this skb
2859 * anymore and assume it invalid - we can be
2860 * sure it was dropped from the socket queue
2861 *
2862 * let's report a short read
2863 */
2864 err = 0;
2865 break;
2866 }
2867
1da177e4 2868 /* Mark read part of skb as used */
6eba6a37 2869 if (!(flags & MSG_PEEK)) {
e370a723 2870 UNIXCB(skb).consumed += chunk;
1da177e4 2871
fc0d7536
PE
2872 sk_peek_offset_bwd(sk, chunk);
2873
3c32da19 2874 if (UNIXCB(skb).fp) {
3c32da19 2875 scm_stat_del(sk, skb);
7cc05662 2876 unix_detach_fds(&scm, skb);
3c32da19 2877 }
1da177e4 2878
e370a723 2879 if (unix_skb_len(skb))
1da177e4 2880 break;
1da177e4 2881
6f01fd6e 2882 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2883 consume_skb(skb);
1da177e4 2884
7cc05662 2885 if (scm.fp)
1da177e4 2886 break;
6eba6a37 2887 } else {
1da177e4
LT
2888 /* It is questionable, see note in unix_dgram_recvmsg.
2889 */
2890 if (UNIXCB(skb).fp)
cbcf0112 2891 unix_peek_fds(&scm, skb);
1da177e4 2892
e9193d60 2893 sk_peek_offset_fwd(sk, chunk);
fc0d7536 2894
9f389e35
AC
2895 if (UNIXCB(skb).fp)
2896 break;
2897
e9193d60 2898 skip = 0;
9f389e35
AC
2899 last = skb;
2900 last_len = skb->len;
2901 unix_state_lock(sk);
2902 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2903 if (skb)
2904 goto again;
2905 unix_state_unlock(sk);
1da177e4
LT
2906 break;
2907 }
2908 } while (size);
2909
6e1ce3c3 2910 mutex_unlock(&u->iolock);
2b514574
HFS
2911 if (state->msg)
2912 scm_recv(sock, state->msg, &scm, flags);
2913 else
2914 scm_destroy(&scm);
1da177e4
LT
2915out:
2916 return copied ? : err;
2917}
2918
2b514574
HFS
2919static int unix_stream_read_actor(struct sk_buff *skb,
2920 int skip, int chunk,
2921 struct unix_stream_read_state *state)
2922{
2923 int ret;
2924
2925 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2926 state->msg, chunk);
2927 return ret ?: chunk;
2928}
2929
94531cfc
JW
2930int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2931 size_t size, int flags)
2932{
2933 struct unix_stream_read_state state = {
2934 .recv_actor = unix_stream_read_actor,
2935 .socket = sk->sk_socket,
2936 .msg = msg,
2937 .size = size,
2938 .flags = flags
2939 };
2940
2941 return unix_stream_read_generic(&state, true);
2942}
2943
2b514574
HFS
2944static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2945 size_t size, int flags)
2946{
2947 struct unix_stream_read_state state = {
2948 .recv_actor = unix_stream_read_actor,
2949 .socket = sock,
2950 .msg = msg,
2951 .size = size,
2952 .flags = flags
2953 };
2954
94531cfc
JW
2955#ifdef CONFIG_BPF_SYSCALL
2956 struct sock *sk = sock->sk;
2957 const struct proto *prot = READ_ONCE(sk->sk_prot);
2958
2959 if (prot != &unix_stream_proto)
ec095263 2960 return prot->recvmsg(sk, msg, size, flags, NULL);
94531cfc 2961#endif
06a77b07 2962 return unix_stream_read_generic(&state, true);
2b514574
HFS
2963}
2964
2b514574
HFS
2965static int unix_stream_splice_actor(struct sk_buff *skb,
2966 int skip, int chunk,
2967 struct unix_stream_read_state *state)
2968{
2969 return skb_splice_bits(skb, state->socket->sk,
2970 UNIXCB(skb).consumed + skip,
25869262 2971 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2972}
2973
2974static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2975 struct pipe_inode_info *pipe,
2976 size_t size, unsigned int flags)
2977{
2978 struct unix_stream_read_state state = {
2979 .recv_actor = unix_stream_splice_actor,
2980 .socket = sock,
2981 .pipe = pipe,
2982 .size = size,
2983 .splice_flags = flags,
2984 };
2985
2986 if (unlikely(*ppos))
2987 return -ESPIPE;
2988
2989 if (sock->file->f_flags & O_NONBLOCK ||
2990 flags & SPLICE_F_NONBLOCK)
2991 state.flags = MSG_DONTWAIT;
2992
06a77b07 2993 return unix_stream_read_generic(&state, false);
2b514574
HFS
2994}
2995
1da177e4
LT
2996static int unix_shutdown(struct socket *sock, int mode)
2997{
2998 struct sock *sk = sock->sk;
2999 struct sock *other;
3000
fc61b928
XW
3001 if (mode < SHUT_RD || mode > SHUT_RDWR)
3002 return -EINVAL;
3003 /* This maps:
3004 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
3005 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
3006 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3007 */
3008 ++mode;
7180a031
AC
3009
3010 unix_state_lock(sk);
e1d09c2c 3011 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
7180a031
AC
3012 other = unix_peer(sk);
3013 if (other)
3014 sock_hold(other);
3015 unix_state_unlock(sk);
3016 sk->sk_state_change(sk);
3017
3018 if (other &&
3019 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3020
3021 int peer_mode = 0;
94531cfc 3022 const struct proto *prot = READ_ONCE(other->sk_prot);
7180a031 3023
d359902d
JW
3024 if (prot->unhash)
3025 prot->unhash(other);
7180a031
AC
3026 if (mode&RCV_SHUTDOWN)
3027 peer_mode |= SEND_SHUTDOWN;
3028 if (mode&SEND_SHUTDOWN)
3029 peer_mode |= RCV_SHUTDOWN;
3030 unix_state_lock(other);
e1d09c2c 3031 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
7180a031
AC
3032 unix_state_unlock(other);
3033 other->sk_state_change(other);
d0c6416b 3034 if (peer_mode == SHUTDOWN_MASK)
7180a031 3035 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
d0c6416b 3036 else if (peer_mode & RCV_SHUTDOWN)
7180a031 3037 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 3038 }
7180a031
AC
3039 if (other)
3040 sock_put(other);
3041
1da177e4
LT
3042 return 0;
3043}
3044
885ee74d
PE
3045long unix_inq_len(struct sock *sk)
3046{
3047 struct sk_buff *skb;
3048 long amount = 0;
3049
3050 if (sk->sk_state == TCP_LISTEN)
3051 return -EINVAL;
3052
3053 spin_lock(&sk->sk_receive_queue.lock);
3054 if (sk->sk_type == SOCK_STREAM ||
3055 sk->sk_type == SOCK_SEQPACKET) {
3056 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 3057 amount += unix_skb_len(skb);
885ee74d
PE
3058 } else {
3059 skb = skb_peek(&sk->sk_receive_queue);
3060 if (skb)
3061 amount = skb->len;
3062 }
3063 spin_unlock(&sk->sk_receive_queue.lock);
3064
3065 return amount;
3066}
3067EXPORT_SYMBOL_GPL(unix_inq_len);
3068
/* Amount of send-buffer memory of @sk currently accounted as in use. */
long unix_outq_len(struct sock *sk)
{
	long pending = sk_wmem_alloc_get(sk);

	return pending;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
3074
ba94f308
AV
3075static int unix_open_file(struct sock *sk)
3076{
3077 struct path path;
3078 struct file *f;
3079 int fd;
3080
3081 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3082 return -EPERM;
3083
ae3b5641
AV
3084 if (!smp_load_acquire(&unix_sk(sk)->addr))
3085 return -ENOENT;
3086
ba94f308 3087 path = unix_sk(sk)->path;
ae3b5641 3088 if (!path.dentry)
ba94f308 3089 return -ENOENT;
ba94f308
AV
3090
3091 path_get(&path);
ba94f308
AV
3092
3093 fd = get_unused_fd_flags(O_CLOEXEC);
3094 if (fd < 0)
3095 goto out;
3096
3097 f = dentry_open(&path, O_PATH, current_cred());
3098 if (IS_ERR(f)) {
3099 put_unused_fd(fd);
3100 fd = PTR_ERR(f);
3101 goto out;
3102 }
3103
3104 fd_install(fd, f);
3105out:
3106 path_put(&path);
3107
3108 return fd;
3109}
3110
1da177e4
LT
3111static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3112{
3113 struct sock *sk = sock->sk;
e27dfcea 3114 long amount = 0;
1da177e4
LT
3115 int err;
3116
6eba6a37
ED
3117 switch (cmd) {
3118 case SIOCOUTQ:
885ee74d 3119 amount = unix_outq_len(sk);
6eba6a37
ED
3120 err = put_user(amount, (int __user *)arg);
3121 break;
3122 case SIOCINQ:
885ee74d
PE
3123 amount = unix_inq_len(sk);
3124 if (amount < 0)
3125 err = amount;
3126 else
1da177e4 3127 err = put_user(amount, (int __user *)arg);
885ee74d 3128 break;
ba94f308
AV
3129 case SIOCUNIXFILE:
3130 err = unix_open_file(sk);
3131 break;
314001f0
RS
3132#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3133 case SIOCATMARK:
3134 {
3135 struct sk_buff *skb;
314001f0
RS
3136 int answ = 0;
3137
3138 skb = skb_peek(&sk->sk_receive_queue);
e82025c6 3139 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
314001f0
RS
3140 answ = 1;
3141 err = put_user(answ, (int __user *)arg);
3142 }
3143 break;
3144#endif
6eba6a37
ED
3145 default:
3146 err = -ENOIOCTLCMD;
3147 break;
1da177e4
LT
3148 }
3149 return err;
3150}
3151
5f6beb9e
AB
3152#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert @arg with compat_ptr() and forward
 * the request to unix_ioctl() unchanged otherwise.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
3157#endif
3158
a11e1d43 3159static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
3160{
3161 struct sock *sk = sock->sk;
a11e1d43 3162 __poll_t mask;
e1d09c2c 3163 u8 shutdown;
a11e1d43 3164
89ab066d 3165 sock_poll_wait(file, sock, wait);
a11e1d43 3166 mask = 0;
e1d09c2c 3167 shutdown = READ_ONCE(sk->sk_shutdown);
1da177e4
LT
3168
3169 /* exceptional events? */
cc04410a 3170 if (READ_ONCE(sk->sk_err))
a9a08845 3171 mask |= EPOLLERR;
e1d09c2c 3172 if (shutdown == SHUTDOWN_MASK)
a9a08845 3173 mask |= EPOLLHUP;
e1d09c2c 3174 if (shutdown & RCV_SHUTDOWN)
a9a08845 3175 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3176
3177 /* readable? */
3ef7cf57 3178 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3179 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3180 if (sk_is_readable(sk))
3181 mask |= EPOLLIN | EPOLLRDNORM;
d9a232d4
KI
3182#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3183 if (READ_ONCE(unix_sk(sk)->oob_skb))
3184 mask |= EPOLLPRI;
3185#endif
1da177e4
LT
3186
3187 /* Connection-based need to check for termination and startup */
6eba6a37
ED
3188 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3189 sk->sk_state == TCP_CLOSE)
a9a08845 3190 mask |= EPOLLHUP;
1da177e4
LT
3191
3192 /*
3193 * we set writable also when the other side has shut down the
3194 * connection. This prevents stuck sockets.
3195 */
3196 if (unix_writable(sk))
a9a08845 3197 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
3198
3199 return mask;
3200}
3201
a11e1d43
LT
3202static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3203 poll_table *wait)
3c73419c 3204{
ec0d215f 3205 struct sock *sk = sock->sk, *other;
a11e1d43
LT
3206 unsigned int writable;
3207 __poll_t mask;
e1d09c2c 3208 u8 shutdown;
a11e1d43 3209
89ab066d 3210 sock_poll_wait(file, sock, wait);
a11e1d43 3211 mask = 0;
e1d09c2c 3212 shutdown = READ_ONCE(sk->sk_shutdown);
3c73419c
RW
3213
3214 /* exceptional events? */
cc04410a
ED
3215 if (READ_ONCE(sk->sk_err) ||
3216 !skb_queue_empty_lockless(&sk->sk_error_queue))
a9a08845
LT
3217 mask |= EPOLLERR |
3218 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 3219
e1d09c2c 3220 if (shutdown & RCV_SHUTDOWN)
a9a08845 3221 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
e1d09c2c 3222 if (shutdown == SHUTDOWN_MASK)
a9a08845 3223 mask |= EPOLLHUP;
3c73419c
RW
3224
3225 /* readable? */
3ef7cf57 3226 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3227 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3228 if (sk_is_readable(sk))
3229 mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
3230
3231 /* Connection-based need to check for termination and startup */
3232 if (sk->sk_type == SOCK_SEQPACKET) {
3233 if (sk->sk_state == TCP_CLOSE)
a9a08845 3234 mask |= EPOLLHUP;
3c73419c
RW
3235 /* connection hasn't started yet? */
3236 if (sk->sk_state == TCP_SYN_SENT)
3237 return mask;
3238 }
3239
973a34aa 3240 /* No write status requested, avoid expensive OUT tests. */
a11e1d43 3241 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
3242 return mask;
3243
ec0d215f 3244 writable = unix_writable(sk);
7d267278
RW
3245 if (writable) {
3246 unix_state_lock(sk);
3247
3248 other = unix_peer(sk);
3249 if (other && unix_peer(other) != sk &&
04f08eb4 3250 unix_recvq_full_lockless(other) &&
7d267278
RW
3251 unix_dgram_peer_wake_me(sk, other))
3252 writable = 0;
3253
3254 unix_state_unlock(sk);
ec0d215f
RW
3255 }
3256
3257 if (writable)
a9a08845 3258 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 3259 else
9cd3e072 3260 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 3261
3c73419c
RW
3262 return mask;
3263}
1da177e4
LT
3264
3265#ifdef CONFIG_PROC_FS
a53eb3fe 3266
7123aaa3
ED
/*
 * A seq_file position packs a hash bucket index into the high bits and a
 * 1-based offset within that bucket into the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* bucket index stored in position @p */
#define get_bucket(p) ((p) >> BUCKET_SPACE)
/* offset within the bucket stored in position @p */
#define get_offset(p) ((p) & ((1UL << BUCKET_SPACE) - 1))
/* build a position from bucket @bkt and offset @off */
#define set_bucket_offset(bkt, off) (((bkt) << BUCKET_SPACE) | (off))
a53eb3fe 3272
7123aaa3 3273static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 3274{
7123aaa3
ED
3275 unsigned long offset = get_offset(*pos);
3276 unsigned long bucket = get_bucket(*pos);
7123aaa3 3277 unsigned long count = 0;
cf2f225e 3278 struct sock *sk;
1da177e4 3279
cf2f225e
KI
3280 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3281 sk; sk = sk_next(sk)) {
7123aaa3
ED
3282 if (++count == offset)
3283 break;
3284 }
3285
3286 return sk;
3287}
3288
4408d55a 3289static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
7123aaa3 3290{
afd20b92 3291 unsigned long bucket = get_bucket(*pos);
79b05bea 3292 struct net *net = seq_file_net(seq);
4408d55a 3293 struct sock *sk;
7123aaa3 3294
f302d180 3295 while (bucket < UNIX_HASH_SIZE) {
79b05bea 3296 spin_lock(&net->unx.table.locks[bucket]);
4408d55a 3297
7123aaa3
ED
3298 sk = unix_from_bucket(seq, pos);
3299 if (sk)
3300 return sk;
3301
79b05bea 3302 spin_unlock(&net->unx.table.locks[bucket]);
4408d55a
KI
3303
3304 *pos = set_bucket_offset(++bucket, 1);
3305 }
7123aaa3 3306
1da177e4
LT
3307 return NULL;
3308}
3309
4408d55a
KI
3310static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3311 loff_t *pos)
3312{
3313 unsigned long bucket = get_bucket(*pos);
3314
cf2f225e
KI
3315 sk = sk_next(sk);
3316 if (sk)
3317 return sk;
3318
4408d55a 3319
cf2f225e 3320 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
4408d55a
KI
3321
3322 *pos = set_bucket_offset(++bucket, 1);
3323
3324 return unix_get_first(seq, pos);
3325}
3326
1da177e4
LT
3327static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3328{
7123aaa3
ED
3329 if (!*pos)
3330 return SEQ_START_TOKEN;
3331
4408d55a 3332 return unix_get_first(seq, pos);
1da177e4
LT
3333}
3334
3335static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3336{
3337 ++*pos;
4408d55a
KI
3338
3339 if (v == SEQ_START_TOKEN)
3340 return unix_get_first(seq, pos);
3341
3342 return unix_get_next(seq, v, pos);
1da177e4
LT
3343}
3344
3345static void unix_seq_stop(struct seq_file *seq, void *v)
3346{
afd20b92
KI
3347 struct sock *sk = v;
3348
2f7ca90a 3349 if (sk)
79b05bea 3350 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
1da177e4
LT
3351}
3352
3353static int unix_seq_show(struct seq_file *seq, void *v)
3354{
ac7bfa62 3355
b9f3124f 3356 if (v == SEQ_START_TOKEN)
1da177e4
LT
3357 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3358 "Inode Path\n");
3359 else {
3360 struct sock *s = v;
3361 struct unix_sock *u = unix_sk(s);
1c92b4e5 3362 unix_state_lock(s);
1da177e4 3363
71338aa7 3364 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4 3365 s,
41c6d650 3366 refcount_read(&s->sk_refcnt),
1da177e4
LT
3367 0,
3368 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3369 s->sk_type,
3370 s->sk_socket ?
3371 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3372 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3373 sock_i_ino(s));
3374
2f7ca90a 3375 if (u->addr) { // under a hash table lock here
1da177e4
LT
3376 int i, len;
3377 seq_putc(seq, ' ');
3378
3379 i = 0;
755662ce
KI
3380 len = u->addr->len -
3381 offsetof(struct sockaddr_un, sun_path);
5ce7ab49 3382 if (u->addr->name->sun_path[0]) {
1da177e4 3383 len--;
5ce7ab49 3384 } else {
1da177e4
LT
3385 seq_putc(seq, '@');
3386 i++;
3387 }
3388 for ( ; i < len; i++)
e7947ea7
IB
3389 seq_putc(seq, u->addr->name->sun_path[i] ?:
3390 '@');
1da177e4 3391 }
1c92b4e5 3392 unix_state_unlock(s);
1da177e4
LT
3393 seq_putc(seq, '\n');
3394 }
3395
3396 return 0;
3397}
3398
56b3d975 3399static const struct seq_operations unix_seq_ops = {
1da177e4
LT
3400 .start = unix_seq_start,
3401 .next = unix_seq_next,
3402 .stop = unix_seq_stop,
3403 .show = unix_seq_show,
3404};
2c860a43
KI
3405
3406#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
855d8e77
KI
3407struct bpf_unix_iter_state {
3408 struct seq_net_private p;
3409 unsigned int cur_sk;
3410 unsigned int end_sk;
3411 unsigned int max_sk;
3412 struct sock **batch;
3413 bool st_bucket_done;
3414};
3415
2c860a43
KI
3416struct bpf_iter__unix {
3417 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3418 __bpf_md_ptr(struct unix_sock *, unix_sk);
3419 uid_t uid __aligned(8);
3420};
3421
3422static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3423 struct unix_sock *unix_sk, uid_t uid)
3424{
3425 struct bpf_iter__unix ctx;
3426
3427 meta->seq_num--; /* skip SEQ_START_TOKEN */
3428 ctx.meta = meta;
3429 ctx.unix_sk = unix_sk;
3430 ctx.uid = uid;
3431 return bpf_iter_run_prog(prog, &ctx);
3432}
3433
855d8e77
KI
3434static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3435
3436{
3437 struct bpf_unix_iter_state *iter = seq->private;
3438 unsigned int expected = 1;
3439 struct sock *sk;
3440
3441 sock_hold(start_sk);
3442 iter->batch[iter->end_sk++] = start_sk;
3443
3444 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
855d8e77
KI
3445 if (iter->end_sk < iter->max_sk) {
3446 sock_hold(sk);
3447 iter->batch[iter->end_sk++] = sk;
3448 }
3449
3450 expected++;
3451 }
3452
cf2f225e 3453 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
855d8e77
KI
3454
3455 return expected;
3456}
3457
3458static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3459{
3460 while (iter->cur_sk < iter->end_sk)
3461 sock_put(iter->batch[iter->cur_sk++]);
3462}
3463
3464static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3465 unsigned int new_batch_sz)
3466{
3467 struct sock **new_batch;
3468
3469 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3470 GFP_USER | __GFP_NOWARN);
3471 if (!new_batch)
3472 return -ENOMEM;
3473
3474 bpf_iter_unix_put_batch(iter);
3475 kvfree(iter->batch);
3476 iter->batch = new_batch;
3477 iter->max_sk = new_batch_sz;
3478
3479 return 0;
3480}
3481
3482static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3483 loff_t *pos)
3484{
3485 struct bpf_unix_iter_state *iter = seq->private;
3486 unsigned int expected;
3487 bool resized = false;
3488 struct sock *sk;
3489
3490 if (iter->st_bucket_done)
3491 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3492
3493again:
3494 /* Get a new batch */
3495 iter->cur_sk = 0;
3496 iter->end_sk = 0;
3497
3498 sk = unix_get_first(seq, pos);
3499 if (!sk)
3500 return NULL; /* Done */
3501
3502 expected = bpf_iter_unix_hold_batch(seq, sk);
3503
3504 if (iter->end_sk == expected) {
3505 iter->st_bucket_done = true;
3506 return sk;
3507 }
3508
3509 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3510 resized = true;
3511 goto again;
3512 }
3513
3514 return sk;
3515}
3516
3517static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3518{
3519 if (!*pos)
3520 return SEQ_START_TOKEN;
3521
3522 /* bpf iter does not support lseek, so it always
3523 * continue from where it was stop()-ped.
3524 */
3525 return bpf_iter_unix_batch(seq, pos);
3526}
3527
3528static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3529{
3530 struct bpf_unix_iter_state *iter = seq->private;
3531 struct sock *sk;
3532
3533 /* Whenever seq_next() is called, the iter->cur_sk is
3534 * done with seq_show(), so advance to the next sk in
3535 * the batch.
3536 */
3537 if (iter->cur_sk < iter->end_sk)
3538 sock_put(iter->batch[iter->cur_sk++]);
3539
3540 ++*pos;
3541
3542 if (iter->cur_sk < iter->end_sk)
3543 sk = iter->batch[iter->cur_sk];
3544 else
3545 sk = bpf_iter_unix_batch(seq, pos);
3546
3547 return sk;
3548}
3549
2c860a43
KI
3550static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3551{
3552 struct bpf_iter_meta meta;
3553 struct bpf_prog *prog;
3554 struct sock *sk = v;
3555 uid_t uid;
855d8e77
KI
3556 bool slow;
3557 int ret;
2c860a43
KI
3558
3559 if (v == SEQ_START_TOKEN)
3560 return 0;
3561
855d8e77
KI
3562 slow = lock_sock_fast(sk);
3563
3564 if (unlikely(sk_unhashed(sk))) {
3565 ret = SEQ_SKIP;
3566 goto unlock;
3567 }
3568
2c860a43
KI
3569 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3570 meta.seq = seq;
3571 prog = bpf_iter_get_info(&meta, false);
855d8e77
KI
3572 ret = unix_prog_seq_show(prog, &meta, v, uid);
3573unlock:
3574 unlock_sock_fast(sk, slow);
3575 return ret;
2c860a43
KI
3576}
3577
3578static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3579{
855d8e77 3580 struct bpf_unix_iter_state *iter = seq->private;
2c860a43
KI
3581 struct bpf_iter_meta meta;
3582 struct bpf_prog *prog;
3583
3584 if (!v) {
3585 meta.seq = seq;
3586 prog = bpf_iter_get_info(&meta, true);
3587 if (prog)
3588 (void)unix_prog_seq_show(prog, &meta, v, 0);
3589 }
3590
855d8e77
KI
3591 if (iter->cur_sk < iter->end_sk)
3592 bpf_iter_unix_put_batch(iter);
2c860a43
KI
3593}
3594
3595static const struct seq_operations bpf_iter_unix_seq_ops = {
855d8e77
KI
3596 .start = bpf_iter_unix_seq_start,
3597 .next = bpf_iter_unix_seq_next,
2c860a43
KI
3598 .stop = bpf_iter_unix_seq_stop,
3599 .show = bpf_iter_unix_seq_show,
3600};
3601#endif
1da177e4
LT
3602#endif
3603
ec1b4cf7 3604static const struct net_proto_family unix_family_ops = {
1da177e4
LT
3605 .family = PF_UNIX,
3606 .create = unix_create,
3607 .owner = THIS_MODULE,
3608};
3609
097e66c5 3610
2c8c1e72 3611static int __net_init unix_net_init(struct net *net)
097e66c5 3612{
b6e81138 3613 int i;
097e66c5 3614
a0a53c8b 3615 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
3616 if (unix_sysctl_register(net))
3617 goto out;
d392e497 3618
097e66c5 3619#ifdef CONFIG_PROC_FS
c3506372 3620 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
b6e81138
KI
3621 sizeof(struct seq_net_private)))
3622 goto err_sysctl;
3623#endif
3624
3625 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3626 sizeof(spinlock_t), GFP_KERNEL);
3627 if (!net->unx.table.locks)
3628 goto err_proc;
3629
3630 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3631 sizeof(struct hlist_head),
3632 GFP_KERNEL);
3633 if (!net->unx.table.buckets)
3634 goto free_locks;
3635
3636 for (i = 0; i < UNIX_HASH_SIZE; i++) {
3637 spin_lock_init(&net->unx.table.locks[i]);
3638 INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
1597fbc0 3639 }
b6e81138
KI
3640
3641 return 0;
3642
3643free_locks:
3644 kvfree(net->unx.table.locks);
3645err_proc:
3646#ifdef CONFIG_PROC_FS
3647 remove_proc_entry("unix", net->proc_net);
3648err_sysctl:
097e66c5 3649#endif
b6e81138 3650 unix_sysctl_unregister(net);
097e66c5 3651out:
b6e81138 3652 return -ENOMEM;
097e66c5
DL
3653}
3654
2c8c1e72 3655static void __net_exit unix_net_exit(struct net *net)
097e66c5 3656{
b6e81138
KI
3657 kvfree(net->unx.table.buckets);
3658 kvfree(net->unx.table.locks);
1597fbc0 3659 unix_sysctl_unregister(net);
ece31ffd 3660 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
3661}
3662
3663static struct pernet_operations unix_net_ops = {
3664 .init = unix_net_init,
3665 .exit = unix_net_exit,
3666};
3667
2c860a43
KI
3668#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3669DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3670 struct unix_sock *unix_sk, uid_t uid)
3671
855d8e77
KI
3672#define INIT_BATCH_SZ 16
3673
3674static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3675{
3676 struct bpf_unix_iter_state *iter = priv_data;
3677 int err;
3678
3679 err = bpf_iter_init_seq_net(priv_data, aux);
3680 if (err)
3681 return err;
3682
3683 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3684 if (err) {
3685 bpf_iter_fini_seq_net(priv_data);
3686 return err;
3687 }
3688
3689 return 0;
3690}
3691
3692static void bpf_iter_fini_unix(void *priv_data)
3693{
3694 struct bpf_unix_iter_state *iter = priv_data;
3695
3696 bpf_iter_fini_seq_net(priv_data);
3697 kvfree(iter->batch);
3698}
3699
2c860a43
KI
3700static const struct bpf_iter_seq_info unix_seq_info = {
3701 .seq_ops = &bpf_iter_unix_seq_ops,
855d8e77
KI
3702 .init_seq_private = bpf_iter_init_unix,
3703 .fini_seq_private = bpf_iter_fini_unix,
3704 .seq_priv_size = sizeof(struct bpf_unix_iter_state),
2c860a43
KI
3705};
3706
eb7d8f1d
KI
3707static const struct bpf_func_proto *
3708bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3709 const struct bpf_prog *prog)
3710{
3711 switch (func_id) {
3712 case BPF_FUNC_setsockopt:
3713 return &bpf_sk_setsockopt_proto;
3714 case BPF_FUNC_getsockopt:
3715 return &bpf_sk_getsockopt_proto;
3716 default:
3717 return NULL;
3718 }
3719}
3720
2c860a43
KI
3721static struct bpf_iter_reg unix_reg_info = {
3722 .target = "unix",
3723 .ctx_arg_info_size = 1,
3724 .ctx_arg_info = {
3725 { offsetof(struct bpf_iter__unix, unix_sk),
3726 PTR_TO_BTF_ID_OR_NULL },
3727 },
eb7d8f1d 3728 .get_func_proto = bpf_iter_unix_get_func_proto,
2c860a43
KI
3729 .seq_info = &unix_seq_info,
3730};
3731
3732static void __init bpf_iter_register(void)
3733{
3734 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3735 if (bpf_iter_reg_target(&unix_reg_info))
3736 pr_warn("Warning: could not register bpf iterator unix\n");
3737}
3738#endif
3739
1da177e4
LT
3740static int __init af_unix_init(void)
3741{
51bae889 3742 int i, rc = -1;
1da177e4 3743
c593642c 3744 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
1da177e4 3745
51bae889
KI
3746 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3747 spin_lock_init(&bsd_socket_locks[i]);
3748 INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3749 }
3750
94531cfc
JW
3751 rc = proto_register(&unix_dgram_proto, 1);
3752 if (rc != 0) {
3753 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3754 goto out;
3755 }
3756
3757 rc = proto_register(&unix_stream_proto, 1);
ac7bfa62 3758 if (rc != 0) {
5cc208be 3759 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
73e341e0 3760 proto_unregister(&unix_dgram_proto);
1da177e4
LT
3761 goto out;
3762 }
3763
3764 sock_register(&unix_family_ops);
097e66c5 3765 register_pernet_subsys(&unix_net_ops);
c6382918 3766 unix_bpf_build_proto();
2c860a43
KI
3767
3768#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3769 bpf_iter_register();
3770#endif
3771
1da177e4
LT
3772out:
3773 return rc;
3774}
3775
3776static void __exit af_unix_exit(void)
3777{
3778 sock_unregister(PF_UNIX);
94531cfc
JW
3779 proto_unregister(&unix_dgram_proto);
3780 proto_unregister(&unix_stream_proto);
097e66c5 3781 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
3782}
3783
3d366960
DW
3784/* Earlier than device_initcall() so that other drivers invoking
3785 request_module() don't end up in a loop when modprobe tries
3786 to use a UNIX socket. But later than subsys_initcall() because
3787 we depend on stuff initialised there */
3788fs_initcall(af_unix_init);
1da177e4
LT
3789module_exit(af_unix_exit);
3790
3791MODULE_LICENSE("GPL");
3792MODULE_ALIAS_NETPROTO(PF_UNIX);