af_unix: Remove unix_mkname().
[linux-block.git] / net / unix / af_unix.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
113aa838 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4 6 *
1da177e4
LT
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
 28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
 29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
 32 * has been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
1da177e4
LT
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
77
5cc208be 78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
1da177e4 80#include <linux/module.h>
1da177e4 81#include <linux/kernel.h>
1da177e4 82#include <linux/signal.h>
3f07c014 83#include <linux/sched/signal.h>
1da177e4
LT
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
92#include <linux/termios.h>
93#include <linux/sockios.h>
94#include <linux/net.h>
95#include <linux/in.h>
96#include <linux/fs.h>
97#include <linux/slab.h>
7c0f6ba6 98#include <linux/uaccess.h>
1da177e4
LT
99#include <linux/skbuff.h>
100#include <linux/netdevice.h>
457c4cbc 101#include <net/net_namespace.h>
1da177e4 102#include <net/sock.h>
c752f073 103#include <net/tcp_states.h>
1da177e4
LT
104#include <net/af_unix.h>
105#include <linux/proc_fs.h>
106#include <linux/seq_file.h>
107#include <net/scm.h>
108#include <linux/init.h>
109#include <linux/poll.h>
1da177e4
LT
110#include <linux/rtnetlink.h>
111#include <linux/mount.h>
112#include <net/checksum.h>
113#include <linux/security.h>
2b15af6f 114#include <linux/freezer.h>
ba94f308 115#include <linux/file.h>
2c860a43 116#include <linux/btf_ids.h>
1da177e4 117
f4e65870
JA
118#include "scm.h"
119
7123aaa3 120struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
121EXPORT_SYMBOL_GPL(unix_socket_table);
122DEFINE_SPINLOCK(unix_table_lock);
123EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 124static atomic_long_t unix_nr_socks;
1da177e4 125
1da177e4 126
7123aaa3
ED
127static struct hlist_head *unix_sockets_unbound(void *addr)
128{
129 unsigned long hash = (unsigned long)addr;
130
131 hash ^= hash >> 16;
132 hash ^= hash >> 8;
133 hash %= UNIX_HASH_SIZE;
134 return &unix_socket_table[UNIX_HASH_SIZE + hash];
135}
136
137#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 138
877ce7c1 139#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 140static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 141{
37a9a8df 142 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
143}
144
145static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
146{
37a9a8df
SS
147 scm->secid = UNIXCB(skb).secid;
148}
149
150static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
151{
152 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
153}
154#else
dc49c1f9 155static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
156{ }
157
158static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159{ }
37a9a8df
SS
160
161static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
162{
163 return true;
164}
877ce7c1
CZ
165#endif /* CONFIG_SECURITY_NETWORK */
166
1da177e4
LT
167/*
168 * SMP locking strategy:
fbe9cc4a 169 * hash table is protected with spinlock unix_table_lock
663717f6 170 * each socket state is protected by separate spin lock.
1da177e4
LT
171 */
172
95c96174 173static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 174{
0a13404d 175 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 176
1da177e4
LT
177 hash ^= hash>>8;
178 return hash&(UNIX_HASH_SIZE-1);
179}
180
181#define unix_peer(sk) (unix_sk(sk)->peer)
182
183static inline int unix_our_peer(struct sock *sk, struct sock *osk)
184{
185 return unix_peer(osk) == sk;
186}
187
188static inline int unix_may_send(struct sock *sk, struct sock *osk)
189{
6eba6a37 190 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
191}
192
86b18aaa 193static inline int unix_recvq_full(const struct sock *sk)
3c73419c
RW
194{
195 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
196}
197
86b18aaa
QC
198static inline int unix_recvq_full_lockless(const struct sock *sk)
199{
200 return skb_queue_len_lockless(&sk->sk_receive_queue) >
201 READ_ONCE(sk->sk_max_ack_backlog);
202}
203
/* Grab a referenced copy of sk's peer under the state lock. */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);

	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
216
217static inline void unix_release_addr(struct unix_address *addr)
218{
8c9814b9 219 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
220 kfree(addr);
221}
222
/*
 * Check unix socket name:
 *	- should be not zero length.
 *	- if started by not zero, should be NULL terminated (FS object)
 *	- if started by zero, it is abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	/* The length must cover sun_family plus at least one path byte,
	 * and must not overrun struct sockaddr_un.
	 */
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
241
static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.
	 */
	((char *)sunaddr)[addr_len] = 0;
}
254
1da177e4
LT
255static void __unix_remove_socket(struct sock *sk)
256{
257 sk_del_node_init(sk);
258}
259
260static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
261{
547b792c 262 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
263 sk_add_node(sk, list);
264}
265
185ab886
AV
266static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
267 unsigned hash)
268{
269 __unix_remove_socket(sk);
270 smp_store_release(&unix_sk(sk)->addr, addr);
271 __unix_insert_socket(&unix_socket_table[hash], sk);
272}
273
1da177e4
LT
274static inline void unix_remove_socket(struct sock *sk)
275{
fbe9cc4a 276 spin_lock(&unix_table_lock);
1da177e4 277 __unix_remove_socket(sk);
fbe9cc4a 278 spin_unlock(&unix_table_lock);
1da177e4
LT
279}
280
281static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
282{
fbe9cc4a 283 spin_lock(&unix_table_lock);
1da177e4 284 __unix_insert_socket(list, sk);
fbe9cc4a 285 spin_unlock(&unix_table_lock);
1da177e4
LT
286}
287
097e66c5
DL
288static struct sock *__unix_find_socket_byname(struct net *net,
289 struct sockaddr_un *sunname,
be752283 290 int len, unsigned int hash)
1da177e4
LT
291{
292 struct sock *s;
1da177e4 293
be752283 294 sk_for_each(s, &unix_socket_table[hash]) {
1da177e4
LT
295 struct unix_sock *u = unix_sk(s);
296
878628fb 297 if (!net_eq(sock_net(s), net))
097e66c5
DL
298 continue;
299
1da177e4
LT
300 if (u->addr->len == len &&
301 !memcmp(u->addr->name, sunname, len))
262ce0af 302 return s;
1da177e4 303 }
262ce0af 304 return NULL;
1da177e4
LT
305}
306
097e66c5
DL
307static inline struct sock *unix_find_socket_byname(struct net *net,
308 struct sockaddr_un *sunname,
be752283 309 int len, unsigned int hash)
1da177e4
LT
310{
311 struct sock *s;
312
fbe9cc4a 313 spin_lock(&unix_table_lock);
be752283 314 s = __unix_find_socket_byname(net, sunname, len, hash);
1da177e4
LT
315 if (s)
316 sock_hold(s);
fbe9cc4a 317 spin_unlock(&unix_table_lock);
1da177e4
LT
318 return s;
319}
320
6616f788 321static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
322{
323 struct sock *s;
1da177e4 324
fbe9cc4a 325 spin_lock(&unix_table_lock);
b67bfe0d 326 sk_for_each(s,
1da177e4 327 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 328 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 329
beef5121 330 if (dentry && d_backing_inode(dentry) == i) {
1da177e4
LT
331 sock_hold(s);
332 goto found;
333 }
334 }
335 s = NULL;
336found:
fbe9cc4a 337 spin_unlock(&unix_table_lock);
1da177e4
LT
338 return s;
339}
340
7d267278
RW
341/* Support code for asymmetrically connected dgram sockets
342 *
343 * If a datagram socket is connected to a socket not itself connected
344 * to the first socket (eg, /dev/log), clients may only enqueue more
345 * messages if the present receive queue of the server socket is not
346 * "too large". This means there's a second writeability condition
347 * poll and sendmsg need to test. The dgram recv code will do a wake
348 * up on the peer_wait wait queue of a socket upon reception of a
349 * datagram which needs to be propagated to sleeping would-be writers
350 * since these might not have sent anything so far. This can't be
351 * accomplished via poll_wait because the lifetime of the server
352 * socket might be less than that of its clients if these break their
353 * association with it or if the server socket is closed while clients
354 * are still connected to it and there's no way to inform "a polling
355 * implementation" that it should let go of a certain wait queue
356 *
ac6424b9 357 * In order to propagate a wake up, a wait_queue_entry_t of the client
7d267278
RW
358 * socket is enqueued on the peer_wait queue of the server socket
359 * whose wake function does a wake_up on the ordinary client socket
360 * wait queue. This connection is established whenever a write (or
361 * poll for write) hit the flow control condition and broken when the
362 * association to the server socket is dissolved or after a wake up
363 * was relayed.
364 */
365
ac6424b9 366static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
367 void *key)
368{
369 struct unix_sock *u;
370 wait_queue_head_t *u_sleep;
371
372 u = container_of(q, struct unix_sock, peer_wake);
373
374 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
375 q);
376 u->peer_wake.private = NULL;
377
378 /* relaying can only happen while the wq still exists */
379 u_sleep = sk_sleep(&u->sk);
380 if (u_sleep)
3ad6f93e 381 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
7d267278
RW
382
383 return 0;
384}
385
386static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
387{
388 struct unix_sock *u, *u_other;
389 int rc;
390
391 u = unix_sk(sk);
392 u_other = unix_sk(other);
393 rc = 0;
394 spin_lock(&u_other->peer_wait.lock);
395
396 if (!u->peer_wake.private) {
397 u->peer_wake.private = other;
398 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
399
400 rc = 1;
401 }
402
403 spin_unlock(&u_other->peer_wait.lock);
404 return rc;
405}
406
407static void unix_dgram_peer_wake_disconnect(struct sock *sk,
408 struct sock *other)
409{
410 struct unix_sock *u, *u_other;
411
412 u = unix_sk(sk);
413 u_other = unix_sk(other);
414 spin_lock(&u_other->peer_wait.lock);
415
416 if (u->peer_wake.private == other) {
417 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
418 u->peer_wake.private = NULL;
419 }
420
421 spin_unlock(&u_other->peer_wait.lock);
422}
423
424static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
425 struct sock *other)
426{
427 unix_dgram_peer_wake_disconnect(sk, other);
428 wake_up_interruptible_poll(sk_sleep(sk),
a9a08845
LT
429 EPOLLOUT |
430 EPOLLWRNORM |
431 EPOLLWRBAND);
7d267278
RW
432}
433
434/* preconditions:
435 * - unix_peer(sk) == other
436 * - association is stable
437 */
438static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
439{
440 int connected;
441
442 connected = unix_dgram_peer_wake_connect(sk, other);
443
51f7e951
JB
444 /* If other is SOCK_DEAD, we want to make sure we signal
445 * POLLOUT, such that a subsequent write() can get a
446 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
447 * to other and its full, we will hang waiting for POLLOUT.
448 */
449 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
7d267278
RW
450 return 1;
451
452 if (connected)
453 unix_dgram_peer_wake_disconnect(sk, other);
454
455 return 0;
456}
457
1586a587 458static int unix_writable(const struct sock *sk)
1da177e4 459{
1586a587 460 return sk->sk_state != TCP_LISTEN &&
14afee4b 461 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
462}
463
464static void unix_write_space(struct sock *sk)
465{
43815482
ED
466 struct socket_wq *wq;
467
468 rcu_read_lock();
1da177e4 469 if (unix_writable(sk)) {
43815482 470 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 471 if (skwq_has_sleeper(wq))
67426b75 472 wake_up_interruptible_sync_poll(&wq->wait,
a9a08845 473 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
8d8ad9d7 474 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 475 }
43815482 476 rcu_read_unlock();
1da177e4
LT
477}
478
479/* When dgram socket disconnects (or changes its peer), we clear its receive
480 * queue of packets arrived from previous peer. First, it allows to do
481 * flow control based only on wmem_alloc; second, sk connected to peer
482 * may receive messages only from that peer. */
483static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
484{
b03efcfb 485 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
486 skb_queue_purge(&sk->sk_receive_queue);
487 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
488
489 /* If one link of bidirectional dgram pipe is disconnected,
490 * we signal error. Messages are lost. Do not make this,
491 * when peer was not connected to us.
492 */
493 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
494 other->sk_err = ECONNRESET;
e3ae2365 495 sk_error_report(other);
1da177e4
LT
496 }
497 }
dc56ad70 498 other->sk_state = TCP_CLOSE;
1da177e4
LT
499}
500
501static void unix_sock_destructor(struct sock *sk)
502{
503 struct unix_sock *u = unix_sk(sk);
504
505 skb_queue_purge(&sk->sk_receive_queue);
506
314001f0
RS
507#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
508 if (u->oob_skb) {
509 kfree_skb(u->oob_skb);
510 u->oob_skb = NULL;
511 }
512#endif
14afee4b 513 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
547b792c
IJ
514 WARN_ON(!sk_unhashed(sk));
515 WARN_ON(sk->sk_socket);
1da177e4 516 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 517 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
518 return;
519 }
520
521 if (u->addr)
522 unix_release_addr(u->addr);
523
518de9b3 524 atomic_long_dec(&unix_nr_socks);
a8076d8d 525 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1da177e4 526#ifdef UNIX_REFCNT_DEBUG
5cc208be 527 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 528 atomic_long_read(&unix_nr_socks));
1da177e4
LT
529#endif
530}
531
ded34e0f 532static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
533{
534 struct unix_sock *u = unix_sk(sk);
40ffe67d 535 struct path path;
1da177e4
LT
536 struct sock *skpair;
537 struct sk_buff *skb;
538 int state;
539
540 unix_remove_socket(sk);
541
542 /* Clear state */
1c92b4e5 543 unix_state_lock(sk);
1da177e4
LT
544 sock_orphan(sk);
545 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
546 path = u->path;
547 u->path.dentry = NULL;
548 u->path.mnt = NULL;
1da177e4
LT
549 state = sk->sk_state;
550 sk->sk_state = TCP_CLOSE;
a494bd64
ED
551
552 skpair = unix_peer(sk);
553 unix_peer(sk) = NULL;
554
1c92b4e5 555 unix_state_unlock(sk);
1da177e4
LT
556
557 wake_up_interruptible_all(&u->peer_wait);
558
e27dfcea 559 if (skpair != NULL) {
1da177e4 560 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 561 unix_state_lock(skpair);
1da177e4
LT
562 /* No more writes */
563 skpair->sk_shutdown = SHUTDOWN_MASK;
564 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
565 skpair->sk_err = ECONNRESET;
1c92b4e5 566 unix_state_unlock(skpair);
1da177e4 567 skpair->sk_state_change(skpair);
8d8ad9d7 568 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 569 }
7d267278
RW
570
571 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4 572 sock_put(skpair); /* It may now die */
1da177e4
LT
573 }
574
575 /* Try to flush out this socket. Throw out buffers at least */
576
577 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 578 if (state == TCP_LISTEN)
1da177e4
LT
579 unix_release_sock(skb->sk, 1);
580 /* passed fds are erased in the kfree_skb hook */
73ed5d25 581 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
582 kfree_skb(skb);
583 }
584
40ffe67d
AV
585 if (path.dentry)
586 path_put(&path);
1da177e4
LT
587
588 sock_put(sk);
589
590 /* ---- Socket is dead now and most probably destroyed ---- */
591
592 /*
e04dae84 593 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
594 * ECONNRESET and we die on the spot. In Linux we behave
595 * like files and pipes do and wait for the last
596 * dereference.
597 *
598 * Can't we simply set sock->err?
599 *
600 * What the above comment does talk about? --ANK(980817)
601 */
602
9305cfa4 603 if (unix_tot_inflight)
ac7bfa62 604 unix_gc(); /* Garbage collect fds */
1da177e4
LT
605}
606
109f6e39
EB
607static void init_peercred(struct sock *sk)
608{
35306eb2
ED
609 const struct cred *old_cred;
610 struct pid *old_pid;
611
612 spin_lock(&sk->sk_peer_lock);
613 old_pid = sk->sk_peer_pid;
614 old_cred = sk->sk_peer_cred;
109f6e39
EB
615 sk->sk_peer_pid = get_pid(task_tgid(current));
616 sk->sk_peer_cred = get_current_cred();
35306eb2
ED
617 spin_unlock(&sk->sk_peer_lock);
618
619 put_pid(old_pid);
620 put_cred(old_cred);
109f6e39
EB
621}
622
623static void copy_peercred(struct sock *sk, struct sock *peersk)
624{
35306eb2
ED
625 const struct cred *old_cred;
626 struct pid *old_pid;
627
628 if (sk < peersk) {
629 spin_lock(&sk->sk_peer_lock);
630 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
631 } else {
632 spin_lock(&peersk->sk_peer_lock);
633 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
634 }
635 old_pid = sk->sk_peer_pid;
636 old_cred = sk->sk_peer_cred;
109f6e39
EB
637 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
638 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
35306eb2
ED
639
640 spin_unlock(&sk->sk_peer_lock);
641 spin_unlock(&peersk->sk_peer_lock);
642
643 put_pid(old_pid);
644 put_cred(old_cred);
109f6e39
EB
645}
646
1da177e4
LT
647static int unix_listen(struct socket *sock, int backlog)
648{
649 int err;
650 struct sock *sk = sock->sk;
651 struct unix_sock *u = unix_sk(sk);
652
653 err = -EOPNOTSUPP;
6eba6a37
ED
654 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
655 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
656 err = -EINVAL;
657 if (!u->addr)
6eba6a37 658 goto out; /* No listens on an unbound socket */
1c92b4e5 659 unix_state_lock(sk);
1da177e4
LT
660 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
661 goto out_unlock;
662 if (backlog > sk->sk_max_ack_backlog)
663 wake_up_interruptible_all(&u->peer_wait);
664 sk->sk_max_ack_backlog = backlog;
665 sk->sk_state = TCP_LISTEN;
666 /* set credentials so connect can copy them */
109f6e39 667 init_peercred(sk);
1da177e4
LT
668 err = 0;
669
670out_unlock:
1c92b4e5 671 unix_state_unlock(sk);
1da177e4
LT
672out:
673 return err;
674}
675
676static int unix_release(struct socket *);
677static int unix_bind(struct socket *, struct sockaddr *, int);
678static int unix_stream_connect(struct socket *, struct sockaddr *,
679 int addr_len, int flags);
680static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 681static int unix_accept(struct socket *, struct socket *, int, bool);
9b2c45d4 682static int unix_getname(struct socket *, struct sockaddr *, int);
a11e1d43
LT
683static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
684static __poll_t unix_dgram_poll(struct file *, struct socket *,
685 poll_table *);
1da177e4 686static int unix_ioctl(struct socket *, unsigned int, unsigned long);
5f6beb9e
AB
687#ifdef CONFIG_COMPAT
688static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
689#endif
1da177e4 690static int unix_shutdown(struct socket *, int);
1b784140
YX
691static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
692static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
693static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
694 size_t size, int flags);
2b514574
HFS
695static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
696 struct pipe_inode_info *, size_t size,
697 unsigned int flags);
1b784140
YX
698static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
699static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
29df44fa
CW
700static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
701 sk_read_actor_t recv_actor);
77462de1
JW
702static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
703 sk_read_actor_t recv_actor);
1da177e4
LT
704static int unix_dgram_connect(struct socket *, struct sockaddr *,
705 int, int);
1b784140
YX
706static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
707static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
708 int);
1da177e4 709
12663bfc 710static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
711{
712 struct unix_sock *u = unix_sk(sk);
713
6e1ce3c3 714 if (mutex_lock_interruptible(&u->iolock))
12663bfc
SL
715 return -EINTR;
716
f55bb7f9 717 sk->sk_peek_off = val;
6e1ce3c3 718 mutex_unlock(&u->iolock);
12663bfc
SL
719
720 return 0;
f55bb7f9
PE
721}
722
#ifdef CONFIG_PROC_FS
/* Expose the count of in-flight SCM_RIGHTS fds in /proc fdinfo. */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif
f55bb7f9 738
90ddc4f0 739static const struct proto_ops unix_stream_ops = {
1da177e4
LT
740 .family = PF_UNIX,
741 .owner = THIS_MODULE,
742 .release = unix_release,
743 .bind = unix_bind,
744 .connect = unix_stream_connect,
745 .socketpair = unix_socketpair,
746 .accept = unix_accept,
747 .getname = unix_getname,
a11e1d43 748 .poll = unix_poll,
1da177e4 749 .ioctl = unix_ioctl,
5f6beb9e
AB
750#ifdef CONFIG_COMPAT
751 .compat_ioctl = unix_compat_ioctl,
752#endif
1da177e4
LT
753 .listen = unix_listen,
754 .shutdown = unix_shutdown,
1da177e4
LT
755 .sendmsg = unix_stream_sendmsg,
756 .recvmsg = unix_stream_recvmsg,
77462de1 757 .read_sock = unix_stream_read_sock,
1da177e4 758 .mmap = sock_no_mmap,
869e7c62 759 .sendpage = unix_stream_sendpage,
2b514574 760 .splice_read = unix_stream_splice_read,
fc0d7536 761 .set_peek_off = unix_set_peek_off,
3c32da19 762 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
763};
764
90ddc4f0 765static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
766 .family = PF_UNIX,
767 .owner = THIS_MODULE,
768 .release = unix_release,
769 .bind = unix_bind,
770 .connect = unix_dgram_connect,
771 .socketpair = unix_socketpair,
772 .accept = sock_no_accept,
773 .getname = unix_getname,
a11e1d43 774 .poll = unix_dgram_poll,
1da177e4 775 .ioctl = unix_ioctl,
5f6beb9e
AB
776#ifdef CONFIG_COMPAT
777 .compat_ioctl = unix_compat_ioctl,
778#endif
1da177e4
LT
779 .listen = sock_no_listen,
780 .shutdown = unix_shutdown,
1da177e4 781 .sendmsg = unix_dgram_sendmsg,
29df44fa 782 .read_sock = unix_read_sock,
1da177e4
LT
783 .recvmsg = unix_dgram_recvmsg,
784 .mmap = sock_no_mmap,
785 .sendpage = sock_no_sendpage,
f55bb7f9 786 .set_peek_off = unix_set_peek_off,
3c32da19 787 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
788};
789
90ddc4f0 790static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
791 .family = PF_UNIX,
792 .owner = THIS_MODULE,
793 .release = unix_release,
794 .bind = unix_bind,
795 .connect = unix_stream_connect,
796 .socketpair = unix_socketpair,
797 .accept = unix_accept,
798 .getname = unix_getname,
a11e1d43 799 .poll = unix_dgram_poll,
1da177e4 800 .ioctl = unix_ioctl,
5f6beb9e
AB
801#ifdef CONFIG_COMPAT
802 .compat_ioctl = unix_compat_ioctl,
803#endif
1da177e4
LT
804 .listen = unix_listen,
805 .shutdown = unix_shutdown,
1da177e4 806 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 807 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
808 .mmap = sock_no_mmap,
809 .sendpage = sock_no_sendpage,
f55bb7f9 810 .set_peek_off = unix_set_peek_off,
3c32da19 811 .show_fdinfo = unix_show_fdinfo,
1da177e4
LT
812};
813
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}
827
828struct proto unix_dgram_proto = {
0edf0824 829 .name = "UNIX",
248969ae 830 .owner = THIS_MODULE,
248969ae 831 .obj_size = sizeof(struct unix_sock),
c7272e15 832 .close = unix_close,
c6382918 833#ifdef CONFIG_BPF_SYSCALL
94531cfc 834 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
c6382918 835#endif
1da177e4
LT
836};
837
94531cfc
JW
838struct proto unix_stream_proto = {
839 .name = "UNIX-STREAM",
248969ae 840 .owner = THIS_MODULE,
248969ae 841 .obj_size = sizeof(struct unix_sock),
c7272e15 842 .close = unix_close,
94531cfc 843 .unhash = unix_unhash,
c6382918 844#ifdef CONFIG_BPF_SYSCALL
94531cfc 845 .psock_update_sk_prot = unix_stream_bpf_update_proto,
c6382918 846#endif
1da177e4
LT
847};
848
94531cfc 849static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1da177e4 850{
1da177e4 851 struct unix_sock *u;
f4bd73b5
KI
852 struct sock *sk;
853 int err;
1da177e4 854
518de9b3 855 atomic_long_inc(&unix_nr_socks);
f4bd73b5
KI
856 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
857 err = -ENFILE;
858 goto err;
859 }
1da177e4 860
94531cfc
JW
861 if (type == SOCK_STREAM)
862 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
863 else /*dgram and seqpacket */
864 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
865
f4bd73b5
KI
866 if (!sk) {
867 err = -ENOMEM;
868 goto err;
869 }
1da177e4 870
6eba6a37 871 sock_init_data(sock, sk);
1da177e4 872
3aa9799e 873 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 874 sk->sk_write_space = unix_write_space;
a0a53c8b 875 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
876 sk->sk_destruct = unix_sock_destructor;
877 u = unix_sk(sk);
40ffe67d
AV
878 u->path.dentry = NULL;
879 u->path.mnt = NULL;
fd19f329 880 spin_lock_init(&u->lock);
516e0cc5 881 atomic_long_set(&u->inflight, 0);
1fd05ba5 882 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
883 mutex_init(&u->iolock); /* single task reading lock */
884 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 885 init_waitqueue_head(&u->peer_wait);
7d267278 886 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
3c32da19 887 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
7123aaa3 888 unix_insert_socket(unix_sockets_unbound(sk), sk);
f4bd73b5 889
f4bd73b5 890 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
f4bd73b5 891
1da177e4 892 return sk;
f4bd73b5
KI
893
894err:
895 atomic_long_dec(&unix_nr_socks);
896 return ERR_PTR(err);
1da177e4
LT
897}
898
3f378b68
EP
899static int unix_create(struct net *net, struct socket *sock, int protocol,
900 int kern)
1da177e4 901{
f4bd73b5
KI
902 struct sock *sk;
903
1da177e4
LT
904 if (protocol && protocol != PF_UNIX)
905 return -EPROTONOSUPPORT;
906
907 sock->state = SS_UNCONNECTED;
908
909 switch (sock->type) {
910 case SOCK_STREAM:
911 sock->ops = &unix_stream_ops;
912 break;
913 /*
914 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
915 * nothing uses it.
916 */
917 case SOCK_RAW:
e27dfcea 918 sock->type = SOCK_DGRAM;
df561f66 919 fallthrough;
1da177e4
LT
920 case SOCK_DGRAM:
921 sock->ops = &unix_dgram_ops;
922 break;
923 case SOCK_SEQPACKET:
924 sock->ops = &unix_seqpacket_ops;
925 break;
926 default:
927 return -ESOCKTNOSUPPORT;
928 }
929
f4bd73b5
KI
930 sk = unix_create1(net, sock, kern, sock->type);
931 if (IS_ERR(sk))
932 return PTR_ERR(sk);
933
934 return 0;
1da177e4
LT
935}
936
937static int unix_release(struct socket *sock)
938{
939 struct sock *sk = sock->sk;
940
941 if (!sk)
942 return 0;
943
c7272e15 944 sk->sk_prot->close(sk, 0);
ded34e0f 945 unix_release_sock(sk, 0);
1da177e4
LT
946 sock->sk = NULL;
947
ded34e0f 948 return 0;
1da177e4
LT
949}
950
static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
				  int addr_len, int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	/* The found socket must match the requested type. */
	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}
994
/* Look up the peer for an abstract (leading-NUL) AF_UNIX address by name
 * hash.  Returns the sock with a reference held, or ERR_PTR(-ECONNREFUSED)
 * when no matching socket is bound.  Touches atime when the (unexpected)
 * case of a path-backed dentry exists.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash;
	struct unix_sock *u;
	struct sock *sk;

	hash = unix_hash_fold(csum_partial(sunaddr, addr_len, 0));
	sk = unix_find_socket_byname(net, sunaddr, addr_len, type ^ hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	u = unix_sk(sk);
	if (u->path.dentry)
		touch_atime(&u->path);

	return sk;
}
1013
1014static struct sock *unix_find_other(struct net *net,
1015 struct sockaddr_un *sunaddr,
d2d8c9fd 1016 int addr_len, int type)
fa39ef0e
KI
1017{
1018 struct sock *sk;
1019
1020 if (sunaddr->sun_path[0])
d2d8c9fd 1021 sk = unix_find_bsd(net, sunaddr, addr_len, type);
fa39ef0e 1022 else
d2d8c9fd 1023 sk = unix_find_abstract(net, sunaddr, addr_len, type);
fa39ef0e
KI
1024
1025 return sk;
1026}
1027
f7ed31f4 1028static int unix_autobind(struct sock *sk)
1da177e4 1029{
1da177e4 1030 struct unix_sock *u = unix_sk(sk);
6eba6a37 1031 struct unix_address *addr;
8df73ff9 1032 unsigned int retries = 0;
f7ed31f4
KI
1033 static u32 ordernum = 1;
1034 int err;
1da177e4 1035
6e1ce3c3 1036 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
1037 if (err)
1038 return err;
1da177e4 1039
1da177e4
LT
1040 if (u->addr)
1041 goto out;
1042
1043 err = -ENOMEM;
755662ce
KI
1044 addr = kzalloc(sizeof(*addr) +
1045 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1da177e4
LT
1046 if (!addr)
1047 goto out;
1048
1da177e4 1049 addr->name->sun_family = AF_UNIX;
8c9814b9 1050 refcount_set(&addr->refcnt, 1);
1da177e4
LT
1051
1052retry:
755662ce
KI
1053 addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) +
1054 offsetof(struct sockaddr_un, sun_path) + 1;
07f0757a 1055 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
be752283 1056 addr->hash ^= sk->sk_type;
1da177e4 1057
fbe9cc4a 1058 spin_lock(&unix_table_lock);
1da177e4
LT
1059 ordernum = (ordernum+1)&0xFFFFF;
1060
f7ed31f4
KI
1061 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1062 addr->hash)) {
fbe9cc4a 1063 spin_unlock(&unix_table_lock);
8df73ff9
TH
1064 /*
1065 * __unix_find_socket_byname() may take long time if many names
1066 * are already in use.
1067 */
1068 cond_resched();
1069 /* Give up if all names seems to be in use. */
1070 if (retries++ == 0xFFFFF) {
1071 err = -ENOSPC;
1072 kfree(addr);
1073 goto out;
1074 }
1da177e4
LT
1075 goto retry;
1076 }
1da177e4 1077
185ab886 1078 __unix_set_addr(sk, addr, addr->hash);
fbe9cc4a 1079 spin_unlock(&unix_table_lock);
1da177e4
LT
1080 err = 0;
1081
6e1ce3c3 1082out: mutex_unlock(&u->bindlock);
1da177e4
LT
1083 return err;
1084}
1085
71e6be6f 1086static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
faf02010 1087{
71e6be6f
AV
1088 struct unix_sock *u = unix_sk(sk);
1089 umode_t mode = S_IFSOCK |
1090 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
71e6be6f 1091 struct user_namespace *ns; // barf...
56c1731b 1092 struct path parent;
38f7bd94 1093 struct dentry *dentry;
71e6be6f
AV
1094 unsigned int hash;
1095 int err;
1096
38f7bd94
LT
1097 /*
1098 * Get the parent directory, calculate the hash for last
1099 * component.
1100 */
71e6be6f 1101 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
38f7bd94 1102 if (IS_ERR(dentry))
71e6be6f
AV
1103 return PTR_ERR(dentry);
1104 ns = mnt_user_ns(parent.mnt);
faf02010 1105
38f7bd94
LT
1106 /*
1107 * All right, let's create it.
1108 */
71e6be6f 1109 err = security_path_mknod(&parent, dentry, mode, 0);
56c1731b 1110 if (!err)
71e6be6f 1111 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
c0c3b8d3
AV
1112 if (err)
1113 goto out;
fa42d910 1114 err = mutex_lock_interruptible(&u->bindlock);
c0c3b8d3
AV
1115 if (err)
1116 goto out_unlink;
1117 if (u->addr)
1118 goto out_unlock;
fa42d910
AV
1119
1120 addr->hash = UNIX_HASH_SIZE;
56c1731b 1121 hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
fa42d910 1122 spin_lock(&unix_table_lock);
56c1731b
AV
1123 u->path.mnt = mntget(parent.mnt);
1124 u->path.dentry = dget(dentry);
fa42d910
AV
1125 __unix_set_addr(sk, addr, hash);
1126 spin_unlock(&unix_table_lock);
1127 mutex_unlock(&u->bindlock);
56c1731b 1128 done_path_create(&parent, dentry);
fa42d910 1129 return 0;
c0c3b8d3
AV
1130
1131out_unlock:
1132 mutex_unlock(&u->bindlock);
1133 err = -EINVAL;
1134out_unlink:
1135 /* failed after successful mknod? unlink what we'd created... */
1136 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1137out:
1138 done_path_create(&parent, dentry);
1139 return err;
fa42d910
AV
1140}
1141
be752283 1142static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
fa42d910
AV
1143{
1144 struct unix_sock *u = unix_sk(sk);
1145 int err;
1146
1147 err = mutex_lock_interruptible(&u->bindlock);
1148 if (err)
1149 return err;
1150
1151 if (u->addr) {
1152 mutex_unlock(&u->bindlock);
1153 return -EINVAL;
1154 }
1155
5c32a3ed
KI
1156 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1157 addr->hash ^= sk->sk_type;
1158
fa42d910
AV
1159 spin_lock(&unix_table_lock);
1160 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
be752283 1161 addr->hash)) {
fa42d910
AV
1162 spin_unlock(&unix_table_lock);
1163 mutex_unlock(&u->bindlock);
1164 return -EADDRINUSE;
1165 }
1166 __unix_set_addr(sk, addr, addr->hash);
1167 spin_unlock(&unix_table_lock);
1168 mutex_unlock(&u->bindlock);
1169 return 0;
1170}
1171
1da177e4
LT
1172static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1173{
e27dfcea 1174 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 1175 char *sun_path = sunaddr->sun_path;
5c32a3ed 1176 struct sock *sk = sock->sk;
1da177e4 1177 struct unix_address *addr;
5c32a3ed 1178 int err;
1da177e4 1179
b8a58aa6
KI
1180 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1181 sunaddr->sun_family == AF_UNIX)
f7ed31f4 1182 return unix_autobind(sk);
1da177e4 1183
b8a58aa6
KI
1184 err = unix_validate_addr(sunaddr, addr_len);
1185 if (err)
1186 return err;
1187
5c32a3ed
KI
1188 if (sun_path[0]) {
1189 unix_mkname_bsd(sunaddr, addr_len);
1190 addr_len = strlen(sunaddr->sun_path) +
1191 offsetof(struct sockaddr_un, sun_path) + 1;
1192 }
1193
c34d4582
AV
1194 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1195 if (!addr)
fa42d910 1196 return -ENOMEM;
c34d4582
AV
1197
1198 memcpy(addr->name, sunaddr, addr_len);
1199 addr->len = addr_len;
c34d4582 1200 refcount_set(&addr->refcnt, 1);
1da177e4 1201
fa42d910
AV
1202 if (sun_path[0])
1203 err = unix_bind_bsd(sk, addr);
1204 else
be752283 1205 err = unix_bind_abstract(sk, addr);
fa42d910 1206 if (err)
c34d4582 1207 unix_release_addr(addr);
fa42d910 1208 return err == -EEXIST ? -EADDRINUSE : err;
1da177e4
LT
1209}
1210
278a3de5
DM
/* Lock the state of two socks deadlock-free: identical (or NULL second)
 * socks take a single lock; otherwise locks are always acquired in
 * ascending pointer order, with the second acquisition marked _nested
 * for lockdep.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
1225
/* Undo unix_state_double_lock(): release one lock when the socks are the
 * same (or the second is NULL), otherwise release both.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
	} else {
		unix_state_unlock(sk1);
		unix_state_unlock(sk2);
	}
}
1235
1da177e4
LT
/* connect(2) for datagram sockets: set (or, for AF_UNSPEC, clear) the
 * default peer.  Both socks are locked together via
 * unix_state_double_lock(); a dead peer found under the lock triggers a
 * fresh lookup.  An old peer, if different, is notified of the
 * disconnect.  Returns 0 or -errno.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		/* SO_PASSCRED requires an address; autobind if unbound. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		/* Stop waiting on the old peer's queue. */
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1323
/* Sleep until the peer's receive queue has room (or @timeo elapses).
 * Called with other's state lock held; the lock is DROPPED before
 * sleeping (hence the __releases annotation) and not re-taken.  The
 * queue-full condition is sampled under the lock so a wakeup cannot be
 * missed between the check and the sleep.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only actually sleep if the peer is alive, readable, and full. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1345
/* connect(2) for SOCK_STREAM/SOCK_SEQPACKET: allocate the server-side
 * sock and a notification skb up front, find the listener, and complete
 * the handshake under both state locks.  The new sock rides to the
 * listener inside the skb, from which accept() retrieves it.  Blocks
 * (subject to sndtimeo) when the listener's backlog is full.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	int st;
	int err;
	long timeo;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	/* SO_PASSCRED requires an address; autobind if unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's lock; see unix_wait_for_peer(). */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock. Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1540
1541static int unix_socketpair(struct socket *socka, struct socket *sockb)
1542{
e27dfcea 1543 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1544
1545 /* Join our sockets back to back */
1546 sock_hold(ska);
1547 sock_hold(skb);
e27dfcea
JK
1548 unix_peer(ska) = skb;
1549 unix_peer(skb) = ska;
109f6e39
EB
1550 init_peercred(ska);
1551 init_peercred(skb);
1da177e4 1552
83301b53
CW
1553 ska->sk_state = TCP_ESTABLISHED;
1554 skb->sk_state = TCP_ESTABLISHED;
1555 socka->state = SS_CONNECTED;
1556 sockb->state = SS_CONNECTED;
1da177e4
LT
1557 return 0;
1558}
1559
90c6bd34
DB
1560static void unix_sock_inherit_flags(const struct socket *old,
1561 struct socket *new)
1562{
1563 if (test_bit(SOCK_PASSCRED, &old->flags))
1564 set_bit(SOCK_PASSCRED, &new->flags);
1565 if (test_bit(SOCK_PASSSEC, &old->flags))
1566 set_bit(SOCK_PASSSEC, &new->flags);
1567}
1568
cdfbabfb
DH
/* accept(2) handler: dequeue one connection-carrying skb from the
 * listener (the embedded sock was placed there by unix_stream_connect()),
 * wake a possibly throttled connector, and graft the accepted sock onto
 * @newsock.  Returns 0 or -errno.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A connector blocked on a full backlog may now proceed. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1612
1613
9b2c45d4 1614static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1da177e4
LT
1615{
1616 struct sock *sk = sock->sk;
ae3b5641 1617 struct unix_address *addr;
13cfa97b 1618 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1619 int err = 0;
1620
1621 if (peer) {
1622 sk = unix_peer_get(sk);
1623
1624 err = -ENOTCONN;
1625 if (!sk)
1626 goto out;
1627 err = 0;
1628 } else {
1629 sock_hold(sk);
1630 }
1631
ae3b5641
AV
1632 addr = smp_load_acquire(&unix_sk(sk)->addr);
1633 if (!addr) {
1da177e4
LT
1634 sunaddr->sun_family = AF_UNIX;
1635 sunaddr->sun_path[0] = 0;
755662ce 1636 err = offsetof(struct sockaddr_un, sun_path);
1da177e4 1637 } else {
9b2c45d4
DV
1638 err = addr->len;
1639 memcpy(sunaddr, addr->name, addr->len);
1da177e4 1640 }
1da177e4
LT
1641 sock_put(sk);
1642out:
1643 return err;
1644}
1645
cbcf0112
MS
/* Duplicate the skb's passed-fd list into @scm for MSG_PEEK delivery,
 * then serialize with the garbage collector (see the long explanation
 * below for why the empty lock/unlock pair is required).
 */
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operations that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
1692
f78a5fda 1693static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1694{
1695 int err = 0;
16e57262 1696
f78a5fda 1697 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1698 UNIXCB(skb).uid = scm->creds.uid;
1699 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1700 UNIXCB(skb).fp = NULL;
37a9a8df 1701 unix_get_secdata(scm, skb);
7361c36c
EB
1702 if (scm->fp && send_fds)
1703 err = unix_attach_fds(scm, skb);
1704
1705 skb->destructor = unix_destruct_scm;
1706 return err;
1707}
1708
9490f886
HFS
1709static bool unix_passcred_enabled(const struct socket *sock,
1710 const struct sock *other)
1711{
1712 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1713 !other->sk_socket ||
1714 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1715}
1716
16e57262
ED
1717/*
1718 * Some apps rely on write() giving SCM_CREDENTIALS
1719 * We include credentials if source or destination socket
1720 * asserted SOCK_PASSCRED.
1721 */
1722static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1723 const struct sock *other)
1724{
6b0ee8c0 1725 if (UNIXCB(skb).pid)
16e57262 1726 return;
9490f886 1727 if (unix_passcred_enabled(sock, other)) {
16e57262 1728 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1729 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1730 }
1731}
1732
9490f886
HFS
1733static int maybe_init_creds(struct scm_cookie *scm,
1734 struct socket *socket,
1735 const struct sock *other)
1736{
1737 int err;
1738 struct msghdr msg = { .msg_controllen = 0 };
1739
1740 err = scm_send(socket, &msg, scm, false);
1741 if (err)
1742 return err;
1743
1744 if (unix_passcred_enabled(socket, other)) {
1745 scm->pid = get_pid(task_tgid(current));
1746 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1747 }
1748 return err;
1749}
1750
1751static bool unix_skb_scm_eq(struct sk_buff *skb,
1752 struct scm_cookie *scm)
1753{
1754 const struct unix_skb_parms *u = &UNIXCB(skb);
1755
1756 return u->pid == scm->pid &&
1757 uid_eq(u->uid, scm->creds.uid) &&
1758 gid_eq(u->gid, scm->creds.gid) &&
1759 unix_secdata_eq(scm, skb);
1760}
1761
3c32da19
KT
1762static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1763{
1764 struct scm_fp_list *fp = UNIXCB(skb).fp;
1765 struct unix_sock *u = unix_sk(sk);
1766
3c32da19 1767 if (unlikely(fp && fp->count))
7782040b 1768 atomic_add(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1769}
1770
1771static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1772{
1773 struct scm_fp_list *fp = UNIXCB(skb).fp;
1774 struct unix_sock *u = unix_sk(sk);
1775
3c32da19 1776 if (unlikely(fp && fp->count))
7782040b 1777 atomic_sub(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1778}
1779
1da177e4
LT
1780/*
1781 * Send AF_UNIX data.
1782 */
1783
1b784140
YX
/* sendmsg(2) for datagram sockets.  Resolves the destination (explicit
 * address or connected peer), builds a paged skb with control data, and
 * queues it on the peer's receive queue.  Handles dead-peer cleanup,
 * receiver-queue backpressure (via unix_wait_for_peer() /
 * unix_dgram_peer_wake_me()), and the restart/restart_locked retry
 * protocol.  Returns bytes sent or -errno.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int err;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SO_PASSCRED requires an address; autobind if unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* Put the tail of a large message into page frags. */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* Our default peer died: disconnect and report. */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			sk->sk_state = TCP_CLOSE;
			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			/* Drops other's lock; see unix_wait_for_peer(). */
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1989
e370a723 1990/* We use paged skbs for stream sockets, and limit occupancy to 32768
d4e9a408 1991 * bytes, and a minimum of a full page.
e370a723
ED
1992 */
1993#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 1994
314001f0
RS
1995#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1996static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1997{
1998 struct unix_sock *ousk = unix_sk(other);
1999 struct sk_buff *skb;
2000 int err = 0;
2001
2002 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2003
2004 if (!skb)
2005 return err;
2006
2007 skb_put(skb, 1);
314001f0
RS
2008 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2009
2010 if (err) {
2011 kfree_skb(skb);
2012 return err;
2013 }
2014
2015 unix_state_lock(other);
19eed721
RS
2016
2017 if (sock_flag(other, SOCK_DEAD) ||
2018 (other->sk_shutdown & RCV_SHUTDOWN)) {
2019 unix_state_unlock(other);
2020 kfree_skb(skb);
2021 return -EPIPE;
2022 }
2023
314001f0
RS
2024 maybe_add_creds(skb, sock, other);
2025 skb_get(skb);
2026
2027 if (ousk->oob_skb)
19eed721 2028 consume_skb(ousk->oob_skb);
314001f0
RS
2029
2030 ousk->oob_skb = skb;
2031
2032 scm_stat_add(other, skb);
2033 skb_queue_tail(&other->sk_receive_queue, skb);
2034 sk_send_sigurg(other);
2035 unix_state_unlock(other);
2036 other->sk_data_ready(other);
2037
2038 return err;
2039}
2040#endif
2041
1b784140
YX
/* SOCK_STREAM sendmsg: splits the payload into paged skbs (sized against
 * sk_sndbuf and UNIX_SKB_FRAGS_SZ), attaches SCM credentials/fds only to the
 * first skb, and queues each directly on the connected peer's receive queue
 * under the peer's state lock.  With MSG_OOB the final byte is diverted to
 * queue_oob().  Returns bytes sent, or a negative errno if nothing was sent
 * (sends SIGPIPE on EPIPE unless MSG_NOSIGNAL).
 */
2042static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2043			       size_t len)
1da177e4 2044{
1da177e4
LT
2045	struct sock *sk = sock->sk;
2046	struct sock *other = NULL;
6eba6a37 2047	int err, size;
f78a5fda 2048	struct sk_buff *skb;
e27dfcea 2049	int sent = 0;
7cc05662 2050	struct scm_cookie scm;
8ba69ba6 2051	bool fds_sent = false;
e370a723 2052	int data_len;
1da177e4 2053
5f23b734 2054	wait_for_unix_gc();
7cc05662 2055	err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2056	if (err < 0)
2057		return err;
2058
2059	err = -EOPNOTSUPP;
314001f0
RS
	/* MSG_OOB: reserve the last byte for queue_oob(); without OOB support
	 * (or with a zero-length request) it is rejected outright.
	 */
2060	if (msg->msg_flags & MSG_OOB) {
2061#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2062		if (len)
2063			len--;
2064		else
2065#endif
2066			goto out_err;
2067	}
1da177e4
LT
2068
2069	if (msg->msg_namelen) {
2070		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2071		goto out_err;
2072	} else {
1da177e4 2073		err = -ENOTCONN;
830a1e5c 2074		other = unix_peer(sk);
1da177e4
LT
2075		if (!other)
2076			goto out_err;
2077	}
2078
2079	if (sk->sk_shutdown & SEND_SHUTDOWN)
2080		goto pipe_err;
2081
6eba6a37 2082	while (sent < len) {
e370a723 2083		size = len - sent;
1da177e4
LT
2084
2085		/* Keep two messages in the pipe so it schedules better */
e370a723 2086		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 2087
e370a723
ED
2088		/* allow fallback to order-0 allocations */
2089		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 2090
e370a723 2091		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 2092
31ff6aa5
KT
2093		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2094
e370a723 2095		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
2096					   msg->msg_flags & MSG_DONTWAIT, &err,
2097					   get_order(UNIX_SKB_FRAGS_SZ));
e370a723 2098		if (!skb)
1da177e4
LT
2099			goto out_err;
2100
f78a5fda 2101		/* Only send the fds in the first buffer */
7cc05662 2102		err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 2103		if (err < 0) {
7361c36c 2104			kfree_skb(skb);
f78a5fda 2105			goto out_err;
6209344f 2106		}
7361c36c 2107		fds_sent = true;
1da177e4 2108
e370a723
ED
2109		skb_put(skb, size - data_len);
2110		skb->data_len = data_len;
2111		skb->len = size;
c0371da6 2112		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 2113		if (err) {
1da177e4 2114			kfree_skb(skb);
f78a5fda 2115			goto out_err;
1da177e4
LT
2116		}
2117
1c92b4e5 2118		unix_state_lock(other);
1da177e4
LT
2119
		/* Re-check the peer under its lock before every queueing. */
2120		if (sock_flag(other, SOCK_DEAD) ||
2121		    (other->sk_shutdown & RCV_SHUTDOWN))
2122			goto pipe_err_free;
2123
16e57262 2124		maybe_add_creds(skb, sock, other);
3c32da19 2125		scm_stat_add(other, skb);
7782040b 2126		skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2127		unix_state_unlock(other);
676d2369 2128		other->sk_data_ready(other);
e27dfcea 2129		sent += size;
1da177e4 2130	}
1da177e4 2131
314001f0
RS
2132#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2133	if (msg->msg_flags & MSG_OOB) {
2134		err = queue_oob(sock, msg, other);
2135		if (err)
2136			goto out_err;
2137		sent++;
2138	}
2139#endif
2140
7cc05662 2141	scm_destroy(&scm);
1da177e4
LT
2142
2143	return sent;
2144
2145pipe_err_free:
1c92b4e5 2146	unix_state_unlock(other);
1da177e4
LT
2147	kfree_skb(skb);
2148pipe_err:
6eba6a37
ED
2149	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2150		send_sig(SIGPIPE, current, 0);
1da177e4
LT
2151	err = -EPIPE;
2152out_err:
7cc05662 2153	scm_destroy(&scm);
1da177e4
LT
	/* Partial writes report the byte count, not the error. */
2154	return sent ? : err;
2155}
2156
869e7c62
HFS
/* sendpage for SOCK_STREAM: appends the page as a fragment to the last skb
 * on the peer's receive queue when that skb carries the same SCM state,
 * otherwise allocates a fresh zero-length skb (via the goto-into-if
 * "alloc_skb" trampoline, which must drop both locks first) and queues it.
 * Takes the peer's iolock to serialise against readers that mutate skb->len.
 * Returns size on success or a negative errno (SIGPIPE semantics as sendmsg).
 */
2157static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2158				    int offset, size_t size, int flags)
2159{
9490f886
HFS
2160	int err;
2161	bool send_sigpipe = false;
2162	bool init_scm = true;
2163	struct scm_cookie scm;
869e7c62
HFS
2164	struct sock *other, *sk = socket->sk;
2165	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2166
2167	if (flags & MSG_OOB)
2168		return -EOPNOTSUPP;
2169
2170	other = unix_peer(sk);
2171	if (!other || sk->sk_state != TCP_ESTABLISHED)
2172		return -ENOTCONN;
2173
	/* Never entered directly; only reached by "goto alloc_skb" below,
	 * with both the state lock and iolock of "other" held.
	 */
2174	if (false) {
2175alloc_skb:
2176		unix_state_unlock(other);
6e1ce3c3 2177		mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2178		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2179					      &err, 0);
2180		if (!newskb)
9490f886 2181			goto err;
869e7c62
HFS
2182	}
2183
6e1ce3c3 2184	/* we must acquire iolock as we modify already present
869e7c62
HFS
2185	 * skbs in the sk_receive_queue and mess with skb->len
2186	 */
6e1ce3c3 2187	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
869e7c62
HFS
2188	if (err) {
2189		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
869e7c62
HFS
2190		goto err;
2191	}
2192
2193	if (sk->sk_shutdown & SEND_SHUTDOWN) {
2194		err = -EPIPE;
9490f886 2195		send_sigpipe = true;
869e7c62
HFS
2196		goto err_unlock;
2197	}
2198
2199	unix_state_lock(other);
2200
2201	if (sock_flag(other, SOCK_DEAD) ||
2202	    other->sk_shutdown & RCV_SHUTDOWN) {
2203		err = -EPIPE;
9490f886 2204		send_sigpipe = true;
869e7c62
HFS
2205		goto err_state_unlock;
2206	}
2207
9490f886
HFS
2208	if (init_scm) {
2209		err = maybe_init_creds(&scm, socket, other);
2210		if (err)
2211			goto err_state_unlock;
2212		init_scm = false;
2213	}
2214
	/* Pick the skb to grow: the freshly allocated one after a retry, or
	 * the queue tail when its SCM state matches ours.
	 */
869e7c62
HFS
2215	skb = skb_peek_tail(&other->sk_receive_queue);
2216	if (tail && tail == skb) {
2217		skb = newskb;
9490f886
HFS
2218	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2219		if (newskb) {
869e7c62 2220			skb = newskb;
9490f886
HFS
2221		} else {
2222			tail = skb;
869e7c62 2223			goto alloc_skb;
9490f886 2224		}
869e7c62
HFS
2225	} else if (newskb) {
2226		/* this is fast path, we don't necessarily need to
2227		 * call to kfree_skb even though with newskb == NULL
2228		 * this - does no harm
2229		 */
2230		consume_skb(newskb);
8844f972 2231		newskb = NULL;
869e7c62
HFS
2232	}
2233
2234	if (skb_append_pagefrags(skb, page, offset, size)) {
2235		tail = skb;
2236		goto alloc_skb;
2237	}
2238
	/* Account the appended bytes on the skb and the sender's wmem. */
2239	skb->len += size;
2240	skb->data_len += size;
2241	skb->truesize += size;
14afee4b 2242	refcount_add(size, &sk->sk_wmem_alloc);
869e7c62 2243
a3a116e0 2244	if (newskb) {
9490f886
HFS
2245		err = unix_scm_to_skb(&scm, skb, false);
2246		if (err)
2247			goto err_state_unlock;
a3a116e0 2248		spin_lock(&other->sk_receive_queue.lock);
869e7c62 2249		__skb_queue_tail(&other->sk_receive_queue, newskb);
a3a116e0
HFS
2250		spin_unlock(&other->sk_receive_queue.lock);
2251	}
869e7c62
HFS
2252
2253	unix_state_unlock(other);
6e1ce3c3 2254	mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2255
2256	other->sk_data_ready(other);
9490f886 2257	scm_destroy(&scm);
869e7c62
HFS
2258	return size;
2259
2260err_state_unlock:
2261	unix_state_unlock(other);
2262err_unlock:
6e1ce3c3 2263	mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2264err:
2265	kfree_skb(newskb);
2266	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2267		send_sig(SIGPIPE, current, 0);
9490f886
HFS
2268	if (!init_scm)
2269		scm_destroy(&scm);
869e7c62
HFS
2270	return err;
2271}
2272
1b784140
YX
/* SOCK_SEQPACKET sendmsg: requires an established connection, ignores any
 * destination address, then delegates to unix_dgram_sendmsg().
 */
2273static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2274				  size_t len)
1da177e4
LT
2275{
2276	int err;
2277	struct sock *sk = sock->sk;
ac7bfa62 2278
1da177e4
LT
2279	err = sock_error(sk);
2280	if (err)
2281		return err;
2282
2283	if (sk->sk_state != TCP_ESTABLISHED)
2284		return -ENOTCONN;
2285
	/* Connected socket: any supplied name is meaningless, drop it. */
2286	if (msg->msg_namelen)
2287		msg->msg_namelen = 0;
2288
1b784140 2289	return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2290}
ac7bfa62 2291
1b784140
YX
/* SOCK_SEQPACKET recvmsg: connection must be established, then the datagram
 * receive path is reused.
 */
2292static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2293				  size_t size, int flags)
a05d2ad1
EB
2294{
2295	struct sock *sk = sock->sk;
2296
2297	if (sk->sk_state != TCP_ESTABLISHED)
2298		return -ENOTCONN;
2299
1b784140 2300	return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2301}
2302
1da177e4
LT
/* Copy the sender's bound address (if any) into msg->msg_name/namelen.
 * smp_load_acquire pairs with the release store that publishes u->addr.
 */
2303static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2304{
ae3b5641 2305	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2306
ae3b5641
AV
2307	if (addr) {
2308		msg->msg_namelen = addr->len;
2309		memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2310	}
2311}
2312
9825d866
CW
/* Core datagram receive: dequeues (or peeks, honouring sk_peek_offset) one
 * skb under u->iolock, waiting up to the socket timeout; copies data,
 * timestamps, credentials and fds to the caller.  MSG_PEEK clones fds rather
 * than detaching them (see the in-body note).  Returns bytes copied (or full
 * skb length with MSG_TRUNC set in flags) or a negative errno.
 */
2313int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2314			 int flags)
1da177e4 2315{
7cc05662 2316	struct scm_cookie scm;
9825d866 2317	struct socket *sock = sk->sk_socket;
1da177e4 2318	struct unix_sock *u = unix_sk(sk);
64874280
RW
2319	struct sk_buff *skb, *last;
2320	long timeo;
fd69c399 2321	int skip;
1da177e4
LT
2322	int err;
2323
2324	err = -EOPNOTSUPP;
2325	if (flags&MSG_OOB)
2326		goto out;
2327
64874280 2328	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2329
	/* Try to dequeue; on -EAGAIN sleep for more packets and retry until
	 * the timeout expires.  On success we exit with u->iolock held.
	 */
64874280 2330	do {
6e1ce3c3 2331		mutex_lock(&u->iolock);
f55bb7f9 2332
64874280 2333		skip = sk_peek_offset(sk, flags);
b50b0580 2334		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
e427cad6
PA
2335					      &skip, &err, &last);
2336		if (skb) {
2337			if (!(flags & MSG_PEEK))
2338				scm_stat_del(sk, skb);
64874280 2339			break;
e427cad6 2340		}
64874280 2341
6e1ce3c3 2342		mutex_unlock(&u->iolock);
64874280
RW
2343
2344		if (err != -EAGAIN)
2345			break;
2346	} while (timeo &&
b50b0580
SD
2347		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2348					      &err, &timeo, last));
64874280 2349
6e1ce3c3 2350	if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2351		unix_state_lock(sk);
2352		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2353		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2354		    (sk->sk_shutdown & RCV_SHUTDOWN))
2355			err = 0;
2356		unix_state_unlock(sk);
64874280 2357		goto out;
0a112258 2358	}
1da177e4 2359
	/* We made room in our receive queue: wake blocked dgram senders. */
77b75f4d
RW
2360	if (wq_has_sleeper(&u->peer_wait))
2361		wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2362						EPOLLOUT | EPOLLWRNORM |
2363						EPOLLWRBAND);
1da177e4
LT
2364
2365	if (msg->msg_name)
2366		unix_copy_addr(msg, skb->sk);
2367
f55bb7f9
PE
2368	if (size > skb->len - skip)
2369		size = skb->len - skip;
2370	else if (size < skb->len - skip)
1da177e4
LT
2371		msg->msg_flags |= MSG_TRUNC;
2372
51f3d02b 2373	err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2374	if (err)
2375		goto out_free;
2376
3f66116e
AC
2377	if (sock_flag(sk, SOCK_RCVTSTAMP))
2378		__sock_recv_timestamp(msg, sk, skb);
2379
7cc05662
CH
2380	memset(&scm, 0, sizeof(scm));
2381
2382	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2383	unix_set_secdata(&scm, skb);
1da177e4 2384
6eba6a37 2385	if (!(flags & MSG_PEEK)) {
1da177e4 2386		if (UNIXCB(skb).fp)
7cc05662 2387			unix_detach_fds(&scm, skb);
f55bb7f9
PE
2388
2389		sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2390	} else {
1da177e4
LT
2391		/* It is questionable: on PEEK we could:
2392		   - do not return fds - good, but too simple 8)
2393		   - return fds, and do not return them on read (old strategy,
2394		     apparently wrong)
2395		   - clone fds (I chose it for now, it is the most universal
2396		     solution)
ac7bfa62
YH
2397
2398		   POSIX 1003.1g does not actually define this clearly
2399		   at all. POSIX 1003.1g doesn't define a lot of things
2400		   clearly however!
2401
1da177e4 2402		*/
f55bb7f9
PE
2403
2404		sk_peek_offset_fwd(sk, size);
2405
1da177e4 2406		if (UNIXCB(skb).fp)
cbcf0112 2407			unix_peek_fds(&scm, skb);
1da177e4 2408	}
9f6f9af7 2409	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2410
7cc05662 2411	scm_recv(sock, msg, &scm, flags);
1da177e4
LT
2412
2413out_free:
6eba6a37 2414	skb_free_datagram(sk, skb);
6e1ce3c3 2415	mutex_unlock(&u->iolock);
1da177e4
LT
2416out:
2417	return err;
2418}
29df44fa 2419
9825d866
CW
/* Datagram recvmsg entry point: with CONFIG_BPF_SYSCALL, a sockmap may have
 * replaced sk_prot, in which case the replacement's recvmsg handles the call;
 * otherwise fall through to __unix_dgram_recvmsg().
 */
2420static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2421			      int flags)
2422{
2423	struct sock *sk = sock->sk;
2424
2425#ifdef CONFIG_BPF_SYSCALL
94531cfc
JW
2426	const struct proto *prot = READ_ONCE(sk->sk_prot);
2427
2428	if (prot != &unix_dgram_proto)
2429		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
9825d866
CW
2430				     flags & ~MSG_DONTWAIT, NULL);
2431#endif
2432	return __unix_dgram_recvmsg(sk, msg, size, flags);
2433}
2434
29df44fa
CW
/* Feed whole received datagrams to recv_actor (read_sock interface, used by
 * the BPF/sockmap paths).  Each skb is consumed after the actor sees it;
 * iteration stops when the actor refuses data or desc->count is exhausted.
 * Returns total bytes the actor accepted, or the actor's/receive error.
 * Note skb_recv_datagram(sk, 0, 1, ...) — flags 0, noblock 1.
 */
2435static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2436			  sk_read_actor_t recv_actor)
2437{
2438	int copied = 0;
2439
2440	while (1) {
2441		struct unix_sock *u = unix_sk(sk);
2442		struct sk_buff *skb;
2443		int used, err;
2444
2445		mutex_lock(&u->iolock);
2446		skb = skb_recv_datagram(sk, 0, 1, &err);
2447		mutex_unlock(&u->iolock);
2448		if (!skb)
2449			return err;
2450
2451		used = recv_actor(desc, skb, 0, skb->len);
2452		if (used <= 0) {
2453			if (!copied)
2454				copied = used;
2455			kfree_skb(skb);
2456			break;
2457		} else if (used <= skb->len) {
2458			copied += used;
2459		}
2460
2461		kfree_skb(skb);
2462		if (!desc->count)
2463			break;
2464	}
2465
2466	return copied;
2467}
1da177e4
LT
2468
2469/*
79f632c7 2470 * Sleep until more data has arrived. But check for races..
1da177e4 2471 */
/* Called and returns with unix_state_lock(sk) held; the lock is dropped only
 * around the actual schedule_timeout().  Wakes when the queue tail changed
 * (new skb, or the same tail grew past last_len), on error/shutdown/signal,
 * or when the timeout runs out.  Returns the remaining timeout.
 */
79f632c7 2472static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2473				  struct sk_buff *last, unsigned int last_len,
2474				  bool freezable)
1da177e4 2475{
2b514574 2476	struct sk_buff *tail;
1da177e4
LT
2477	DEFINE_WAIT(wait);
2478
1c92b4e5 2479	unix_state_lock(sk);
1da177e4
LT
2480
2481	for (;;) {
aa395145 2482		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 2483
2b514574
HFS
2484		tail = skb_peek_tail(&sk->sk_receive_queue);
2485		if (tail != last ||
2486		    (tail && tail->len != last_len) ||
1da177e4
LT
2487		    sk->sk_err ||
2488		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2489		    signal_pending(current) ||
2490		    !timeo)
2491			break;
2492
9cd3e072 2493		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2494		unix_state_unlock(sk);
06a77b07
WC
2495		if (freezable)
2496			timeo = freezable_schedule_timeout(timeo);
2497		else
2498			timeo = schedule_timeout(timeo);
1c92b4e5 2499		unix_state_lock(sk);
b48732e4
MS
2500
2501		if (sock_flag(sk, SOCK_DEAD))
2502			break;
2503
9cd3e072 2504		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2505	}
2506
aa395145 2507	finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2508	unix_state_unlock(sk);
1da177e4
LT
2509	return timeo;
2510}
2511
e370a723
ED
/* Bytes of this stream skb not yet consumed by a reader. */
2512static unsigned int unix_skb_len(const struct sk_buff *skb)
2513{
2514	return skb->len - UNIXCB(skb).consumed;
2515}
2516
2b514574
HFS
/* Parameter bundle for unix_stream_read_generic(): recv_actor consumes each
 * chunk either into msg (recvmsg path) or into pipe (splice path).
 */
2517struct unix_stream_read_state {
2518	int (*recv_actor)(struct sk_buff *, int, int,
2519			  struct unix_stream_read_state *);
2520	struct socket *socket;
2521	struct msghdr *msg;
2522	struct pipe_inode_info *pipe;
2523	size_t size;
2524	int flags;
2525	unsigned int splice_flags;
2526};
2527
314001f0
RS
2528#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/* Deliver the pending out-of-band byte via state->recv_actor.  -EINVAL if
 * SOCK_URGINLINE is set (OOB is then delivered inline) or no OOB byte is
 * pending.  Without MSG_PEEK the oob_skb reference is consumed.  Returns 1
 * (one byte) on success and sets MSG_OOB in the caller's msg_flags.
 */
2529static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2530{
2531	struct socket *sock = state->socket;
2532	struct sock *sk = sock->sk;
2533	struct unix_sock *u = unix_sk(sk);
2534	int chunk = 1;
876c14ad 2535	struct sk_buff *oob_skb;
314001f0 2536
876c14ad
RS
2537	mutex_lock(&u->iolock);
2538	unix_state_lock(sk);
2539
2540	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2541		unix_state_unlock(sk);
2542		mutex_unlock(&u->iolock);
314001f0 2543		return -EINVAL;
876c14ad 2544	}
314001f0 2545
876c14ad 2546	oob_skb = u->oob_skb;
314001f0
RS
2547
2548	if (!(state->flags & MSG_PEEK)) {
314001f0
RS
2549		u->oob_skb = NULL;
2550	}
876c14ad
RS
2551
2552	unix_state_unlock(sk);
2553
2554	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2555
	/* Non-peek: mark the byte consumed and drop the oob_skb reference. */
2556	if (!(state->flags & MSG_PEEK)) {
2557		UNIXCB(oob_skb).consumed += 1;
2558		kfree_skb(oob_skb);
2559	}
2560
2561	mutex_unlock(&u->iolock);
2562
2563	if (chunk < 0)
2564		return -EFAULT;
2565
314001f0
RS
2566	state->msg->msg_flags |= MSG_OOB;
2567	return 1;
2568}
2569
/* Decide how the stream reader treats the skb at the queue head relative to
 * the pending OOB skb: fully-consumed skbs are unlinked; if skb IS the OOB
 * skb, stop before it when data was already copied (copied != 0), deliver it
 * inline under SOCK_URGINLINE, or (non-peek) skip past it.  Returns the skb
 * the reader should process next, or NULL to stop/retry.
 * NOTE(review): called with unix_state_lock(sk) held by the sole caller in
 * unix_stream_read_generic() — confirm before reusing elsewhere.
 */
2570static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2571				  int flags, int copied)
2572{
2573	struct unix_sock *u = unix_sk(sk);
2574
2575	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2576		skb_unlink(skb, &sk->sk_receive_queue);
2577		consume_skb(skb);
2578		skb = NULL;
2579	} else {
2580		if (skb == u->oob_skb) {
2581			if (copied) {
2582				skb = NULL;
2583			} else if (sock_flag(sk, SOCK_URGINLINE)) {
2584				if (!(flags & MSG_PEEK)) {
2585					u->oob_skb = NULL;
2586					consume_skb(skb);
2587				}
2588			} else if (!(flags & MSG_PEEK)) {
2589				skb_unlink(skb, &sk->sk_receive_queue);
2590				consume_skb(skb);
2591				skb = skb_peek(&sk->sk_receive_queue);
2592			}
2593		}
2594	}
2595	return skb;
2596}
2597#endif
2598
77462de1
JW
/* Stream-socket read_sock wrapper: requires an established connection, then
 * reuses the datagram-based unix_read_sock() loop.
 */
2599static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2600				 sk_read_actor_t recv_actor)
2601{
2602	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2603		return -ENOTCONN;
2604
2605	return unix_read_sock(sk, desc, recv_actor);
2606}
2607
06a77b07
WC
/* Core SOCK_STREAM/SEQPACKET receive loop shared by recvmsg and splice_read
 * (state->recv_actor picks the destination).  Under u->iolock it walks the
 * receive queue, honouring sk_peek_offset, SO_RCVLOWAT (target), OOB skbs,
 * SCM credential boundaries (never glue skbs from different writers) and
 * attached fds; sleeps in unix_stream_data_wait() when the queue is empty.
 * Returns bytes copied, or a negative errno if nothing was copied.
 */
2608static int unix_stream_read_generic(struct unix_stream_read_state *state,
2609				    bool freezable)
1da177e4 2610{
7cc05662 2611	struct scm_cookie scm;
2b514574 2612	struct socket *sock = state->socket;
1da177e4
LT
2613	struct sock *sk = sock->sk;
2614	struct unix_sock *u = unix_sk(sk);
1da177e4 2615	int copied = 0;
2b514574 2616	int flags = state->flags;
de144391 2617	int noblock = flags & MSG_DONTWAIT;
2b514574 2618	bool check_creds = false;
1da177e4
LT
2619	int target;
2620	int err = 0;
2621	long timeo;
fc0d7536 2622	int skip;
2b514574
HFS
2623	size_t size = state->size;
2624	unsigned int last_len;
1da177e4 2625
1b92ee3d
RW
2626	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2627		err = -EINVAL;
1da177e4 2628		goto out;
1b92ee3d 2629	}
1da177e4 2630
1b92ee3d
RW
2631	if (unlikely(flags & MSG_OOB)) {
2632		err = -EOPNOTSUPP;
314001f0 2633#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2634		err = unix_stream_recv_urg(state);
314001f0 2635#endif
1da177e4 2636		goto out;
1b92ee3d 2637	}
1da177e4 2638
2b514574 2639	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2640	timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2641
2b514574
HFS
2642	memset(&scm, 0, sizeof(scm));
2643
1da177e4
LT
2644	/* Lock the socket to prevent queue disordering
2645	 * while sleeps in memcpy_tomsg
2646	 */
6e1ce3c3 2647	mutex_lock(&u->iolock);
1da177e4 2648
a0917e0b 2649	skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2650
6eba6a37 2651	do {
1da177e4 2652		int chunk;
73ed5d25 2653		bool drop_skb;
79f632c7 2654		struct sk_buff *skb, *last;
1da177e4 2655
18eceb81 2656redo:
3c0d2f37 2657		unix_state_lock(sk);
b48732e4
MS
2658		if (sock_flag(sk, SOCK_DEAD)) {
2659			err = -ECONNRESET;
2660			goto unlock;
2661		}
79f632c7 2662		last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2663		last_len = last ? last->len : 0;
314001f0
RS
2664
2665#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2666		if (skb) {
2667			skb = manage_oob(skb, sk, flags, copied);
2668			if (!skb) {
2669				unix_state_unlock(sk);
2670				if (copied)
2671					break;
2672				goto redo;
2673			}
2674		}
2675#endif
fc0d7536 2676again:
2677		if (skb == NULL) {
			/* Queue empty: stop if we met the low-water mark,
			 * otherwise report error/EOF or sleep for data.
			 */
1da177e4 2678			if (copied >= target)
3c0d2f37 2679				goto unlock;
1da177e4
LT
2680
2681			/*
2682			 *	POSIX 1003.1g mandates this order.
2683			 */
ac7bfa62 2684
6eba6a37
ED
2685			err = sock_error(sk);
2686			if (err)
3c0d2f37 2687				goto unlock;
1da177e4 2688			if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2689				goto unlock;
2690
2691			unix_state_unlock(sk);
1b92ee3d
RW
2692			if (!timeo) {
2693				err = -EAGAIN;
1da177e4 2694				break;
1b92ee3d
RW
2695			}
2696
6e1ce3c3 2697			mutex_unlock(&u->iolock);
1da177e4 2698
2b514574 2699			timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2700						      last_len, freezable);
1da177e4 2701
3822b5c2 2702			if (signal_pending(current)) {
1da177e4 2703				err = sock_intr_errno(timeo);
fa0dc04d 2704				scm_destroy(&scm);
1da177e4
LT
2705				goto out;
2706			}
b3ca9b02 2707
6e1ce3c3 2708			mutex_lock(&u->iolock);
18eceb81 2709			goto redo;
2b514574 2710unlock:
3c0d2f37
MS
2711			unix_state_unlock(sk);
2712			break;
1da177e4 2713		}
fc0d7536 2714
		/* Skip whole skbs covered by the peek offset. */
e370a723
ED
2715		while (skip >= unix_skb_len(skb)) {
2716			skip -= unix_skb_len(skb);
79f632c7 2717			last = skb;
2b514574 2718			last_len = skb->len;
fc0d7536 2719			skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2720			if (!skb)
2721				goto again;
fc0d7536
PE
2722		}
2723
3c0d2f37 2724		unix_state_unlock(sk);
1da177e4
LT
2725
2726		if (check_creds) {
2727			/* Never glue messages from different writers */
9490f886 2728			if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2729				break;
0e82e7f6 2730		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2731			/* Copy credentials */
7cc05662 2732			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2733			unix_set_secdata(&scm, skb);
2b514574 2734			check_creds = true;
1da177e4
LT
2735		}
2736
2737		/* Copy address just once */
2b514574
HFS
2738		if (state->msg && state->msg->msg_name) {
2739			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2740					 state->msg->msg_name);
2741			unix_copy_addr(state->msg, skb->sk);
1da177e4
LT
2742			sunaddr = NULL;
2743		}
2744
e370a723 2745		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
73ed5d25 2746		skb_get(skb);
2b514574 2747		chunk = state->recv_actor(skb, skip, chunk, state);
73ed5d25
HFS
2748		drop_skb = !unix_skb_len(skb);
2749		/* skb is only safe to use if !drop_skb */
2750		consume_skb(skb);
2b514574 2751		if (chunk < 0) {
1da177e4
LT
2752			if (copied == 0)
2753				copied = -EFAULT;
2754			break;
2755		}
2756		copied += chunk;
2757		size -= chunk;
2758
73ed5d25
HFS
2759		if (drop_skb) {
2760			/* the skb was touched by a concurrent reader;
2761			 * we should not expect anything from this skb
2762			 * anymore and assume it invalid - we can be
2763			 * sure it was dropped from the socket queue
2764			 *
2765			 * let's report a short read
2766			 */
2767			err = 0;
2768			break;
2769		}
2770
1da177e4 2771		/* Mark read part of skb as used */
6eba6a37 2772		if (!(flags & MSG_PEEK)) {
e370a723 2773			UNIXCB(skb).consumed += chunk;
1da177e4 2774
fc0d7536
PE
2775			sk_peek_offset_bwd(sk, chunk);
2776
3c32da19 2777			if (UNIXCB(skb).fp) {
3c32da19 2778				scm_stat_del(sk, skb);
7cc05662 2779				unix_detach_fds(&scm, skb);
3c32da19 2780			}
1da177e4 2781
e370a723 2782			if (unix_skb_len(skb))
1da177e4 2783				break;
1da177e4 2784
6f01fd6e 2785			skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2786			consume_skb(skb);
1da177e4 2787
7cc05662 2788			if (scm.fp)
1da177e4 2789				break;
6eba6a37 2790		} else {
1da177e4
LT
2791			/* It is questionable, see note in unix_dgram_recvmsg.
2792			 */
2793			if (UNIXCB(skb).fp)
cbcf0112 2794				unix_peek_fds(&scm, skb);
1da177e4 2795
e9193d60 2796			sk_peek_offset_fwd(sk, chunk);
fc0d7536 2797
9f389e35
AC
2798			if (UNIXCB(skb).fp)
2799				break;
2800
e9193d60 2801			skip = 0;
9f389e35
AC
2802			last = skb;
2803			last_len = skb->len;
2804			unix_state_lock(sk);
2805			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2806			if (skb)
2807				goto again;
2808			unix_state_unlock(sk);
1da177e4
LT
2809			break;
2810		}
2811	} while (size);
2812
6e1ce3c3 2813	mutex_unlock(&u->iolock);
2b514574
HFS
2814	if (state->msg)
2815		scm_recv(sock, state->msg, &scm, flags);
2816	else
2817		scm_destroy(&scm);
1da177e4
LT
2818out:
2819	return copied ? : err;
2820}
2821
2b514574
HFS
/* recv_actor for recvmsg: copy a chunk (past the consumed prefix plus skip)
 * into state->msg; returns bytes copied or a negative error.
 */
2822static int unix_stream_read_actor(struct sk_buff *skb,
2823				  int skip, int chunk,
2824				  struct unix_stream_read_state *state)
2825{
2826	int ret;
2827
2828	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2829				    state->msg, chunk);
2830	return ret ?: chunk;
2831}
2832
94531cfc
JW
/* Plain stream receive (no BPF dispatch) — used by the sockmap-installed
 * proto as the underlying implementation.
 */
2833int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2834			  size_t size, int flags)
2835{
2836	struct unix_stream_read_state state = {
2837		.recv_actor = unix_stream_read_actor,
2838		.socket = sk->sk_socket,
2839		.msg = msg,
2840		.size = size,
2841		.flags = flags
2842	};
2843
2844	return unix_stream_read_generic(&state, true);
2845}
2846
2b514574
HFS
/* Stream recvmsg entry point: defers to a sockmap-replaced sk_prot when one
 * is installed (CONFIG_BPF_SYSCALL), otherwise runs the generic read loop.
 */
2847static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2848			       size_t size, int flags)
2849{
2850	struct unix_stream_read_state state = {
2851		.recv_actor = unix_stream_read_actor,
2852		.socket = sock,
2853		.msg = msg,
2854		.size = size,
2855		.flags = flags
2856	};
2857
94531cfc
JW
2858#ifdef CONFIG_BPF_SYSCALL
2859	struct sock *sk = sock->sk;
2860	const struct proto *prot = READ_ONCE(sk->sk_prot);
2861
2862	if (prot != &unix_stream_proto)
2863		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2864				     flags & ~MSG_DONTWAIT, NULL);
2865#endif
06a77b07 2866	return unix_stream_read_generic(&state, true);
2b514574
HFS
2867}
2868
2b514574
HFS
/* recv_actor for splice: move a chunk of the skb into state->pipe. */
2869static int unix_stream_splice_actor(struct sk_buff *skb,
2870				    int skip, int chunk,
2871				    struct unix_stream_read_state *state)
2872{
2873	return skb_splice_bits(skb, state->socket->sk,
2874			       UNIXCB(skb).consumed + skip,
25869262 2875			       state->pipe, chunk, state->splice_flags);
2b514574
HFS
2876}
2877
/* splice(2) from a stream socket into a pipe.  Sockets are not seekable, so
 * any non-zero *ppos is -ESPIPE; O_NONBLOCK / SPLICE_F_NONBLOCK map onto
 * MSG_DONTWAIT.  freezable=false: splice may hold pipe locks while waiting.
 */
2878static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2879				       struct pipe_inode_info *pipe,
2880				       size_t size, unsigned int flags)
2881{
2882	struct unix_stream_read_state state = {
2883		.recv_actor = unix_stream_splice_actor,
2884		.socket = sock,
2885		.pipe = pipe,
2886		.size = size,
2887		.splice_flags = flags,
2888	};
2889
2890	if (unlikely(*ppos))
2891		return -ESPIPE;
2892
2893	if (sock->file->f_flags & O_NONBLOCK ||
2894	    flags & SPLICE_F_NONBLOCK)
2895		state.flags = MSG_DONTWAIT;
2896
06a77b07 2897	return unix_stream_read_generic(&state, false);
2b514574
HFS
2898}
2899
1da177e4
LT
/* shutdown(2): records the shutdown mode on this socket and, for connected
 * stream/seqpacket sockets, mirrors the complementary mode onto the peer
 * (our RCV shutdown becomes the peer's SEND shutdown and vice versa), waking
 * both ends.  A sockmap-installed proto is unhashed from the peer first.
 */
2900static int unix_shutdown(struct socket *sock, int mode)
2901{
2902	struct sock *sk = sock->sk;
2903	struct sock *other;
2904
fc61b928
XW
2905	if (mode < SHUT_RD || mode > SHUT_RDWR)
2906		return -EINVAL;
2907	/* This maps:
2908	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2909	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2910	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2911	 */
2912	++mode;
7180a031
AC
2913
2914	unix_state_lock(sk);
2915	sk->sk_shutdown |= mode;
2916	other = unix_peer(sk);
2917	if (other)
2918		sock_hold(other);
2919	unix_state_unlock(sk);
2920	sk->sk_state_change(sk);
2921
2922	if (other &&
2923		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2924
2925		int peer_mode = 0;
94531cfc 2926		const struct proto *prot = READ_ONCE(other->sk_prot);
7180a031 2927
d359902d
JW
2928		if (prot->unhash)
2929			prot->unhash(other);
7180a031
AC
2930		if (mode&RCV_SHUTDOWN)
2931			peer_mode |= SEND_SHUTDOWN;
2932		if (mode&SEND_SHUTDOWN)
2933			peer_mode |= RCV_SHUTDOWN;
2934		unix_state_lock(other);
2935		other->sk_shutdown |= peer_mode;
2936		unix_state_unlock(other);
2937		other->sk_state_change(other);
2938		if (peer_mode == SHUTDOWN_MASK)
7180a031 2939			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
d0c6416b 2940		else if (peer_mode & RCV_SHUTDOWN)
7180a031 2941			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2942	}
7180a031
AC
2943	if (other)
2944		sock_put(other);
2945
1da177e4
LT
2946	return 0;
2947}
2948
885ee74d
PE
/* SIOCINQ: bytes available to read.  Stream/seqpacket sum the unconsumed
 * bytes of every queued skb; datagram reports only the first datagram's
 * length.  -EINVAL on listening sockets.
 */
2949long unix_inq_len(struct sock *sk)
2950{
2951	struct sk_buff *skb;
2952	long amount = 0;
2953
2954	if (sk->sk_state == TCP_LISTEN)
2955		return -EINVAL;
2956
2957	spin_lock(&sk->sk_receive_queue.lock);
2958	if (sk->sk_type == SOCK_STREAM ||
2959	    sk->sk_type == SOCK_SEQPACKET) {
2960		skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2961			amount += unix_skb_len(skb);
885ee74d
PE
2962	} else {
2963		skb = skb_peek(&sk->sk_receive_queue);
2964		if (skb)
2965			amount = skb->len;
2966	}
2967	spin_unlock(&sk->sk_receive_queue.lock);
2968
2969	return amount;
2970}
2971EXPORT_SYMBOL_GPL(unix_inq_len);
2972
/* SIOCOUTQ: bytes queued but not yet consumed by the receiver (wmem). */
2973long unix_outq_len(struct sock *sk)
2974{
2975	return sk_wmem_alloc_get(sk);
2976}
2977EXPORT_SYMBOL_GPL(unix_outq_len);
2978
ba94f308
AV
/* SIOCUNIXFILE: open the filesystem object a bound socket sits on as an
 * O_PATH fd (CAP_NET_ADMIN in the socket's user namespace required).
 * Returns the new fd, or -ENOENT for unbound/abstract sockets.
 */
2979static int unix_open_file(struct sock *sk)
2980{
2981	struct path path;
2982	struct file *f;
2983	int fd;
2984
2985	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2986		return -EPERM;
2987
ae3b5641
AV
2988	if (!smp_load_acquire(&unix_sk(sk)->addr))
2989		return -ENOENT;
2990
ba94f308 2991	path = unix_sk(sk)->path;
ae3b5641 2992	if (!path.dentry)
ba94f308 2993		return -ENOENT;
ba94f308
AV
2994
2995	path_get(&path);
ba94f308
AV
2996
2997	fd = get_unused_fd_flags(O_CLOEXEC);
2998	if (fd < 0)
2999		goto out;
3000
3001	f = dentry_open(&path, O_PATH, current_cred());
3002	if (IS_ERR(f)) {
3003		put_unused_fd(fd);
3004		fd = PTR_ERR(f);
3005		goto out;
3006	}
3007
3008	fd_install(fd, f);
3009out:
3010	path_put(&path);
3011
3012	return fd;
3013}
3014
1da177e4
LT
/* ioctl(2) on AF_UNIX sockets: SIOCOUTQ/SIOCINQ queue sizes, SIOCUNIXFILE
 * (O_PATH fd to the bound inode) and, with OOB support, SIOCATMARK (is the
 * next byte the out-of-band byte?).  Unknown commands yield -ENOIOCTLCMD.
 */
3015static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3016{
3017	struct sock *sk = sock->sk;
e27dfcea 3018	long amount = 0;
1da177e4
LT
3019	int err;
3020
6eba6a37
ED
3021	switch (cmd) {
3022	case SIOCOUTQ:
885ee74d 3023		amount = unix_outq_len(sk);
6eba6a37
ED
3024		err = put_user(amount, (int __user *)arg);
3025		break;
3026	case SIOCINQ:
885ee74d
PE
3027		amount = unix_inq_len(sk);
3028		if (amount < 0)
3029			err = amount;
3030		else
1da177e4 3031			err = put_user(amount, (int __user *)arg);
885ee74d 3032		break;
ba94f308
AV
3033	case SIOCUNIXFILE:
3034		err = unix_open_file(sk);
3035		break;
314001f0
RS
3036#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3037	case SIOCATMARK:
3038		{
3039			struct sk_buff *skb;
3040			struct unix_sock *u = unix_sk(sk);
3041			int answ = 0;
3042
3043			skb = skb_peek(&sk->sk_receive_queue);
3044			if (skb && skb == u->oob_skb)
3045				answ = 1;
3046			err = put_user(answ, (int __user *)arg);
3047		}
3048		break;
3049#endif
6eba6a37
ED
3050	default:
3051		err = -ENOIOCTLCMD;
3052		break;
1da177e4
LT
3053	}
3054	return err;
3055}
3056
5f6beb9e
AB
/* 32-bit compat ioctl: translate the user pointer and reuse unix_ioctl(). */
3057#ifdef CONFIG_COMPAT
3058static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3059{
3060	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3061}
3062#endif
3063
/* poll for stream/seqpacket sockets: error, hangup/shutdown, readability
 * (queue non-empty or sockmap-readable) and writability bits.
 */
a11e1d43 3064static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
3065{
3066	struct sock *sk = sock->sk;
a11e1d43
LT
3067	__poll_t mask;
3068
89ab066d 3069	sock_poll_wait(file, sock, wait);
a11e1d43 3070	mask = 0;
1da177e4
LT
3071
3072	/* exceptional events? */
3073	if (sk->sk_err)
a9a08845 3074		mask |= EPOLLERR;
1da177e4 3075	if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 3076		mask |= EPOLLHUP;
f348d70a 3077	if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 3078		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3079
3080	/* readable? */
3ef7cf57 3081	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3082		mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3083	if (sk_is_readable(sk))
3084		mask |= EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3085
3086	/* Connection-based need to check for termination and startup */
6eba6a37
ED
3087	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3088	    sk->sk_state == TCP_CLOSE)
a9a08845 3089		mask |= EPOLLHUP;
1da177e4
LT
3090
3091	/*
3092	 * we set writable also when the other side has shut down the
3093	 * connection. This prevents stuck sockets.
3094	 */
3095	if (unix_writable(sk))
a9a08845 3096		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
3097
3098	return mask;
3099}
3100
a11e1d43
LT
/* poll for datagram sockets.  Beyond the usual bits, writability also
 * requires that a connected, non-mutual peer's receive queue has room —
 * otherwise we register on the peer's wakeup list (unix_dgram_peer_wake_me)
 * so the sender is woken when space frees up.  Write tests are skipped
 * entirely when the caller didn't ask for OUT events.
 */
3101static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3102				poll_table *wait)
3c73419c 3103{
ec0d215f 3104	struct sock *sk = sock->sk, *other;
a11e1d43
LT
3105	unsigned int writable;
3106	__poll_t mask;
3107
89ab066d 3108	sock_poll_wait(file, sock, wait);
a11e1d43 3109	mask = 0;
3c73419c
RW
3110
3111	/* exceptional events? */
3ef7cf57 3112	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
a9a08845
LT
3113		mask |= EPOLLERR |
3114			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 3115
3c73419c 3116	if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 3117		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3c73419c 3118	if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 3119		mask |= EPOLLHUP;
3c73419c
RW
3120
3121	/* readable? */
3ef7cf57 3122	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3123		mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3124	if (sk_is_readable(sk))
3125		mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
3126
3127	/* Connection-based need to check for termination and startup */
3128	if (sk->sk_type == SOCK_SEQPACKET) {
3129		if (sk->sk_state == TCP_CLOSE)
a9a08845 3130			mask |= EPOLLHUP;
3c73419c
RW
3131		/* connection hasn't started yet? */
3132		if (sk->sk_state == TCP_SYN_SENT)
3133			return mask;
3134	}
3135
973a34aa 3136	/* No write status requested, avoid expensive OUT tests. */
a11e1d43 3137	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
3138		return mask;
3139
ec0d215f 3140	writable = unix_writable(sk);
7d267278
RW
3141	if (writable) {
3142		unix_state_lock(sk);
3143
3144		other = unix_peer(sk);
3145		if (other && unix_peer(other) != sk &&
04f08eb4 3146		    unix_recvq_full_lockless(other) &&
7d267278
RW
3147		    unix_dgram_peer_wake_me(sk, other))
3148			writable = 0;
3149
3150		unix_state_unlock(sk);
ec0d215f
RW
3151	}
3152
3153	if (writable)
a9a08845 3154		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 3155	else
9cd3e072 3156		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 3157
3c73419c
RW
3158	return mask;
3159}
1da177e4
LT
3160
3161#ifdef CONFIG_PROC_FS
a53eb3fe 3162
7123aaa3
ED
/*
 * The seq_file position (*pos) encodes both a hash bucket and a 1-based
 * offset within that bucket: the high bits select the bucket, the low
 * BUCKET_SPACE bits hold the offset.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 3168
7123aaa3 3169static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 3170{
7123aaa3
ED
3171 unsigned long offset = get_offset(*pos);
3172 unsigned long bucket = get_bucket(*pos);
3173 struct sock *sk;
3174 unsigned long count = 0;
1da177e4 3175
7123aaa3
ED
3176 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3177 if (sock_net(sk) != seq_file_net(seq))
097e66c5 3178 continue;
7123aaa3
ED
3179 if (++count == offset)
3180 break;
3181 }
3182
3183 return sk;
3184}
3185
/*
 * Advance to the next socket of the seq_file's netns after sk, walking
 * into subsequent hash buckets as needed.  sk == SEQ_START_TOKEN (or an
 * exhausted chain, via the goto) restarts the lookup from *pos.
 *
 * NOTE(review): the goto deliberately jumps into the middle of the
 * do/while so an exhausted chain advances *pos to the next bucket before
 * probing it — do not "clean up" this control flow.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* step to the next bucket, offset 1 == its first entry */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
3212
1da177e4 3213static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 3214 __acquires(unix_table_lock)
1da177e4 3215{
fbe9cc4a 3216 spin_lock(&unix_table_lock);
7123aaa3
ED
3217
3218 if (!*pos)
3219 return SEQ_START_TOKEN;
3220
3221 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3222 return NULL;
3223
3224 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
3225}
3226
3227static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3228{
3229 ++*pos;
7123aaa3 3230 return unix_next_socket(seq, v, pos);
1da177e4
LT
3231}
3232
/* seq_file stop: release the lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
3238
/*
 * Emit one /proc/net/unix line: the header for SEQ_START_TOKEN,
 * otherwise "Num RefCount Protocol Flags Type St Inode [Path]" for the
 * socket in v.  Runs under unix_table_lock (taken in unix_seq_start()).
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,	/* "Protocol" column is always 0 for AF_UNIX */
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (!UNIX_ABSTRACT(s))
				len--;	/* pathname address: drop trailing NUL */
			else {
				/* abstract address: leading NUL shown as '@' */
				seq_putc(seq, '@');
				i++;
			}
			/* embedded NUL bytes are also rendered as '@' */
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
3284
/* Iteration callbacks behind /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next = unix_seq_next,
	.stop = unix_seq_stop,
	.show = unix_seq_show,
};
2c860a43
KI
3291
3292#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
/*
 * Per-socket context handed to BPF iterator programs; filled in by
 * unix_prog_seq_show() below.
 */
struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
3298
/* Build the iterator context and invoke the attached BPF program. */
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--; /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}
3310
3311static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3312{
3313 struct bpf_iter_meta meta;
3314 struct bpf_prog *prog;
3315 struct sock *sk = v;
3316 uid_t uid;
3317
3318 if (v == SEQ_START_TOKEN)
3319 return 0;
3320
3321 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3322 meta.seq = seq;
3323 prog = bpf_iter_get_info(&meta, false);
3324 return unix_prog_seq_show(prog, &meta, v, uid);
3325}
3326
3327static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3328{
3329 struct bpf_iter_meta meta;
3330 struct bpf_prog *prog;
3331
3332 if (!v) {
3333 meta.seq = seq;
3334 prog = bpf_iter_get_info(&meta, true);
3335 if (prog)
3336 (void)unix_prog_seq_show(prog, &meta, v, 0);
3337 }
3338
3339 unix_seq_stop(seq, v);
3340}
3341
/*
 * BPF-iterator seq_ops: shares start/next with the /proc iterator but
 * routes show/stop through the attached BPF program.
 */
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= unix_seq_start,
	.next	= unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
3348#endif
1da177e4
LT
3349#endif
3350
/* PF_UNIX address-family registration; unix_create() builds new sockets. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
3356
097e66c5 3357
/*
 * Per-netns init: set the default datagram backlog, register the
 * sysctls and create /proc/net/unix.  On proc failure the sysctls are
 * rolled back; -ENOMEM is returned for any failure.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
3377
/*
 * Per-netns teardown, mirroring unix_net_init().
 * NOTE(review): remove_proc_entry() is called unconditionally although
 * creation is guarded by CONFIG_PROC_FS — presumably it compiles to a
 * no-op stub when procfs is off; confirm against <linux/proc_fs.h>.
 */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
3383
/* Registered via register_pernet_subsys() in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
3388
2c860a43
KI
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declares the "unix" BPF iterator target's program signature. */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

/* seq_file glue used when a BPF iterator program is attached. */
static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct seq_net_private),
};

/* Registration record for the "unix" iterator target. */
static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &unix_seq_info,
};

/* Resolve the unix_sock BTF id and register the iterator target. */
static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif
3417
1da177e4
LT
3418static int __init af_unix_init(void)
3419{
3420 int rc = -1;
1da177e4 3421
c593642c 3422 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
1da177e4 3423
94531cfc
JW
3424 rc = proto_register(&unix_dgram_proto, 1);
3425 if (rc != 0) {
3426 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3427 goto out;
3428 }
3429
3430 rc = proto_register(&unix_stream_proto, 1);
ac7bfa62 3431 if (rc != 0) {
5cc208be 3432 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
1da177e4
LT
3433 goto out;
3434 }
3435
3436 sock_register(&unix_family_ops);
097e66c5 3437 register_pernet_subsys(&unix_net_ops);
c6382918 3438 unix_bpf_build_proto();
2c860a43
KI
3439
3440#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3441 bpf_iter_register();
3442#endif
3443
1da177e4
LT
3444out:
3445 return rc;
3446}
3447
/*
 * Module exit: unregister the PF_UNIX family, both protos and the
 * per-netns operations (reverse of af_unix_init()).
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_dgram_proto);
	proto_unregister(&unix_stream_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
3455
3d366960
DW
3456/* Earlier than device_initcall() so that other drivers invoking
3457 request_module() don't end up in a loop when modprobe tries
3458 to use a UNIX socket. But later than subsys_initcall() because
3459 we depend on stuff initialised there */
3460fs_initcall(af_unix_init);
1da177e4
LT
3461module_exit(af_unix_exit);
3462
3463MODULE_LICENSE("GPL");
3464MODULE_ALIAS_NETPROTO(PF_UNIX);