1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * NET4: Implementation of BSD Unix domain sockets.
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * is been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
48 * Known differences from reference BSD that was tested:
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 #include <linux/bpf-cgroup.h>
121 static atomic_long_t unix_nr_socks;
122 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
123 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
125 /* SMP locking strategy:
126 * hash table is protected with spinlock.
127 * each socket state is protected by separate spinlock.
130 static unsigned int unix_unbound_hash(struct sock *sk)
132 unsigned long hash = (unsigned long)sk;
138 return hash & UNIX_HASH_MOD;
141 static unsigned int unix_bsd_hash(struct inode *i)
143 return i->i_ino & UNIX_HASH_MOD;
146 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
147 int addr_len, int type)
149 __wsum csum = csum_partial(sunaddr, addr_len, 0);
152 hash = (__force unsigned int)csum_fold(csum);
156 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
159 static void unix_table_double_lock(struct net *net,
160 unsigned int hash1, unsigned int hash2)
162 if (hash1 == hash2) {
163 spin_lock(&net->unx.table.locks[hash1]);
170 spin_lock(&net->unx.table.locks[hash1]);
171 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
174 static void unix_table_double_unlock(struct net *net,
175 unsigned int hash1, unsigned int hash2)
177 if (hash1 == hash2) {
178 spin_unlock(&net->unx.table.locks[hash1]);
182 spin_unlock(&net->unx.table.locks[hash1]);
183 spin_unlock(&net->unx.table.locks[hash2]);
186 #ifdef CONFIG_SECURITY_NETWORK
187 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189 UNIXCB(skb).secid = scm->secid;
192 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194 scm->secid = UNIXCB(skb).secid;
197 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199 return (scm->secid == UNIXCB(skb).secid);
202 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
205 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
208 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
212 #endif /* CONFIG_SECURITY_NETWORK */
214 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
216 return unix_peer(osk) == sk;
219 static inline int unix_may_send(struct sock *sk, struct sock *osk)
221 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
224 static inline int unix_recvq_full(const struct sock *sk)
226 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
229 static inline int unix_recvq_full_lockless(const struct sock *sk)
231 return skb_queue_len_lockless(&sk->sk_receive_queue) >
232 READ_ONCE(sk->sk_max_ack_backlog);
235 struct sock *unix_peer_get(struct sock *s)
243 unix_state_unlock(s);
246 EXPORT_SYMBOL_GPL(unix_peer_get);
248 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
251 struct unix_address *addr;
253 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
257 refcount_set(&addr->refcnt, 1);
258 addr->len = addr_len;
259 memcpy(addr->name, sunaddr, addr_len);
264 static inline void unix_release_addr(struct unix_address *addr)
266 if (refcount_dec_and_test(&addr->refcnt))
271 * Check unix socket name:
272 * - should be not zero length.
273 * - if started by not zero, should be NULL terminated (FS object)
274 * - if started by zero, it is abstract name.
277 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
279 if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
280 addr_len > sizeof(*sunaddr))
283 if (sunaddr->sun_family != AF_UNIX)
289 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
291 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
292 short offset = offsetof(struct sockaddr_storage, __data);
294 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
296 /* This may look like an off by one error but it is a bit more
297 * subtle. 108 is the longest valid AF_UNIX path for a binding.
298 * sun_path[108] doesn't as such exist. However in kernel space
299 * we are guaranteed that it is a valid memory location in our
300 * kernel address buffer because syscall functions always pass
301 * a pointer of struct sockaddr_storage which has a bigger buffer
302 * than 108. Also, we must terminate sun_path for strlen() in
305 addr->__data[addr_len - offset] = 0;
307 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
308 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
309 * know the actual buffer.
311 return strlen(addr->__data) + offset + 1;
314 static void __unix_remove_socket(struct sock *sk)
316 sk_del_node_init(sk);
319 static void __unix_insert_socket(struct net *net, struct sock *sk)
321 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
322 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
325 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
326 struct unix_address *addr, unsigned int hash)
328 __unix_remove_socket(sk);
329 smp_store_release(&unix_sk(sk)->addr, addr);
332 __unix_insert_socket(net, sk);
335 static void unix_remove_socket(struct net *net, struct sock *sk)
337 spin_lock(&net->unx.table.locks[sk->sk_hash]);
338 __unix_remove_socket(sk);
339 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
342 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
344 spin_lock(&net->unx.table.locks[sk->sk_hash]);
345 __unix_insert_socket(net, sk);
346 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
349 static void unix_insert_bsd_socket(struct sock *sk)
351 spin_lock(&bsd_socket_locks[sk->sk_hash]);
352 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
353 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
356 static void unix_remove_bsd_socket(struct sock *sk)
358 if (!hlist_unhashed(&sk->sk_bind_node)) {
359 spin_lock(&bsd_socket_locks[sk->sk_hash]);
360 __sk_del_bind_node(sk);
361 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
363 sk_node_init(&sk->sk_bind_node);
367 static struct sock *__unix_find_socket_byname(struct net *net,
368 struct sockaddr_un *sunname,
369 int len, unsigned int hash)
373 sk_for_each(s, &net->unx.table.buckets[hash]) {
374 struct unix_sock *u = unix_sk(s);
376 if (u->addr->len == len &&
377 !memcmp(u->addr->name, sunname, len))
383 static inline struct sock *unix_find_socket_byname(struct net *net,
384 struct sockaddr_un *sunname,
385 int len, unsigned int hash)
389 spin_lock(&net->unx.table.locks[hash]);
390 s = __unix_find_socket_byname(net, sunname, len, hash);
393 spin_unlock(&net->unx.table.locks[hash]);
397 static struct sock *unix_find_socket_byinode(struct inode *i)
399 unsigned int hash = unix_bsd_hash(i);
402 spin_lock(&bsd_socket_locks[hash]);
403 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
404 struct dentry *dentry = unix_sk(s)->path.dentry;
406 if (dentry && d_backing_inode(dentry) == i) {
408 spin_unlock(&bsd_socket_locks[hash]);
412 spin_unlock(&bsd_socket_locks[hash]);
416 /* Support code for asymmetrically connected dgram sockets
418 * If a datagram socket is connected to a socket not itself connected
419 * to the first socket (eg, /dev/log), clients may only enqueue more
420 * messages if the present receive queue of the server socket is not
421 * "too large". This means there's a second writeability condition
422 * poll and sendmsg need to test. The dgram recv code will do a wake
423 * up on the peer_wait wait queue of a socket upon reception of a
424 * datagram which needs to be propagated to sleeping would-be writers
425 * since these might not have sent anything so far. This can't be
426 * accomplished via poll_wait because the lifetime of the server
427 * socket might be less than that of its clients if these break their
428 * association with it or if the server socket is closed while clients
429 * are still connected to it and there's no way to inform "a polling
430 * implementation" that it should let go of a certain wait queue
432 * In order to propagate a wake up, a wait_queue_entry_t of the client
433 * socket is enqueued on the peer_wait queue of the server socket
434 * whose wake function does a wake_up on the ordinary client socket
435 * wait queue. This connection is established whenever a write (or
436 * poll for write) hit the flow control condition and broken when the
437 * association to the server socket is dissolved or after a wake up
441 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
445 wait_queue_head_t *u_sleep;
447 u = container_of(q, struct unix_sock, peer_wake);
449 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
451 u->peer_wake.private = NULL;
453 /* relaying can only happen while the wq still exists */
454 u_sleep = sk_sleep(&u->sk);
456 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
461 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
463 struct unix_sock *u, *u_other;
467 u_other = unix_sk(other);
469 spin_lock(&u_other->peer_wait.lock);
471 if (!u->peer_wake.private) {
472 u->peer_wake.private = other;
473 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
478 spin_unlock(&u_other->peer_wait.lock);
482 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
485 struct unix_sock *u, *u_other;
488 u_other = unix_sk(other);
489 spin_lock(&u_other->peer_wait.lock);
491 if (u->peer_wake.private == other) {
492 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
493 u->peer_wake.private = NULL;
496 spin_unlock(&u_other->peer_wait.lock);
499 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
502 unix_dgram_peer_wake_disconnect(sk, other);
503 wake_up_interruptible_poll(sk_sleep(sk),
510 * - unix_peer(sk) == other
511 * - association is stable
513 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
517 connected = unix_dgram_peer_wake_connect(sk, other);
519 /* If other is SOCK_DEAD, we want to make sure we signal
520 * POLLOUT, such that a subsequent write() can get a
521 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
522 * to other and its full, we will hang waiting for POLLOUT.
524 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
528 unix_dgram_peer_wake_disconnect(sk, other);
533 static int unix_writable(const struct sock *sk)
535 return sk->sk_state != TCP_LISTEN &&
536 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
539 static void unix_write_space(struct sock *sk)
541 struct socket_wq *wq;
544 if (unix_writable(sk)) {
545 wq = rcu_dereference(sk->sk_wq);
546 if (skwq_has_sleeper(wq))
547 wake_up_interruptible_sync_poll(&wq->wait,
548 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
549 sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
554 /* When dgram socket disconnects (or changes its peer), we clear its receive
555 * queue of packets arrived from previous peer. First, it allows to do
556 * flow control based only on wmem_alloc; second, sk connected to peer
557 * may receive messages only from that peer. */
558 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
560 if (!skb_queue_empty(&sk->sk_receive_queue)) {
561 skb_queue_purge(&sk->sk_receive_queue);
562 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
564 /* If one link of bidirectional dgram pipe is disconnected,
565 * we signal error. Messages are lost. Do not make this,
566 * when peer was not connected to us.
568 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
569 WRITE_ONCE(other->sk_err, ECONNRESET);
570 sk_error_report(other);
575 static void unix_sock_destructor(struct sock *sk)
577 struct unix_sock *u = unix_sk(sk);
579 skb_queue_purge(&sk->sk_receive_queue);
581 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
582 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
583 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
584 if (!sock_flag(sk, SOCK_DEAD)) {
585 pr_info("Attempt to release alive unix socket: %p\n", sk);
590 unix_release_addr(u->addr);
592 atomic_long_dec(&unix_nr_socks);
593 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
594 #ifdef UNIX_REFCNT_DEBUG
595 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
596 atomic_long_read(&unix_nr_socks));
600 static void unix_release_sock(struct sock *sk, int embrion)
602 struct unix_sock *u = unix_sk(sk);
608 unix_remove_socket(sock_net(sk), sk);
609 unix_remove_bsd_socket(sk);
614 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
616 u->path.dentry = NULL;
618 state = sk->sk_state;
619 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
621 skpair = unix_peer(sk);
622 unix_peer(sk) = NULL;
624 unix_state_unlock(sk);
626 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
628 kfree_skb(u->oob_skb);
633 wake_up_interruptible_all(&u->peer_wait);
635 if (skpair != NULL) {
636 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
637 unix_state_lock(skpair);
639 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
640 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
641 WRITE_ONCE(skpair->sk_err, ECONNRESET);
642 unix_state_unlock(skpair);
643 skpair->sk_state_change(skpair);
644 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
647 unix_dgram_peer_wake_disconnect(sk, skpair);
648 sock_put(skpair); /* It may now die */
651 /* Try to flush out this socket. Throw out buffers at least */
653 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
654 if (state == TCP_LISTEN)
655 unix_release_sock(skb->sk, 1);
656 /* passed fds are erased in the kfree_skb hook */
657 UNIXCB(skb).consumed = skb->len;
666 /* ---- Socket is dead now and most probably destroyed ---- */
669 * Fixme: BSD difference: In BSD all sockets connected to us get
670 * ECONNRESET and we die on the spot. In Linux we behave
671 * like files and pipes do and wait for the last
674 * Can't we simply set sock->err?
676 * What the above comment does talk about? --ANK(980817)
679 if (READ_ONCE(unix_tot_inflight))
680 unix_gc(); /* Garbage collect fds */
683 static void init_peercred(struct sock *sk)
685 const struct cred *old_cred;
688 spin_lock(&sk->sk_peer_lock);
689 old_pid = sk->sk_peer_pid;
690 old_cred = sk->sk_peer_cred;
691 sk->sk_peer_pid = get_pid(task_tgid(current));
692 sk->sk_peer_cred = get_current_cred();
693 spin_unlock(&sk->sk_peer_lock);
699 static void copy_peercred(struct sock *sk, struct sock *peersk)
701 const struct cred *old_cred;
705 spin_lock(&sk->sk_peer_lock);
706 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
708 spin_lock(&peersk->sk_peer_lock);
709 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
711 old_pid = sk->sk_peer_pid;
712 old_cred = sk->sk_peer_cred;
713 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
714 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
716 spin_unlock(&sk->sk_peer_lock);
717 spin_unlock(&peersk->sk_peer_lock);
723 static int unix_listen(struct socket *sock, int backlog)
726 struct sock *sk = sock->sk;
727 struct unix_sock *u = unix_sk(sk);
730 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
731 goto out; /* Only stream/seqpacket sockets accept */
733 if (!READ_ONCE(u->addr))
734 goto out; /* No listens on an unbound socket */
736 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
738 if (backlog > sk->sk_max_ack_backlog)
739 wake_up_interruptible_all(&u->peer_wait);
740 sk->sk_max_ack_backlog = backlog;
741 WRITE_ONCE(sk->sk_state, TCP_LISTEN);
743 /* set credentials so connect can copy them */
748 unix_state_unlock(sk);
753 static int unix_release(struct socket *);
754 static int unix_bind(struct socket *, struct sockaddr *, int);
755 static int unix_stream_connect(struct socket *, struct sockaddr *,
756 int addr_len, int flags);
757 static int unix_socketpair(struct socket *, struct socket *);
758 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
759 static int unix_getname(struct socket *, struct sockaddr *, int);
760 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
761 static __poll_t unix_dgram_poll(struct file *, struct socket *,
763 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
765 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
767 static int unix_shutdown(struct socket *, int);
768 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
769 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
770 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
771 struct pipe_inode_info *, size_t size,
773 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
774 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
775 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
776 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
777 static int unix_dgram_connect(struct socket *, struct sockaddr *,
779 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
780 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
783 #ifdef CONFIG_PROC_FS
784 static int unix_count_nr_fds(struct sock *sk)
790 spin_lock(&sk->sk_receive_queue.lock);
791 skb = skb_peek(&sk->sk_receive_queue);
793 u = unix_sk(skb->sk);
794 nr_fds += atomic_read(&u->scm_stat.nr_fds);
795 skb = skb_peek_next(skb, &sk->sk_receive_queue);
797 spin_unlock(&sk->sk_receive_queue.lock);
802 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
804 struct sock *sk = sock->sk;
805 unsigned char s_state;
810 s_state = READ_ONCE(sk->sk_state);
813 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
814 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
815 * SOCK_DGRAM is ordinary. So, no lock is needed.
817 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
818 nr_fds = atomic_read(&u->scm_stat.nr_fds);
819 else if (s_state == TCP_LISTEN)
820 nr_fds = unix_count_nr_fds(sk);
822 seq_printf(m, "scm_fds: %u\n", nr_fds);
826 #define unix_show_fdinfo NULL
829 static const struct proto_ops unix_stream_ops = {
831 .owner = THIS_MODULE,
832 .release = unix_release,
834 .connect = unix_stream_connect,
835 .socketpair = unix_socketpair,
836 .accept = unix_accept,
837 .getname = unix_getname,
841 .compat_ioctl = unix_compat_ioctl,
843 .listen = unix_listen,
844 .shutdown = unix_shutdown,
845 .sendmsg = unix_stream_sendmsg,
846 .recvmsg = unix_stream_recvmsg,
847 .read_skb = unix_stream_read_skb,
848 .mmap = sock_no_mmap,
849 .splice_read = unix_stream_splice_read,
850 .set_peek_off = sk_set_peek_off,
851 .show_fdinfo = unix_show_fdinfo,
854 static const struct proto_ops unix_dgram_ops = {
856 .owner = THIS_MODULE,
857 .release = unix_release,
859 .connect = unix_dgram_connect,
860 .socketpair = unix_socketpair,
861 .accept = sock_no_accept,
862 .getname = unix_getname,
863 .poll = unix_dgram_poll,
866 .compat_ioctl = unix_compat_ioctl,
868 .listen = sock_no_listen,
869 .shutdown = unix_shutdown,
870 .sendmsg = unix_dgram_sendmsg,
871 .read_skb = unix_read_skb,
872 .recvmsg = unix_dgram_recvmsg,
873 .mmap = sock_no_mmap,
874 .set_peek_off = sk_set_peek_off,
875 .show_fdinfo = unix_show_fdinfo,
878 static const struct proto_ops unix_seqpacket_ops = {
880 .owner = THIS_MODULE,
881 .release = unix_release,
883 .connect = unix_stream_connect,
884 .socketpair = unix_socketpair,
885 .accept = unix_accept,
886 .getname = unix_getname,
887 .poll = unix_dgram_poll,
890 .compat_ioctl = unix_compat_ioctl,
892 .listen = unix_listen,
893 .shutdown = unix_shutdown,
894 .sendmsg = unix_seqpacket_sendmsg,
895 .recvmsg = unix_seqpacket_recvmsg,
896 .mmap = sock_no_mmap,
897 .set_peek_off = sk_set_peek_off,
898 .show_fdinfo = unix_show_fdinfo,
901 static void unix_close(struct sock *sk, long timeout)
903 /* Nothing to do here, unix socket does not need a ->close().
904 * This is merely for sockmap.
908 static void unix_unhash(struct sock *sk)
910 /* Nothing to do here, unix socket does not need a ->unhash().
911 * This is merely for sockmap.
915 static bool unix_bpf_bypass_getsockopt(int level, int optname)
917 if (level == SOL_SOCKET) {
929 struct proto unix_dgram_proto = {
931 .owner = THIS_MODULE,
932 .obj_size = sizeof(struct unix_sock),
934 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
935 #ifdef CONFIG_BPF_SYSCALL
936 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
940 struct proto unix_stream_proto = {
941 .name = "UNIX-STREAM",
942 .owner = THIS_MODULE,
943 .obj_size = sizeof(struct unix_sock),
945 .unhash = unix_unhash,
946 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
947 #ifdef CONFIG_BPF_SYSCALL
948 .psock_update_sk_prot = unix_stream_bpf_update_proto,
952 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
958 atomic_long_inc(&unix_nr_socks);
959 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
964 if (type == SOCK_STREAM)
965 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
966 else /*dgram and seqpacket */
967 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
974 sock_init_data(sock, sk);
976 sk->sk_hash = unix_unbound_hash(sk);
977 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
978 sk->sk_write_space = unix_write_space;
979 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
980 sk->sk_destruct = unix_sock_destructor;
984 u->path.dentry = NULL;
986 spin_lock_init(&u->lock);
987 mutex_init(&u->iolock); /* single task reading lock */
988 mutex_init(&u->bindlock); /* single task binding lock */
989 init_waitqueue_head(&u->peer_wait);
990 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
991 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
992 unix_insert_unbound_socket(net, sk);
994 sock_prot_inuse_add(net, sk->sk_prot, 1);
999 atomic_long_dec(&unix_nr_socks);
1000 return ERR_PTR(err);
1003 static int unix_create(struct net *net, struct socket *sock, int protocol,
1008 if (protocol && protocol != PF_UNIX)
1009 return -EPROTONOSUPPORT;
1011 sock->state = SS_UNCONNECTED;
1013 switch (sock->type) {
1015 sock->ops = &unix_stream_ops;
1018 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1022 sock->type = SOCK_DGRAM;
1025 sock->ops = &unix_dgram_ops;
1027 case SOCK_SEQPACKET:
1028 sock->ops = &unix_seqpacket_ops;
1031 return -ESOCKTNOSUPPORT;
1034 sk = unix_create1(net, sock, kern, sock->type);
1041 static int unix_release(struct socket *sock)
1043 struct sock *sk = sock->sk;
1048 sk->sk_prot->close(sk, 0);
1049 unix_release_sock(sk, 0);
1055 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1058 struct inode *inode;
1063 unix_mkname_bsd(sunaddr, addr_len);
1064 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1068 err = path_permission(&path, MAY_WRITE);
1072 err = -ECONNREFUSED;
1073 inode = d_backing_inode(path.dentry);
1074 if (!S_ISSOCK(inode->i_mode))
1077 sk = unix_find_socket_byinode(inode);
1082 if (sk->sk_type == type)
1096 return ERR_PTR(err);
1099 static struct sock *unix_find_abstract(struct net *net,
1100 struct sockaddr_un *sunaddr,
1101 int addr_len, int type)
1103 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1104 struct dentry *dentry;
1107 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1109 return ERR_PTR(-ECONNREFUSED);
1111 dentry = unix_sk(sk)->path.dentry;
1113 touch_atime(&unix_sk(sk)->path);
1118 static struct sock *unix_find_other(struct net *net,
1119 struct sockaddr_un *sunaddr,
1120 int addr_len, int type)
1124 if (sunaddr->sun_path[0])
1125 sk = unix_find_bsd(sunaddr, addr_len, type);
1127 sk = unix_find_abstract(net, sunaddr, addr_len, type);
1132 static int unix_autobind(struct sock *sk)
1134 struct unix_sock *u = unix_sk(sk);
1135 unsigned int new_hash, old_hash;
1136 struct net *net = sock_net(sk);
1137 struct unix_address *addr;
1138 u32 lastnum, ordernum;
1141 err = mutex_lock_interruptible(&u->bindlock);
1149 addr = kzalloc(sizeof(*addr) +
1150 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1154 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1155 addr->name->sun_family = AF_UNIX;
1156 refcount_set(&addr->refcnt, 1);
1158 old_hash = sk->sk_hash;
1159 ordernum = get_random_u32();
1160 lastnum = ordernum & 0xFFFFF;
1162 ordernum = (ordernum + 1) & 0xFFFFF;
1163 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1165 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1166 unix_table_double_lock(net, old_hash, new_hash);
1168 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1169 unix_table_double_unlock(net, old_hash, new_hash);
1171 /* __unix_find_socket_byname() may take long time if many names
1172 * are already in use.
1176 if (ordernum == lastnum) {
1177 /* Give up if all names seems to be in use. */
1179 unix_release_addr(addr);
1186 __unix_set_addr_hash(net, sk, addr, new_hash);
1187 unix_table_double_unlock(net, old_hash, new_hash);
1190 out: mutex_unlock(&u->bindlock);
1194 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1197 umode_t mode = S_IFSOCK |
1198 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1199 struct unix_sock *u = unix_sk(sk);
1200 unsigned int new_hash, old_hash;
1201 struct net *net = sock_net(sk);
1202 struct mnt_idmap *idmap;
1203 struct unix_address *addr;
1204 struct dentry *dentry;
1208 addr_len = unix_mkname_bsd(sunaddr, addr_len);
1209 addr = unix_create_addr(sunaddr, addr_len);
1214 * Get the parent directory, calculate the hash for last
1217 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1218 if (IS_ERR(dentry)) {
1219 err = PTR_ERR(dentry);
1224 * All right, let's create it.
1226 idmap = mnt_idmap(parent.mnt);
1227 err = security_path_mknod(&parent, dentry, mode, 0);
1229 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1232 err = mutex_lock_interruptible(&u->bindlock);
1238 old_hash = sk->sk_hash;
1239 new_hash = unix_bsd_hash(d_backing_inode(dentry));
1240 unix_table_double_lock(net, old_hash, new_hash);
1241 u->path.mnt = mntget(parent.mnt);
1242 u->path.dentry = dget(dentry);
1243 __unix_set_addr_hash(net, sk, addr, new_hash);
1244 unix_table_double_unlock(net, old_hash, new_hash);
1245 unix_insert_bsd_socket(sk);
1246 mutex_unlock(&u->bindlock);
1247 done_path_create(&parent, dentry);
1251 mutex_unlock(&u->bindlock);
1254 /* failed after successful mknod? unlink what we'd created... */
1255 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1257 done_path_create(&parent, dentry);
1259 unix_release_addr(addr);
1260 return err == -EEXIST ? -EADDRINUSE : err;
1263 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1266 struct unix_sock *u = unix_sk(sk);
1267 unsigned int new_hash, old_hash;
1268 struct net *net = sock_net(sk);
1269 struct unix_address *addr;
1272 addr = unix_create_addr(sunaddr, addr_len);
1276 err = mutex_lock_interruptible(&u->bindlock);
1285 old_hash = sk->sk_hash;
1286 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1287 unix_table_double_lock(net, old_hash, new_hash);
1289 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1292 __unix_set_addr_hash(net, sk, addr, new_hash);
1293 unix_table_double_unlock(net, old_hash, new_hash);
1294 mutex_unlock(&u->bindlock);
1298 unix_table_double_unlock(net, old_hash, new_hash);
1301 mutex_unlock(&u->bindlock);
1303 unix_release_addr(addr);
1307 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1309 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1310 struct sock *sk = sock->sk;
1313 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1314 sunaddr->sun_family == AF_UNIX)
1315 return unix_autobind(sk);
1317 err = unix_validate_addr(sunaddr, addr_len);
1321 if (sunaddr->sun_path[0])
1322 err = unix_bind_bsd(sk, sunaddr, addr_len);
1324 err = unix_bind_abstract(sk, sunaddr, addr_len);
1329 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1331 if (unlikely(sk1 == sk2) || !sk2) {
1332 unix_state_lock(sk1);
1338 unix_state_lock(sk1);
1339 unix_state_lock_nested(sk2, U_LOCK_SECOND);
1342 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1344 if (unlikely(sk1 == sk2) || !sk2) {
1345 unix_state_unlock(sk1);
1348 unix_state_unlock(sk1);
1349 unix_state_unlock(sk2);
1352 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1353 int alen, int flags)
1355 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1356 struct sock *sk = sock->sk;
1361 if (alen < offsetofend(struct sockaddr, sa_family))
1364 if (addr->sa_family != AF_UNSPEC) {
1365 err = unix_validate_addr(sunaddr, alen);
1369 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1373 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1374 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1375 !READ_ONCE(unix_sk(sk)->addr)) {
1376 err = unix_autobind(sk);
1382 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1383 if (IS_ERR(other)) {
1384 err = PTR_ERR(other);
1388 unix_state_double_lock(sk, other);
1390 /* Apparently VFS overslept socket death. Retry. */
1391 if (sock_flag(other, SOCK_DEAD)) {
1392 unix_state_double_unlock(sk, other);
1398 if (!unix_may_send(sk, other))
1401 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1405 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1406 WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1409 * 1003.1g breaking connected state with AF_UNSPEC
1412 unix_state_double_lock(sk, other);
1416 * If it was connected, reconnect.
1418 if (unix_peer(sk)) {
1419 struct sock *old_peer = unix_peer(sk);
1421 unix_peer(sk) = other;
1423 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1424 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1426 unix_state_double_unlock(sk, other);
1428 if (other != old_peer) {
1429 unix_dgram_disconnected(sk, old_peer);
1431 unix_state_lock(old_peer);
1432 if (!unix_peer(old_peer))
1433 WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1434 unix_state_unlock(old_peer);
1439 unix_peer(sk) = other;
1440 unix_state_double_unlock(sk, other);
1446 unix_state_double_unlock(sk, other);
1452 static long unix_wait_for_peer(struct sock *other, long timeo)
1453 __releases(&unix_sk(other)->lock)
1455 struct unix_sock *u = unix_sk(other);
1459 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1461 sched = !sock_flag(other, SOCK_DEAD) &&
1462 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1463 unix_recvq_full_lockless(other);
1465 unix_state_unlock(other);
1468 timeo = schedule_timeout(timeo);
1470 finish_wait(&u->peer_wait, &wait);
1474 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1475 int addr_len, int flags)
1477 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1478 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1479 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1480 struct net *net = sock_net(sk);
1481 struct sk_buff *skb = NULL;
1486 err = unix_validate_addr(sunaddr, addr_len);
1490 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1494 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1495 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1496 !READ_ONCE(u->addr)) {
1497 err = unix_autobind(sk);
1502 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1504 /* First of all allocate resources.
1505 If we will make it after state is locked,
1506 we will have to recheck all again in any case.
1509 /* create new sock for complete connection */
1510 newsk = unix_create1(net, NULL, 0, sock->type);
1511 if (IS_ERR(newsk)) {
1512 err = PTR_ERR(newsk);
1519 /* Allocate skb for sending to listening sock */
1520 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1525 /* Find listening sock. */
1526 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1527 if (IS_ERR(other)) {
1528 err = PTR_ERR(other);
1533 /* Latch state of peer */
1534 unix_state_lock(other);
1536 /* Apparently VFS overslept socket death. Retry. */
1537 if (sock_flag(other, SOCK_DEAD)) {
1538 unix_state_unlock(other);
1543 err = -ECONNREFUSED;
1544 if (other->sk_state != TCP_LISTEN)
1546 if (other->sk_shutdown & RCV_SHUTDOWN)
1549 if (unix_recvq_full(other)) {
1554 timeo = unix_wait_for_peer(other, timeo);
1556 err = sock_intr_errno(timeo);
1557 if (signal_pending(current))
1565 It is tricky place. We need to grab our state lock and cannot
1566 drop lock on peer. It is dangerous because deadlock is
1567 possible. Connect to self case and simultaneous
1568 attempt to connect are eliminated by checking socket
1569 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1570 check this before attempt to grab lock.
1572 Well, and we have to recheck the state after socket locked.
1578 /* This is ok... continue with connect */
1580 case TCP_ESTABLISHED:
1581 /* Socket is already connected */
1589 unix_state_lock_nested(sk, U_LOCK_SECOND);
1591 if (sk->sk_state != st) {
1592 unix_state_unlock(sk);
1593 unix_state_unlock(other);
1598 err = security_unix_stream_connect(sk, other, newsk);
1600 unix_state_unlock(sk);
1604 /* The way is open! Fastly set all the necessary fields... */
1607 unix_peer(newsk) = sk;
1608 newsk->sk_state = TCP_ESTABLISHED;
1609 newsk->sk_type = sk->sk_type;
1610 init_peercred(newsk);
1611 newu = unix_sk(newsk);
1612 newu->listener = other;
1613 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1614 otheru = unix_sk(other);
1616 /* copy address information from listening to new sock
1618 * The contents of *(otheru->addr) and otheru->path
1619 * are seen fully set up here, since we have found
1620 * otheru in hash under its lock. Insertion into the
1621 * hash chain we'd found it in had been done in an
1622 * earlier critical area protected by the chain's lock,
1623 * the same one where we'd set *(otheru->addr) contents,
1624 * as well as otheru->path and otheru->addr itself.
1626 * Using smp_store_release() here to set newu->addr
1627 * is enough to make those stores, as well as stores
1628 * to newu->path visible to anyone who gets newu->addr
1629 * by smp_load_acquire(). IOW, the same warranties
1630 * as for unix_sock instances bound in unix_bind() or
1631 * in unix_autobind().
1633 if (otheru->path.dentry) {
1634 path_get(&otheru->path);
1635 newu->path = otheru->path;
1637 refcount_inc(&otheru->addr->refcnt);
1638 smp_store_release(&newu->addr, otheru->addr);
1640 /* Set credentials */
1641 copy_peercred(sk, other);
1643 sock->state = SS_CONNECTED;
1644 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1647 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1648 unix_peer(sk) = newsk;
1650 unix_state_unlock(sk);
1652 /* take ten and send info to listening sock */
1653 spin_lock(&other->sk_receive_queue.lock);
1654 __skb_queue_tail(&other->sk_receive_queue, skb);
1655 spin_unlock(&other->sk_receive_queue.lock);
1656 unix_state_unlock(other);
1657 other->sk_data_ready(other);
1663 unix_state_unlock(other);
1668 unix_release_sock(newsk, 0);
1674 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1676 struct sock *ska = socka->sk, *skb = sockb->sk;
1678 /* Join our sockets back to back */
1681 unix_peer(ska) = skb;
1682 unix_peer(skb) = ska;
1686 ska->sk_state = TCP_ESTABLISHED;
1687 skb->sk_state = TCP_ESTABLISHED;
1688 socka->state = SS_CONNECTED;
1689 sockb->state = SS_CONNECTED;
1693 static void unix_sock_inherit_flags(const struct socket *old,
1696 if (test_bit(SOCK_PASSCRED, &old->flags))
1697 set_bit(SOCK_PASSCRED, &new->flags);
1698 if (test_bit(SOCK_PASSPIDFD, &old->flags))
1699 set_bit(SOCK_PASSPIDFD, &new->flags);
1700 if (test_bit(SOCK_PASSSEC, &old->flags))
1701 set_bit(SOCK_PASSSEC, &new->flags);
1704 static int unix_accept(struct socket *sock, struct socket *newsock,
1705 struct proto_accept_arg *arg)
1707 struct sock *sk = sock->sk;
1708 struct sk_buff *skb;
1711 arg->err = -EOPNOTSUPP;
1712 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1716 if (sk->sk_state != TCP_LISTEN)
1719 /* If socket state is TCP_LISTEN it cannot change (for now...),
1720 * so that no locks are necessary.
1723 skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1726 /* This means receive shutdown. */
1733 skb_free_datagram(sk, skb);
1734 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1736 /* attach accepted sock to socket */
1737 unix_state_lock(tsk);
1738 unix_update_edges(unix_sk(tsk));
1739 newsock->state = SS_CONNECTED;
1740 unix_sock_inherit_flags(sock, newsock);
1741 sock_graft(tsk, newsock);
1742 unix_state_unlock(tsk);
1750 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1752 struct sock *sk = sock->sk;
1753 struct unix_address *addr;
1754 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1758 sk = unix_peer_get(sk);
1768 addr = smp_load_acquire(&unix_sk(sk)->addr);
1770 sunaddr->sun_family = AF_UNIX;
1771 sunaddr->sun_path[0] = 0;
1772 err = offsetof(struct sockaddr_un, sun_path);
1775 memcpy(sunaddr, addr->name, addr->len);
1778 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1779 CGROUP_UNIX_GETPEERNAME);
1781 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1782 CGROUP_UNIX_GETSOCKNAME);
1789 /* The "user->unix_inflight" variable is protected by the garbage
1790 * collection lock, and we just read it locklessly here. If you go
1791 * over the limit, there might be a tiny race in actually noticing
1792 * it across threads. Tough.
1794 static inline bool too_many_unix_fds(struct task_struct *p)
1796 struct user_struct *user = current_user();
1798 if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1799 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1803 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1805 if (too_many_unix_fds(current))
1806 return -ETOOMANYREFS;
1808 UNIXCB(skb).fp = scm->fp;
1811 if (unix_prepare_fpl(UNIXCB(skb).fp))
1817 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1819 scm->fp = UNIXCB(skb).fp;
1820 UNIXCB(skb).fp = NULL;
1822 unix_destroy_fpl(scm->fp);
1825 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1827 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1830 static void unix_destruct_scm(struct sk_buff *skb)
1832 struct scm_cookie scm;
1834 memset(&scm, 0, sizeof(scm));
1835 scm.pid = UNIXCB(skb).pid;
1837 unix_detach_fds(&scm, skb);
1839 /* Alas, it calls VFS */
1840 /* So fscking what? fput() had been SMP-safe since the last Summer */
1845 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1849 UNIXCB(skb).pid = get_pid(scm->pid);
1850 UNIXCB(skb).uid = scm->creds.uid;
1851 UNIXCB(skb).gid = scm->creds.gid;
1852 UNIXCB(skb).fp = NULL;
1853 unix_get_secdata(scm, skb);
1854 if (scm->fp && send_fds)
1855 err = unix_attach_fds(scm, skb);
1857 skb->destructor = unix_destruct_scm;
1861 static bool unix_passcred_enabled(const struct socket *sock,
1862 const struct sock *other)
1864 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1865 test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1866 !other->sk_socket ||
1867 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1868 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1872 * Some apps rely on write() giving SCM_CREDENTIALS
1873 * We include credentials if source or destination socket
1874 * asserted SOCK_PASSCRED.
1876 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1877 const struct sock *other)
1879 if (UNIXCB(skb).pid)
1881 if (unix_passcred_enabled(sock, other)) {
1882 UNIXCB(skb).pid = get_pid(task_tgid(current));
1883 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1887 static bool unix_skb_scm_eq(struct sk_buff *skb,
1888 struct scm_cookie *scm)
1890 return UNIXCB(skb).pid == scm->pid &&
1891 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1892 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1893 unix_secdata_eq(scm, skb);
1896 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1898 struct scm_fp_list *fp = UNIXCB(skb).fp;
1899 struct unix_sock *u = unix_sk(sk);
1901 if (unlikely(fp && fp->count)) {
1902 atomic_add(fp->count, &u->scm_stat.nr_fds);
1903 unix_add_edges(fp, u);
1907 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1909 struct scm_fp_list *fp = UNIXCB(skb).fp;
1910 struct unix_sock *u = unix_sk(sk);
1912 if (unlikely(fp && fp->count)) {
1913 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1919 * Send AF_UNIX data.
1922 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1925 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1926 struct sock *sk = sock->sk, *other = NULL;
1927 struct unix_sock *u = unix_sk(sk);
1928 struct scm_cookie scm;
1929 struct sk_buff *skb;
1935 err = scm_send(sock, msg, &scm, false);
1939 wait_for_unix_gc(scm.fp);
1942 if (msg->msg_flags&MSG_OOB)
1945 if (msg->msg_namelen) {
1946 err = unix_validate_addr(sunaddr, msg->msg_namelen);
1950 err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
1959 other = unix_peer_get(sk);
1964 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1965 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1966 !READ_ONCE(u->addr)) {
1967 err = unix_autobind(sk);
1973 if (len > sk->sk_sndbuf - 32)
1976 if (len > SKB_MAX_ALLOC) {
1977 data_len = min_t(size_t,
1978 len - SKB_MAX_ALLOC,
1979 MAX_SKB_FRAGS * PAGE_SIZE);
1980 data_len = PAGE_ALIGN(data_len);
1982 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1985 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1986 msg->msg_flags & MSG_DONTWAIT, &err,
1987 PAGE_ALLOC_COSTLY_ORDER);
1991 err = unix_scm_to_skb(&scm, skb, true);
1995 skb_put(skb, len - data_len);
1996 skb->data_len = data_len;
1998 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2002 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2007 if (sunaddr == NULL)
2010 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
2012 if (IS_ERR(other)) {
2013 err = PTR_ERR(other);
2019 if (sk_filter(other, skb) < 0) {
2020 /* Toss the packet but do not return any error to the sender */
2026 unix_state_lock(other);
2029 if (!unix_may_send(sk, other))
2032 if (unlikely(sock_flag(other, SOCK_DEAD))) {
2034 * Check with 1003.1g - what should
2037 unix_state_unlock(other);
2041 unix_state_lock(sk);
2044 if (sk->sk_type == SOCK_SEQPACKET) {
2045 /* We are here only when racing with unix_release_sock()
2046 * is clearing @other. Never change state to TCP_CLOSE
2047 * unlike SOCK_DGRAM wants.
2049 unix_state_unlock(sk);
2051 } else if (unix_peer(sk) == other) {
2052 unix_peer(sk) = NULL;
2053 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2055 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2056 unix_state_unlock(sk);
2058 unix_dgram_disconnected(sk, other);
2060 err = -ECONNREFUSED;
2062 unix_state_unlock(sk);
2072 if (other->sk_shutdown & RCV_SHUTDOWN)
2075 if (sk->sk_type != SOCK_SEQPACKET) {
2076 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2081 /* other == sk && unix_peer(other) != sk if
2082 * - unix_peer(sk) == NULL, destination address bound to sk
2083 * - unix_peer(sk) == sk by time of get but disconnected before lock
2086 unlikely(unix_peer(other) != sk &&
2087 unix_recvq_full_lockless(other))) {
2089 timeo = unix_wait_for_peer(other, timeo);
2091 err = sock_intr_errno(timeo);
2092 if (signal_pending(current))
2099 unix_state_unlock(other);
2100 unix_state_double_lock(sk, other);
2103 if (unix_peer(sk) != other ||
2104 unix_dgram_peer_wake_me(sk, other)) {
2112 goto restart_locked;
2116 if (unlikely(sk_locked))
2117 unix_state_unlock(sk);
2119 if (sock_flag(other, SOCK_RCVTSTAMP))
2120 __net_timestamp(skb);
2121 maybe_add_creds(skb, sock, other);
2122 scm_stat_add(other, skb);
2123 skb_queue_tail(&other->sk_receive_queue, skb);
2124 unix_state_unlock(other);
2125 other->sk_data_ready(other);
2132 unix_state_unlock(sk);
2133 unix_state_unlock(other);
2143 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2144 * bytes, and a minimum of a full page.
2146 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2148 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2149 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2150 struct scm_cookie *scm, bool fds_sent)
2152 struct unix_sock *ousk = unix_sk(other);
2153 struct sk_buff *skb;
2156 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2161 err = unix_scm_to_skb(scm, skb, !fds_sent);
2167 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2174 unix_state_lock(other);
2176 if (sock_flag(other, SOCK_DEAD) ||
2177 (other->sk_shutdown & RCV_SHUTDOWN)) {
2178 unix_state_unlock(other);
2183 maybe_add_creds(skb, sock, other);
2186 scm_stat_add(other, skb);
2188 spin_lock(&other->sk_receive_queue.lock);
2190 consume_skb(ousk->oob_skb);
2191 WRITE_ONCE(ousk->oob_skb, skb);
2192 __skb_queue_tail(&other->sk_receive_queue, skb);
2193 spin_unlock(&other->sk_receive_queue.lock);
2195 sk_send_sigurg(other);
2196 unix_state_unlock(other);
2197 other->sk_data_ready(other);
2203 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2206 struct sock *sk = sock->sk;
2207 struct sock *other = NULL;
2209 struct sk_buff *skb;
2211 struct scm_cookie scm;
2212 bool fds_sent = false;
2215 err = scm_send(sock, msg, &scm, false);
2219 wait_for_unix_gc(scm.fp);
2222 if (msg->msg_flags & MSG_OOB) {
2223 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2231 if (msg->msg_namelen) {
2232 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2236 other = unix_peer(sk);
2241 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2244 while (sent < len) {
2247 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2248 skb = sock_alloc_send_pskb(sk, 0, 0,
2249 msg->msg_flags & MSG_DONTWAIT,
2252 /* Keep two messages in the pipe so it schedules better */
2253 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2255 /* allow fallback to order-0 allocations */
2256 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2258 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2260 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2262 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2263 msg->msg_flags & MSG_DONTWAIT, &err,
2264 get_order(UNIX_SKB_FRAGS_SZ));
2269 /* Only send the fds in the first buffer */
2270 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2277 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2278 err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2285 refcount_add(size, &sk->sk_wmem_alloc);
2287 skb_put(skb, size - data_len);
2288 skb->data_len = data_len;
2290 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2297 unix_state_lock(other);
2299 if (sock_flag(other, SOCK_DEAD) ||
2300 (other->sk_shutdown & RCV_SHUTDOWN))
2303 maybe_add_creds(skb, sock, other);
2304 scm_stat_add(other, skb);
2305 skb_queue_tail(&other->sk_receive_queue, skb);
2306 unix_state_unlock(other);
2307 other->sk_data_ready(other);
2311 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2312 if (msg->msg_flags & MSG_OOB) {
2313 err = queue_oob(sock, msg, other, &scm, fds_sent);
2325 unix_state_unlock(other);
2328 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2329 send_sig(SIGPIPE, current, 0);
2333 return sent ? : err;
2336 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2340 struct sock *sk = sock->sk;
2342 err = sock_error(sk);
2346 if (sk->sk_state != TCP_ESTABLISHED)
2349 if (msg->msg_namelen)
2350 msg->msg_namelen = 0;
2352 return unix_dgram_sendmsg(sock, msg, len);
2355 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2356 size_t size, int flags)
2358 struct sock *sk = sock->sk;
2360 if (sk->sk_state != TCP_ESTABLISHED)
2363 return unix_dgram_recvmsg(sock, msg, size, flags);
2366 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2368 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2371 msg->msg_namelen = addr->len;
2372 memcpy(msg->msg_name, addr->name, addr->len);
2376 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2379 struct scm_cookie scm;
2380 struct socket *sock = sk->sk_socket;
2381 struct unix_sock *u = unix_sk(sk);
2382 struct sk_buff *skb, *last;
2391 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2394 mutex_lock(&u->iolock);
2396 skip = sk_peek_offset(sk, flags);
2397 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2398 &skip, &err, &last);
2400 if (!(flags & MSG_PEEK))
2401 scm_stat_del(sk, skb);
2405 mutex_unlock(&u->iolock);
2410 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2411 &err, &timeo, last));
2413 if (!skb) { /* implies iolock unlocked */
2414 unix_state_lock(sk);
2415 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2416 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2417 (sk->sk_shutdown & RCV_SHUTDOWN))
2419 unix_state_unlock(sk);
2423 if (wq_has_sleeper(&u->peer_wait))
2424 wake_up_interruptible_sync_poll(&u->peer_wait,
2425 EPOLLOUT | EPOLLWRNORM |
2428 if (msg->msg_name) {
2429 unix_copy_addr(msg, skb->sk);
2431 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2436 if (size > skb->len - skip)
2437 size = skb->len - skip;
2438 else if (size < skb->len - skip)
2439 msg->msg_flags |= MSG_TRUNC;
2441 err = skb_copy_datagram_msg(skb, skip, msg, size);
2445 if (sock_flag(sk, SOCK_RCVTSTAMP))
2446 __sock_recv_timestamp(msg, sk, skb);
2448 memset(&scm, 0, sizeof(scm));
2450 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2451 unix_set_secdata(&scm, skb);
2453 if (!(flags & MSG_PEEK)) {
2455 unix_detach_fds(&scm, skb);
2457 sk_peek_offset_bwd(sk, skb->len);
2459 /* It is questionable: on PEEK we could:
2460 - do not return fds - good, but too simple 8)
2461 - return fds, and do not return them on read (old strategy,
2463 - clone fds (I chose it for now, it is the most universal
2466 POSIX 1003.1g does not actually define this clearly
2467 at all. POSIX 1003.1g doesn't define a lot of things
2472 sk_peek_offset_fwd(sk, size);
2475 unix_peek_fds(&scm, skb);
2477 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2479 scm_recv_unix(sock, msg, &scm, flags);
2482 skb_free_datagram(sk, skb);
2483 mutex_unlock(&u->iolock);
2488 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2491 struct sock *sk = sock->sk;
2493 #ifdef CONFIG_BPF_SYSCALL
2494 const struct proto *prot = READ_ONCE(sk->sk_prot);
2496 if (prot != &unix_dgram_proto)
2497 return prot->recvmsg(sk, msg, size, flags, NULL);
2499 return __unix_dgram_recvmsg(sk, msg, size, flags);
2502 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2504 struct unix_sock *u = unix_sk(sk);
2505 struct sk_buff *skb;
2508 mutex_lock(&u->iolock);
2509 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2510 mutex_unlock(&u->iolock);
2514 return recv_actor(sk, skb);
2518 * Sleep until more data has arrived. But check for races..
2520 static long unix_stream_data_wait(struct sock *sk, long timeo,
2521 struct sk_buff *last, unsigned int last_len,
2524 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2525 struct sk_buff *tail;
2528 unix_state_lock(sk);
2531 prepare_to_wait(sk_sleep(sk), &wait, state);
2533 tail = skb_peek_tail(&sk->sk_receive_queue);
2535 (tail && tail->len != last_len) ||
2537 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2538 signal_pending(current) ||
2542 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2543 unix_state_unlock(sk);
2544 timeo = schedule_timeout(timeo);
2545 unix_state_lock(sk);
2547 if (sock_flag(sk, SOCK_DEAD))
2550 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2553 finish_wait(sk_sleep(sk), &wait);
2554 unix_state_unlock(sk);
2558 static unsigned int unix_skb_len(const struct sk_buff *skb)
2560 return skb->len - UNIXCB(skb).consumed;
2563 struct unix_stream_read_state {
2564 int (*recv_actor)(struct sk_buff *, int, int,
2565 struct unix_stream_read_state *);
2566 struct socket *socket;
2568 struct pipe_inode_info *pipe;
2571 unsigned int splice_flags;
2574 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2575 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2577 struct socket *sock = state->socket;
2578 struct sock *sk = sock->sk;
2579 struct unix_sock *u = unix_sk(sk);
2581 struct sk_buff *oob_skb;
2583 mutex_lock(&u->iolock);
2584 unix_state_lock(sk);
2585 spin_lock(&sk->sk_receive_queue.lock);
2587 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2588 spin_unlock(&sk->sk_receive_queue.lock);
2589 unix_state_unlock(sk);
2590 mutex_unlock(&u->iolock);
2594 oob_skb = u->oob_skb;
2596 if (!(state->flags & MSG_PEEK))
2597 WRITE_ONCE(u->oob_skb, NULL);
2601 spin_unlock(&sk->sk_receive_queue.lock);
2602 unix_state_unlock(sk);
2604 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2606 if (!(state->flags & MSG_PEEK))
2607 UNIXCB(oob_skb).consumed += 1;
2609 consume_skb(oob_skb);
2611 mutex_unlock(&u->iolock);
2616 state->msg->msg_flags |= MSG_OOB;
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		struct sk_buff *unlinked_skb = NULL;

		spin_lock(&sk->sk_receive_queue.lock);

		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					WRITE_ONCE(u->oob_skb, NULL);
					consume_skb(skb);
				}
			} else if (flags & MSG_PEEK) {
				skb = NULL;
			} else {
				__skb_unlink(skb, &sk->sk_receive_queue);
				WRITE_ONCE(u->oob_skb, NULL);
				unlinked_skb = skb;
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}

		spin_unlock(&sk->sk_receive_queue.lock);

		if (unlinked_skb) {
			WARN_ON_ONCE(skb_unref(unlinked_skb));
			kfree_skb(unlinked_skb);
		}
	}
	return skb;
}
#endif
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
		return -ENOTCONN;

	return unix_read_skb(sk, recv_actor);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);
#endif
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (skb) {
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb && copied) {
				unix_state_unlock(sk);
				break;
			}
		}
#endif
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);

			unix_copy_addr(state->msg, skb->sk);

			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
							      state->msg->msg_name,
							      &state->msg->msg_namelen);

			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv_unix(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
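
/* Illustrative userspace sketch (compiled out; not kernel code): since
 * the loop above never glues skbs with differing credentials, each
 * recvmsg() with SO_PASSCRED enabled returns bytes from one writer and
 * a single consistent SCM_CREDENTIALS message. 'fd' is an assumed
 * connected unix stream socket; standard socket headers are assumed.
 */
#if 0
	char data[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	union {
		struct cmsghdr align;
		char buf[CMSG_SPACE(sizeof(struct ucred))];
	} ctl;
	struct msghdr mh = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl.buf,
		.msg_controllen = sizeof(ctl.buf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
	recvmsg(fd, &mh, 0);

	for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			struct ucred cred;

			memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
			/* cred.pid/cred.uid/cred.gid identify the writer */
		}
#endif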
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sk->sk_socket,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif

	return unix_stream_read_generic(&state, true);
}
static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
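
/* Illustrative userspace sketch (compiled out; not kernel code): the
 * splice path above moves queued stream data into a pipe without a
 * round trip through a userspace buffer. 'fd' is an assumed connected
 * unix stream socket; <fcntl.h> and <unistd.h> are assumed.
 */
#if 0
	int pfd[2];
	ssize_t n;

	pipe(pfd);
	n = splice(fd, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
	/* up to n bytes now sit in the pipe; the offset argument must
	 * stay NULL, since a nonzero *ppos is rejected with -ESPIPE.
	 */
#endif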
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		if (prot->unhash)
			prot->unhash(other);
		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
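
/* Illustrative userspace sketch (compiled out; not kernel code): the
 * peer-mode mirroring above means SHUT_WR on one end surfaces as
 * RCV_SHUTDOWN, i.e. end-of-file, on the other. 'sv' is an assumed
 * connected socketpair(AF_UNIX, SOCK_STREAM, 0, sv) result.
 */
#if 0
	char c;
	ssize_t n;

	shutdown(sv[0], SHUT_WR);	/* sets the peer's RCV_SHUTDOWN */
	n = read(sv[1], &c, 1);		/* n == 0: orderly EOF, no error */
#endif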
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
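
/* Illustrative userspace sketch (compiled out; not kernel code):
 * SIOCUNIXFILE, dispatched from unix_ioctl() below, returns an O_PATH
 * descriptor for the filesystem node this socket is bound to, so an
 * inspecting tool can fstat() it. CAP_NET_ADMIN is required, as
 * checked above. 'fd' is an assumed bound unix socket.
 */
#if 0
	struct stat st;
	int pathfd;

	pathfd = ioctl(fd, SIOCUNIXFILE);
	if (pathfd >= 0)
		fstat(pathfd, &st);	/* st_ino of the bound path */
#endif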
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	case SIOCATMARK:
		{
			struct sk_buff *skb;
			int answ = 0;

			skb = skb_peek(&sk->sk_receive_queue);
			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
				answ = 1;
			err = put_user(answ, (int __user *)arg);
		}
		break;
#endif
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
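
/* Illustrative userspace sketch (compiled out; not kernel code):
 * SIOCINQ/SIOCOUTQ expose the byte counts computed by unix_inq_len()
 * and unix_outq_len() above. 'fd' is an assumed connected unix socket;
 * <sys/ioctl.h> and <linux/sockios.h> are assumed.
 */
#if 0
	int inq, outq;

	ioctl(fd, SIOCINQ, &inq);	/* unread bytes in the receive queue */
	ioctl(fd, SIOCOUTQ, &outq);	/* bytes still held in the send queue */
#endif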
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (READ_ONCE(unix_sk(sk)->oob_skb))
		mask |= EPOLLPRI;
#endif

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
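
/* Illustrative userspace sketch (compiled out; not kernel code): the
 * writability test above lets a datagram sender sleep in poll() until
 * the peer drains its receive queue, instead of spinning on EAGAIN.
 * 'fd', 'buf' and 'len' are assumptions of the example; <poll.h> and
 * <errno.h> are assumed.
 */
#if 0
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };

	while (send(fd, buf, len, MSG_DONTWAIT) < 0 && errno == EAGAIN)
		poll(&pfd, 1, -1);	/* woken via unix_dgram_peer_wake_me() */
#endif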
#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;
	struct sock *sk;

	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	     sk; sk = sk_next(sk)) {
		if (++count == offset)
			break;
	}

	return sk;
}
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct net *net = seq_file_net(seq);
	struct sock *sk;

	while (bucket < UNIX_HASH_SIZE) {
		spin_lock(&net->unx.table.locks[bucket]);

		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

		spin_unlock(&net->unx.table.locks[bucket]);

		*pos = set_bucket_offset(++bucket, 1);
	}

	return NULL;
}
static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);

	sk = sk_next(sk);
	if (sk)
		return sk;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);

	*pos = set_bucket_offset(++bucket, 1);

	return unix_get_first(seq, pos);
}
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	return unix_get_first(seq, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;

	if (v == SEQ_START_TOKEN)
		return unix_get_first(seq, pos);

	return unix_get_next(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
{
	struct sock *sk = v;

	if (sk)
		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under a hash table lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (u->addr->name->sun_path[0]) {
				len--;
			} else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
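
/* The resulting /proc/net/unix lines look roughly like (illustrative
 * values, not captured output):
 *
 *   Num       RefCount Protocol Flags    Type St Inode Path
 *   0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/foo.sock
 *   0000000000000000: 00000002 00000000 00000000 0001 03 12346 @abstract-name
 *
 * Abstract names are printed with '@' substituted for the leading NUL
 * (and for any embedded NUL), exactly as the loop above does.
 */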
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
	struct seq_net_private p;
	unsigned int cur_sk;
	unsigned int end_sk;
	unsigned int max_sk;
	struct sock **batch;
	bool st_bucket_done;
};

struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}
static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected = 1;
	struct sock *sk;

	sock_hold(start_sk);
	iter->batch[iter->end_sk++] = start_sk;

	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
		if (iter->end_sk < iter->max_sk) {
			sock_hold(sk);
			iter->batch[iter->end_sk++] = sk;
		}

		expected++;
	}

	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);

	return expected;
}
static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
{
	while (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);
}
static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
				       unsigned int new_batch_sz)
{
	struct sock **new_batch;

	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
			     GFP_USER | __GFP_NOWARN);
	if (!new_batch)
		return -ENOMEM;

	bpf_iter_unix_put_batch(iter);
	kvfree(iter->batch);
	iter->batch = new_batch;
	iter->max_sk = new_batch_sz;

	return 0;
}
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}
static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}
static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_PROC_FS */
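
/* Illustrative BPF-side sketch (compiled out; not part of this file):
 * a program attached to this iterator target receives one
 * bpf_iter__unix context per socket. This mirrors the style of the
 * kernel selftests' bpf_iter programs and assumes vmlinux.h plus
 * libbpf's bpf_helpers.h and bpf_tracing.h.
 */
#if 0
	SEC("iter/unix")
	int dump_unix(struct bpf_iter__unix *ctx)
	{
		struct unix_sock *unix_sk = ctx->unix_sk;

		if (!unix_sk)
			return 0;

		BPF_SEQ_PRINTF(ctx->meta->seq, "uid=%u\n", ctx->uid);
		return 0;
	}
#endif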
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16

static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err) {
		bpf_iter_fini_seq_net(priv_data);
		return err;
	}

	return 0;
}

static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};

static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}

static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto		= bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};

static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif
static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}

/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);