af_unix: Convert the unix_sk macro to an inline function for type safety
[linux-2.6-block.git] / net / unix / af_unix.c
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko EiBfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair.
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */

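/*
 * Illustration (not part of the kernel source): the two kinds of names
 * mentioned above, as a userspace caller would build them.  A filesystem
 * name is a NUL-terminated path in sun_path; an abstract name starts with
 * a 0 byte and its length is carried entirely in the addrlen argument.
 *
 *	struct sockaddr_un a;
 *
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *
 *	strcpy(a.sun_path, "/tmp/mysock");		// filesystem binding
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	a.sun_path[0] = '\0';				// abstract binding
 *	memcpy(a.sun_path + 1, "mysock", 6);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 6);
 */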
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

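/*
 * Note: unix_recvq_full() is the flow-control test shared by the connect()
 * backlog check and the datagram send path.  For datagram sockets,
 * sk_max_ack_backlog is seeded from the per-netns sysctl_max_dgram_qlen
 * (see unix_create1() below), so a sender to a full peer queue either
 * blocks or gets -EAGAIN.  The userspace knob is:
 *
 *	# sysctl net.unix.max_dgram_qlen
 */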
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it does not start with a zero byte, it must be NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle.  108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

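/*
 * Illustration (not kernel code): how these checks look from userspace.
 * In unix_bind() below, an addr_len of exactly sizeof(short) selects
 * autobind; anything longer is validated here, so:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(a) + 1);	// -EINVAL: longer than sockaddr_un
 *	a.sun_family = AF_INET;
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));	// -EINVAL: wrong family
 *	a.sun_family = AF_UNIX;
 *	strcpy(a.sun_path, "/tmp/x");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));	// OK: filesystem name, NUL terminated
 */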
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

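/*
 * Note: the "<< 2" above means a unix socket is reported writable only
 * while its queued write memory stays below a quarter of sk_sndbuf.  A
 * quick worked example, assuming a common default sk_sndbuf of 212992
 * bytes: poll() flags POLLOUT and blocked writers are woken once
 * sk_wmem_alloc drops under 53248 bytes.
 */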
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal the error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

ded34e0f 397static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
398{
399 struct unix_sock *u = unix_sk(sk);
40ffe67d 400 struct path path;
1da177e4
LT
401 struct sock *skpair;
402 struct sk_buff *skb;
403 int state;
404
405 unix_remove_socket(sk);
406
407 /* Clear state */
1c92b4e5 408 unix_state_lock(sk);
1da177e4
LT
409 sock_orphan(sk);
410 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
411 path = u->path;
412 u->path.dentry = NULL;
413 u->path.mnt = NULL;
1da177e4
LT
414 state = sk->sk_state;
415 sk->sk_state = TCP_CLOSE;
1c92b4e5 416 unix_state_unlock(sk);
1da177e4
LT
417
418 wake_up_interruptible_all(&u->peer_wait);
419
e27dfcea 420 skpair = unix_peer(sk);
1da177e4 421
e27dfcea 422 if (skpair != NULL) {
1da177e4 423 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 424 unix_state_lock(skpair);
1da177e4
LT
425 /* No more writes */
426 skpair->sk_shutdown = SHUTDOWN_MASK;
427 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
428 skpair->sk_err = ECONNRESET;
1c92b4e5 429 unix_state_unlock(skpair);
1da177e4 430 skpair->sk_state_change(skpair);
8d8ad9d7 431 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4
LT
432 }
433 sock_put(skpair); /* It may now die */
434 unix_peer(sk) = NULL;
435 }
436
437 /* Try to flush out this socket. Throw out buffers at least */
438
439 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 440 if (state == TCP_LISTEN)
1da177e4
LT
441 unix_release_sock(skb->sk, 1);
442 /* passed fds are erased in the kfree_skb hook */
443 kfree_skb(skb);
444 }
445
40ffe67d
AV
446 if (path.dentry)
447 path_put(&path);
1da177e4
LT
448
449 sock_put(sk);
450
451 /* ---- Socket is dead now and most probably destroyed ---- */
452
453 /*
e04dae84 454 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
455 * ECONNRESET and we die on the spot. In Linux we behave
456 * like files and pipes do and wait for the last
457 * dereference.
458 *
459 * Can't we simply set sock->err?
460 *
461 * What the above comment does talk about? --ANK(980817)
462 */
463
9305cfa4 464 if (unix_tot_inflight)
ac7bfa62 465 unix_gc(); /* Garbage collect fds */
1da177e4
LT
466}
467
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

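/*
 * The pid/cred captured by init_peercred()/copy_peercred() are what
 * userspace later reads back with SO_PEERCRED.  Illustration (not kernel
 * code):
 *
 *	struct ucred peer;
 *	socklen_t len = sizeof(peer);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
 *		printf("peer pid=%d uid=%d gid=%d\n",
 *		       peer.pid, peer.uid, peer.gid);
 */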
1da177e4
LT
486static int unix_listen(struct socket *sock, int backlog)
487{
488 int err;
489 struct sock *sk = sock->sk;
490 struct unix_sock *u = unix_sk(sk);
109f6e39 491 struct pid *old_pid = NULL;
1da177e4
LT
492
493 err = -EOPNOTSUPP;
6eba6a37
ED
494 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
495 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
496 err = -EINVAL;
497 if (!u->addr)
6eba6a37 498 goto out; /* No listens on an unbound socket */
1c92b4e5 499 unix_state_lock(sk);
1da177e4
LT
500 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
501 goto out_unlock;
502 if (backlog > sk->sk_max_ack_backlog)
503 wake_up_interruptible_all(&u->peer_wait);
504 sk->sk_max_ack_backlog = backlog;
505 sk->sk_state = TCP_LISTEN;
506 /* set credentials so connect can copy them */
109f6e39 507 init_peercred(sk);
1da177e4
LT
508 err = 0;
509
510out_unlock:
1c92b4e5 511 unix_state_unlock(sk);
109f6e39 512 put_pid(old_pid);
1da177e4
LT
513out:
514 return err;
515}
516
517static int unix_release(struct socket *);
518static int unix_bind(struct socket *, struct sockaddr *, int);
519static int unix_stream_connect(struct socket *, struct sockaddr *,
520 int addr_len, int flags);
521static int unix_socketpair(struct socket *, struct socket *);
522static int unix_accept(struct socket *, struct socket *, int);
523static int unix_getname(struct socket *, struct sockaddr *, int *, int);
524static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
525static unsigned int unix_dgram_poll(struct file *, struct socket *,
526 poll_table *);
1da177e4
LT
527static int unix_ioctl(struct socket *, unsigned int, unsigned long);
528static int unix_shutdown(struct socket *, int);
1b784140
YX
529static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
530static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
531static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
532 size_t size, int flags);
2b514574
HFS
533static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
534 struct pipe_inode_info *, size_t size,
535 unsigned int flags);
1b784140
YX
536static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
537static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
1da177e4
LT
538static int unix_dgram_connect(struct socket *, struct sockaddr *,
539 int, int);
1b784140
YX
540static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
541static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
542 int);
1da177e4 543
12663bfc 544static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
545{
546 struct unix_sock *u = unix_sk(sk);
547
12663bfc
SL
548 if (mutex_lock_interruptible(&u->readlock))
549 return -EINTR;
550
f55bb7f9
PE
551 sk->sk_peek_off = val;
552 mutex_unlock(&u->readlock);
12663bfc
SL
553
554 return 0;
f55bb7f9
PE
555}
556
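/*
 * The sk_peek_off value set here is consumed by the recvmsg paths below
 * (sk_peek_offset()/sk_peek_offset_fwd()/sk_peek_offset_bwd()).
 * Illustration (not kernel code) of the userspace side:
 *
 *	int off = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15, offset becomes 16
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31
 *	recv(fd, buf, 32, 0);		// consumes data, offset is rewound
 */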
557
90ddc4f0 558static const struct proto_ops unix_stream_ops = {
1da177e4
LT
559 .family = PF_UNIX,
560 .owner = THIS_MODULE,
561 .release = unix_release,
562 .bind = unix_bind,
563 .connect = unix_stream_connect,
564 .socketpair = unix_socketpair,
565 .accept = unix_accept,
566 .getname = unix_getname,
567 .poll = unix_poll,
568 .ioctl = unix_ioctl,
569 .listen = unix_listen,
570 .shutdown = unix_shutdown,
571 .setsockopt = sock_no_setsockopt,
572 .getsockopt = sock_no_getsockopt,
573 .sendmsg = unix_stream_sendmsg,
574 .recvmsg = unix_stream_recvmsg,
575 .mmap = sock_no_mmap,
869e7c62 576 .sendpage = unix_stream_sendpage,
2b514574 577 .splice_read = unix_stream_splice_read,
fc0d7536 578 .set_peek_off = unix_set_peek_off,
1da177e4
LT
579};
580
90ddc4f0 581static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
582 .family = PF_UNIX,
583 .owner = THIS_MODULE,
584 .release = unix_release,
585 .bind = unix_bind,
586 .connect = unix_dgram_connect,
587 .socketpair = unix_socketpair,
588 .accept = sock_no_accept,
589 .getname = unix_getname,
ec0d215f 590 .poll = unix_dgram_poll,
1da177e4
LT
591 .ioctl = unix_ioctl,
592 .listen = sock_no_listen,
593 .shutdown = unix_shutdown,
594 .setsockopt = sock_no_setsockopt,
595 .getsockopt = sock_no_getsockopt,
596 .sendmsg = unix_dgram_sendmsg,
597 .recvmsg = unix_dgram_recvmsg,
598 .mmap = sock_no_mmap,
599 .sendpage = sock_no_sendpage,
f55bb7f9 600 .set_peek_off = unix_set_peek_off,
1da177e4
LT
601};
602
90ddc4f0 603static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
604 .family = PF_UNIX,
605 .owner = THIS_MODULE,
606 .release = unix_release,
607 .bind = unix_bind,
608 .connect = unix_stream_connect,
609 .socketpair = unix_socketpair,
610 .accept = unix_accept,
611 .getname = unix_getname,
ec0d215f 612 .poll = unix_dgram_poll,
1da177e4
LT
613 .ioctl = unix_ioctl,
614 .listen = unix_listen,
615 .shutdown = unix_shutdown,
616 .setsockopt = sock_no_setsockopt,
617 .getsockopt = sock_no_getsockopt,
618 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 619 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
620 .mmap = sock_no_mmap,
621 .sendpage = sock_no_sendpage,
f55bb7f9 622 .set_peek_off = unix_set_peek_off,
1da177e4
LT
623};
624
625static struct proto unix_proto = {
248969ae
ED
626 .name = "UNIX",
627 .owner = THIS_MODULE,
248969ae 628 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
629};
630
a09785a2
IM
631/*
632 * AF_UNIX sockets do not interact with hardware, hence they
633 * dont trigger interrupts - so it's safe for them to have
634 * bh-unsafe locking for their sk_receive_queue.lock. Split off
635 * this special lock-class by reinitializing the spinlock key:
636 */
637static struct lock_class_key af_unix_sk_receive_queue_lock_key;
638
11aa9c28 639static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
1da177e4
LT
640{
641 struct sock *sk = NULL;
642 struct unix_sock *u;
643
518de9b3
ED
644 atomic_long_inc(&unix_nr_socks);
645 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
646 goto out;
647
11aa9c28 648 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
1da177e4
LT
649 if (!sk)
650 goto out;
651
6eba6a37 652 sock_init_data(sock, sk);
a09785a2
IM
653 lockdep_set_class(&sk->sk_receive_queue.lock,
654 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
655
656 sk->sk_write_space = unix_write_space;
a0a53c8b 657 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
658 sk->sk_destruct = unix_sock_destructor;
659 u = unix_sk(sk);
40ffe67d
AV
660 u->path.dentry = NULL;
661 u->path.mnt = NULL;
fd19f329 662 spin_lock_init(&u->lock);
516e0cc5 663 atomic_long_set(&u->inflight, 0);
1fd05ba5 664 INIT_LIST_HEAD(&u->link);
57b47a53 665 mutex_init(&u->readlock); /* single task reading lock */
1da177e4 666 init_waitqueue_head(&u->peer_wait);
7123aaa3 667 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 668out:
284b327b 669 if (sk == NULL)
518de9b3 670 atomic_long_dec(&unix_nr_socks);
920de804
ED
671 else {
672 local_bh_disable();
a8076d8d 673 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
674 local_bh_enable();
675 }
1da177e4
LT
676 return sk;
677}
678
3f378b68
EP
679static int unix_create(struct net *net, struct socket *sock, int protocol,
680 int kern)
1da177e4
LT
681{
682 if (protocol && protocol != PF_UNIX)
683 return -EPROTONOSUPPORT;
684
685 sock->state = SS_UNCONNECTED;
686
687 switch (sock->type) {
688 case SOCK_STREAM:
689 sock->ops = &unix_stream_ops;
690 break;
691 /*
692 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
693 * nothing uses it.
694 */
695 case SOCK_RAW:
e27dfcea 696 sock->type = SOCK_DGRAM;
1da177e4
LT
697 case SOCK_DGRAM:
698 sock->ops = &unix_dgram_ops;
699 break;
700 case SOCK_SEQPACKET:
701 sock->ops = &unix_seqpacket_ops;
702 break;
703 default:
704 return -ESOCKTNOSUPPORT;
705 }
706
11aa9c28 707 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
1da177e4
LT
708}
709
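/*
 * Illustration (not kernel code): the three real socket types selected by
 * unix_create(), plus the BSD-compatibility quirk that SOCK_RAW is silently
 * treated as SOCK_DGRAM:
 *
 *	int a = socket(AF_UNIX, SOCK_STREAM, 0);	// unix_stream_ops
 *	int b = socket(AF_UNIX, SOCK_DGRAM, 0);		// unix_dgram_ops
 *	int c = socket(AF_UNIX, SOCK_SEQPACKET, 0);	// unix_seqpacket_ops
 *	int d = socket(AF_UNIX, SOCK_RAW, 0);		// becomes SOCK_DGRAM
 *	int e = socket(AF_UNIX, SOCK_RDM, 0);		// -ESOCKTNOSUPPORT
 */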
710static int unix_release(struct socket *sock)
711{
712 struct sock *sk = sock->sk;
713
714 if (!sk)
715 return 0;
716
ded34e0f 717 unix_release_sock(sk, 0);
1da177e4
LT
718 sock->sk = NULL;
719
ded34e0f 720 return 0;
1da177e4
LT
721}
722
723static int unix_autobind(struct socket *sock)
724{
725 struct sock *sk = sock->sk;
3b1e0a65 726 struct net *net = sock_net(sk);
1da177e4
LT
727 struct unix_sock *u = unix_sk(sk);
728 static u32 ordernum = 1;
6eba6a37 729 struct unix_address *addr;
1da177e4 730 int err;
8df73ff9 731 unsigned int retries = 0;
1da177e4 732
37ab4fa7
SL
733 err = mutex_lock_interruptible(&u->readlock);
734 if (err)
735 return err;
1da177e4
LT
736
737 err = 0;
738 if (u->addr)
739 goto out;
740
741 err = -ENOMEM;
0da974f4 742 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
743 if (!addr)
744 goto out;
745
1da177e4
LT
746 addr->name->sun_family = AF_UNIX;
747 atomic_set(&addr->refcnt, 1);
748
749retry:
750 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 751 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 752
fbe9cc4a 753 spin_lock(&unix_table_lock);
1da177e4
LT
754 ordernum = (ordernum+1)&0xFFFFF;
755
097e66c5 756 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 757 addr->hash)) {
fbe9cc4a 758 spin_unlock(&unix_table_lock);
8df73ff9
TH
759 /*
760 * __unix_find_socket_byname() may take long time if many names
761 * are already in use.
762 */
763 cond_resched();
764 /* Give up if all names seems to be in use. */
765 if (retries++ == 0xFFFFF) {
766 err = -ENOSPC;
767 kfree(addr);
768 goto out;
769 }
1da177e4
LT
770 goto retry;
771 }
772 addr->hash ^= sk->sk_type;
773
774 __unix_remove_socket(sk);
775 u->addr = addr;
776 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 777 spin_unlock(&unix_table_lock);
1da177e4
LT
778 err = 0;
779
57b47a53 780out: mutex_unlock(&u->readlock);
1da177e4
LT
781 return err;
782}
783
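/*
 * Illustration (not kernel code): autobind is triggered either by an
 * explicit bind() that passes only the address family, or implicitly on
 * connect()/sendmsg() when SO_PASSCRED is set on an unbound socket.  The
 * kernel then assigns an abstract name of five hex digits, as generated
 * above:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 *	// getsockname() now reports an abstract name such as "\0" "00a1b"
 */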
097e66c5
DL
784static struct sock *unix_find_other(struct net *net,
785 struct sockaddr_un *sunname, int len,
95c96174 786 int type, unsigned int hash, int *error)
1da177e4
LT
787{
788 struct sock *u;
421748ec 789 struct path path;
1da177e4 790 int err = 0;
ac7bfa62 791
1da177e4 792 if (sunname->sun_path[0]) {
421748ec
AV
793 struct inode *inode;
794 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1da177e4
LT
795 if (err)
796 goto fail;
a25b376b 797 inode = d_backing_inode(path.dentry);
421748ec 798 err = inode_permission(inode, MAY_WRITE);
1da177e4
LT
799 if (err)
800 goto put_fail;
801
802 err = -ECONNREFUSED;
421748ec 803 if (!S_ISSOCK(inode->i_mode))
1da177e4 804 goto put_fail;
6616f788 805 u = unix_find_socket_byinode(inode);
1da177e4
LT
806 if (!u)
807 goto put_fail;
808
809 if (u->sk_type == type)
68ac1234 810 touch_atime(&path);
1da177e4 811
421748ec 812 path_put(&path);
1da177e4 813
e27dfcea 814 err = -EPROTOTYPE;
1da177e4
LT
815 if (u->sk_type != type) {
816 sock_put(u);
817 goto fail;
818 }
819 } else {
820 err = -ECONNREFUSED;
e27dfcea 821 u = unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
822 if (u) {
823 struct dentry *dentry;
40ffe67d 824 dentry = unix_sk(u)->path.dentry;
1da177e4 825 if (dentry)
68ac1234 826 touch_atime(&unix_sk(u)->path);
1da177e4
LT
827 } else
828 goto fail;
829 }
830 return u;
831
832put_fail:
421748ec 833 path_put(&path);
1da177e4 834fail:
e27dfcea 835 *error = err;
1da177e4
LT
836 return NULL;
837}
838
faf02010
AV
839static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
840{
841 struct dentry *dentry;
842 struct path path;
843 int err = 0;
844 /*
845 * Get the parent directory, calculate the hash for last
846 * component.
847 */
848 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
849 err = PTR_ERR(dentry);
850 if (IS_ERR(dentry))
851 return err;
852
853 /*
854 * All right, let's create it.
855 */
856 err = security_path_mknod(&path, dentry, mode, 0);
857 if (!err) {
ee8ac4d6 858 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
faf02010
AV
859 if (!err) {
860 res->mnt = mntget(path.mnt);
861 res->dentry = dget(dentry);
862 }
863 }
864 done_path_create(&path, dentry);
865 return err;
866}
1da177e4
LT
867
868static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
869{
870 struct sock *sk = sock->sk;
3b1e0a65 871 struct net *net = sock_net(sk);
1da177e4 872 struct unix_sock *u = unix_sk(sk);
e27dfcea 873 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 874 char *sun_path = sunaddr->sun_path;
1da177e4 875 int err;
95c96174 876 unsigned int hash;
1da177e4
LT
877 struct unix_address *addr;
878 struct hlist_head *list;
879
880 err = -EINVAL;
881 if (sunaddr->sun_family != AF_UNIX)
882 goto out;
883
e27dfcea 884 if (addr_len == sizeof(short)) {
1da177e4
LT
885 err = unix_autobind(sock);
886 goto out;
887 }
888
889 err = unix_mkname(sunaddr, addr_len, &hash);
890 if (err < 0)
891 goto out;
892 addr_len = err;
893
37ab4fa7
SL
894 err = mutex_lock_interruptible(&u->readlock);
895 if (err)
896 goto out;
1da177e4
LT
897
898 err = -EINVAL;
899 if (u->addr)
900 goto out_up;
901
902 err = -ENOMEM;
903 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
904 if (!addr)
905 goto out_up;
906
907 memcpy(addr->name, sunaddr, addr_len);
908 addr->len = addr_len;
909 addr->hash = hash ^ sk->sk_type;
910 atomic_set(&addr->refcnt, 1);
911
dae6ad8f 912 if (sun_path[0]) {
faf02010
AV
913 struct path path;
914 umode_t mode = S_IFSOCK |
ce3b0f8d 915 (SOCK_INODE(sock)->i_mode & ~current_umask());
faf02010
AV
916 err = unix_mknod(sun_path, mode, &path);
917 if (err) {
918 if (err == -EEXIST)
919 err = -EADDRINUSE;
920 unix_release_addr(addr);
921 goto out_up;
922 }
1da177e4 923 addr->hash = UNIX_HASH_SIZE;
a25b376b 924 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
faf02010
AV
925 spin_lock(&unix_table_lock);
926 u->path = path;
927 list = &unix_socket_table[hash];
928 } else {
929 spin_lock(&unix_table_lock);
1da177e4 930 err = -EADDRINUSE;
097e66c5 931 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
932 sk->sk_type, hash)) {
933 unix_release_addr(addr);
934 goto out_unlock;
935 }
936
937 list = &unix_socket_table[addr->hash];
1da177e4
LT
938 }
939
940 err = 0;
941 __unix_remove_socket(sk);
942 u->addr = addr;
943 __unix_insert_socket(list, sk);
944
945out_unlock:
fbe9cc4a 946 spin_unlock(&unix_table_lock);
1da177e4 947out_up:
57b47a53 948 mutex_unlock(&u->readlock);
1da177e4
LT
949out:
950 return err;
1da177e4
LT
951}
952
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

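/*
 * Note on unix_state_double_lock(): taking the two state locks in pointer
 * order gives every caller the same global ordering, so two tasks that
 * concurrently (re)connect a pair of sockets to each other cannot
 * deadlock.  E.g. with sk1 at 0x...a000 and sk2 at 0x...b000, both CPUs
 * lock 0x...a000 first regardless of argument order.
 */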
1da177e4
LT
978static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
979 int alen, int flags)
980{
981 struct sock *sk = sock->sk;
3b1e0a65 982 struct net *net = sock_net(sk);
e27dfcea 983 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4 984 struct sock *other;
95c96174 985 unsigned int hash;
1da177e4
LT
986 int err;
987
988 if (addr->sa_family != AF_UNSPEC) {
989 err = unix_mkname(sunaddr, alen, &hash);
990 if (err < 0)
991 goto out;
992 alen = err;
993
994 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
995 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
996 goto out;
997
278a3de5 998restart:
e27dfcea 999 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
1000 if (!other)
1001 goto out;
1002
278a3de5
DM
1003 unix_state_double_lock(sk, other);
1004
1005 /* Apparently VFS overslept socket death. Retry. */
1006 if (sock_flag(other, SOCK_DEAD)) {
1007 unix_state_double_unlock(sk, other);
1008 sock_put(other);
1009 goto restart;
1010 }
1da177e4
LT
1011
1012 err = -EPERM;
1013 if (!unix_may_send(sk, other))
1014 goto out_unlock;
1015
1016 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1017 if (err)
1018 goto out_unlock;
1019
1020 } else {
1021 /*
1022 * 1003.1g breaking connected state with AF_UNSPEC
1023 */
1024 other = NULL;
278a3de5 1025 unix_state_double_lock(sk, other);
1da177e4
LT
1026 }
1027
1028 /*
1029 * If it was connected, reconnect.
1030 */
1031 if (unix_peer(sk)) {
1032 struct sock *old_peer = unix_peer(sk);
e27dfcea 1033 unix_peer(sk) = other;
278a3de5 1034 unix_state_double_unlock(sk, other);
1da177e4
LT
1035
1036 if (other != old_peer)
1037 unix_dgram_disconnected(sk, old_peer);
1038 sock_put(old_peer);
1039 } else {
e27dfcea 1040 unix_peer(sk) = other;
278a3de5 1041 unix_state_double_unlock(sk, other);
1da177e4 1042 }
ac7bfa62 1043 return 0;
1da177e4
LT
1044
1045out_unlock:
278a3de5 1046 unix_state_double_unlock(sk, other);
1da177e4
LT
1047 sock_put(other);
1048out:
1049 return err;
1050}
1051
1052static long unix_wait_for_peer(struct sock *other, long timeo)
1053{
1054 struct unix_sock *u = unix_sk(other);
1055 int sched;
1056 DEFINE_WAIT(wait);
1057
1058 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1059
1060 sched = !sock_flag(other, SOCK_DEAD) &&
1061 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1062 unix_recvq_full(other);
1da177e4 1063
1c92b4e5 1064 unix_state_unlock(other);
1da177e4
LT
1065
1066 if (sched)
1067 timeo = schedule_timeout(timeo);
1068
1069 finish_wait(&u->peer_wait, &wait);
1070 return timeo;
1071}
1072
1073static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1074 int addr_len, int flags)
1075{
e27dfcea 1076 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4 1077 struct sock *sk = sock->sk;
3b1e0a65 1078 struct net *net = sock_net(sk);
1da177e4
LT
1079 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1080 struct sock *newsk = NULL;
1081 struct sock *other = NULL;
1082 struct sk_buff *skb = NULL;
95c96174 1083 unsigned int hash;
1da177e4
LT
1084 int st;
1085 int err;
1086 long timeo;
1087
1088 err = unix_mkname(sunaddr, addr_len, &hash);
1089 if (err < 0)
1090 goto out;
1091 addr_len = err;
1092
f64f9e71
JP
1093 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1094 (err = unix_autobind(sock)) != 0)
1da177e4
LT
1095 goto out;
1096
1097 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1098
1099 /* First of all allocate resources.
1100 If we will make it after state is locked,
1101 we will have to recheck all again in any case.
1102 */
1103
1104 err = -ENOMEM;
1105
1106 /* create new sock for complete connection */
11aa9c28 1107 newsk = unix_create1(sock_net(sk), NULL, 0);
1da177e4
LT
1108 if (newsk == NULL)
1109 goto out;
1110
1111 /* Allocate skb for sending to listening sock */
1112 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1113 if (skb == NULL)
1114 goto out;
1115
1116restart:
1117 /* Find listening sock. */
097e66c5 1118 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1da177e4
LT
1119 if (!other)
1120 goto out;
1121
1122 /* Latch state of peer */
1c92b4e5 1123 unix_state_lock(other);
1da177e4
LT
1124
1125 /* Apparently VFS overslept socket death. Retry. */
1126 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1127 unix_state_unlock(other);
1da177e4
LT
1128 sock_put(other);
1129 goto restart;
1130 }
1131
1132 err = -ECONNREFUSED;
1133 if (other->sk_state != TCP_LISTEN)
1134 goto out_unlock;
77238f2b
TS
1135 if (other->sk_shutdown & RCV_SHUTDOWN)
1136 goto out_unlock;
1da177e4 1137
3c73419c 1138 if (unix_recvq_full(other)) {
1da177e4
LT
1139 err = -EAGAIN;
1140 if (!timeo)
1141 goto out_unlock;
1142
1143 timeo = unix_wait_for_peer(other, timeo);
1144
1145 err = sock_intr_errno(timeo);
1146 if (signal_pending(current))
1147 goto out;
1148 sock_put(other);
1149 goto restart;
ac7bfa62 1150 }
1da177e4
LT
1151
1152 /* Latch our state.
1153
e5537bfc 1154 It is tricky place. We need to grab our state lock and cannot
1da177e4
LT
1155 drop lock on peer. It is dangerous because deadlock is
1156 possible. Connect to self case and simultaneous
1157 attempt to connect are eliminated by checking socket
1158 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1159 check this before attempt to grab lock.
1160
1161 Well, and we have to recheck the state after socket locked.
1162 */
1163 st = sk->sk_state;
1164
1165 switch (st) {
1166 case TCP_CLOSE:
1167 /* This is ok... continue with connect */
1168 break;
1169 case TCP_ESTABLISHED:
1170 /* Socket is already connected */
1171 err = -EISCONN;
1172 goto out_unlock;
1173 default:
1174 err = -EINVAL;
1175 goto out_unlock;
1176 }
1177
1c92b4e5 1178 unix_state_lock_nested(sk);
1da177e4
LT
1179
1180 if (sk->sk_state != st) {
1c92b4e5
DM
1181 unix_state_unlock(sk);
1182 unix_state_unlock(other);
1da177e4
LT
1183 sock_put(other);
1184 goto restart;
1185 }
1186
3610cda5 1187 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1188 if (err) {
1c92b4e5 1189 unix_state_unlock(sk);
1da177e4
LT
1190 goto out_unlock;
1191 }
1192
1193 /* The way is open! Fastly set all the necessary fields... */
1194
1195 sock_hold(sk);
1196 unix_peer(newsk) = sk;
1197 newsk->sk_state = TCP_ESTABLISHED;
1198 newsk->sk_type = sk->sk_type;
109f6e39 1199 init_peercred(newsk);
1da177e4 1200 newu = unix_sk(newsk);
eaefd110 1201 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1202 otheru = unix_sk(other);
1203
1204 /* copy address information from listening to new sock*/
1205 if (otheru->addr) {
1206 atomic_inc(&otheru->addr->refcnt);
1207 newu->addr = otheru->addr;
1208 }
40ffe67d
AV
1209 if (otheru->path.dentry) {
1210 path_get(&otheru->path);
1211 newu->path = otheru->path;
1da177e4
LT
1212 }
1213
1214 /* Set credentials */
109f6e39 1215 copy_peercred(sk, other);
1da177e4 1216
1da177e4
LT
1217 sock->state = SS_CONNECTED;
1218 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1219 sock_hold(newsk);
1220
4e857c58 1221 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
830a1e5c 1222 unix_peer(sk) = newsk;
1da177e4 1223
1c92b4e5 1224 unix_state_unlock(sk);
1da177e4
LT
1225
	/* take ten and send info to listening sock */
1227 spin_lock(&other->sk_receive_queue.lock);
1228 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1229 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1230 unix_state_unlock(other);
676d2369 1231 other->sk_data_ready(other);
1da177e4
LT
1232 sock_put(other);
1233 return 0;
1234
1235out_unlock:
1236 if (other)
1c92b4e5 1237 unix_state_unlock(other);
1da177e4
LT
1238
1239out:
40d44446 1240 kfree_skb(skb);
1da177e4
LT
1241 if (newsk)
1242 unix_release_sock(newsk, 0);
1243 if (other)
1244 sock_put(other);
1245 return err;
1246}
1247
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

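/*
 * Illustration (not kernel code): unix_socketpair() backs socketpair(2),
 * the only way to obtain a connected AF_UNIX pair without binding a name:
 *
 *	int sv[2];
 *	char buf[4];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
 *		write(sv[0], "ping", 4);
 *		read(sv[1], buf, 4);	// receives "ping"
 *	}
 */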
90c6bd34
DB
1269static void unix_sock_inherit_flags(const struct socket *old,
1270 struct socket *new)
1271{
1272 if (test_bit(SOCK_PASSCRED, &old->flags))
1273 set_bit(SOCK_PASSCRED, &new->flags);
1274 if (test_bit(SOCK_PASSSEC, &old->flags))
1275 set_bit(SOCK_PASSSEC, &new->flags);
1276}
1277
1da177e4
LT
1278static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1279{
1280 struct sock *sk = sock->sk;
1281 struct sock *tsk;
1282 struct sk_buff *skb;
1283 int err;
1284
1285 err = -EOPNOTSUPP;
6eba6a37 1286 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1287 goto out;
1288
1289 err = -EINVAL;
1290 if (sk->sk_state != TCP_LISTEN)
1291 goto out;
1292
1293 /* If socket state is TCP_LISTEN it cannot change (for now...),
1294 * so that no locks are necessary.
1295 */
1296
1297 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1298 if (!skb) {
1299 /* This means receive shutdown. */
1300 if (err == 0)
1301 err = -EINVAL;
1302 goto out;
1303 }
1304
1305 tsk = skb->sk;
1306 skb_free_datagram(sk, skb);
1307 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1308
1309 /* attach accepted sock to socket */
1c92b4e5 1310 unix_state_lock(tsk);
1da177e4 1311 newsock->state = SS_CONNECTED;
90c6bd34 1312 unix_sock_inherit_flags(sock, newsock);
1da177e4 1313 sock_graft(tsk, newsock);
1c92b4e5 1314 unix_state_unlock(tsk);
1da177e4
LT
1315 return 0;
1316
1317out:
1318 return err;
1319}
1320
1321
1322static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1323{
1324 struct sock *sk = sock->sk;
1325 struct unix_sock *u;
13cfa97b 1326 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1327 int err = 0;
1328
1329 if (peer) {
1330 sk = unix_peer_get(sk);
1331
1332 err = -ENOTCONN;
1333 if (!sk)
1334 goto out;
1335 err = 0;
1336 } else {
1337 sock_hold(sk);
1338 }
1339
1340 u = unix_sk(sk);
1c92b4e5 1341 unix_state_lock(sk);
1da177e4
LT
1342 if (!u->addr) {
1343 sunaddr->sun_family = AF_UNIX;
1344 sunaddr->sun_path[0] = 0;
1345 *uaddr_len = sizeof(short);
1346 } else {
1347 struct unix_address *addr = u->addr;
1348
1349 *uaddr_len = addr->len;
1350 memcpy(sunaddr, addr->name, *uaddr_len);
1351 }
1c92b4e5 1352 unix_state_unlock(sk);
1da177e4
LT
1353 sock_put(sk);
1354out:
1355 return err;
1356}
1357
1358static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1359{
1360 int i;
1361
1362 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1363 UNIXCB(skb).fp = NULL;
1364
6eba6a37 1365 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4
LT
1366 unix_notinflight(scm->fp->fp[i]);
1367}
1368
7361c36c 1369static void unix_destruct_scm(struct sk_buff *skb)
1da177e4
LT
1370{
1371 struct scm_cookie scm;
1372 memset(&scm, 0, sizeof(scm));
7361c36c 1373 scm.pid = UNIXCB(skb).pid;
7361c36c
EB
1374 if (UNIXCB(skb).fp)
1375 unix_detach_fds(&scm, skb);
1da177e4
LT
1376
1377 /* Alas, it calls VFS */
1378 /* So fscking what? fput() had been SMP-safe since the last Summer */
1379 scm_destroy(&scm);
1380 sock_wfree(skb);
1381}
1382
#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}

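/*
 * The descriptors duplicated by unix_attach_fds() come from sendmsg()
 * callers that attach an SCM_RIGHTS control message.  Illustration (not
 * kernel code; fd_to_pass is a placeholder):
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type  = SCM_RIGHTS;
 *	cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &msg, 0);
 */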
f78a5fda 1419static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1420{
1421 int err = 0;
16e57262 1422
f78a5fda 1423 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1424 UNIXCB(skb).uid = scm->creds.uid;
1425 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1426 UNIXCB(skb).fp = NULL;
37a9a8df 1427 unix_get_secdata(scm, skb);
7361c36c
EB
1428 if (scm->fp && send_fds)
1429 err = unix_attach_fds(scm, skb);
1430
1431 skb->destructor = unix_destruct_scm;
1432 return err;
1433}
1434
/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

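/*
 * Illustration (not kernel code): the receiver opts in to the credentials
 * added by maybe_add_creds() with SO_PASSCRED, and then finds an
 * SCM_CREDENTIALS control message when calling recvmsg():
 *
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	// recvmsg() now yields a cmsg with cmsg_type == SCM_CREDENTIALS
 *	// carrying a struct ucred { pid, uid, gid } of the sender.
 */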
1da177e4
LT
1453/*
1454 * Send AF_UNIX data.
1455 */
1456
1b784140
YX
1457static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1458 size_t len)
1da177e4 1459{
1da177e4 1460 struct sock *sk = sock->sk;
3b1e0a65 1461 struct net *net = sock_net(sk);
1da177e4 1462 struct unix_sock *u = unix_sk(sk);
342dfc30 1463 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1da177e4
LT
1464 struct sock *other = NULL;
1465 int namelen = 0; /* fake GCC */
1466 int err;
95c96174 1467 unsigned int hash;
f78a5fda 1468 struct sk_buff *skb;
1da177e4 1469 long timeo;
7cc05662 1470 struct scm_cookie scm;
25888e30 1471 int max_level;
eb6a2481 1472 int data_len = 0;
1da177e4 1473
5f23b734 1474 wait_for_unix_gc();
7cc05662 1475 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1476 if (err < 0)
1477 return err;
1478
1479 err = -EOPNOTSUPP;
1480 if (msg->msg_flags&MSG_OOB)
1481 goto out;
1482
1483 if (msg->msg_namelen) {
1484 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1485 if (err < 0)
1486 goto out;
1487 namelen = err;
1488 } else {
1489 sunaddr = NULL;
1490 err = -ENOTCONN;
1491 other = unix_peer_get(sk);
1492 if (!other)
1493 goto out;
1494 }
1495
f64f9e71
JP
1496 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1497 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1498 goto out;
1499
1500 err = -EMSGSIZE;
1501 if (len > sk->sk_sndbuf - 32)
1502 goto out;
1503
31ff6aa5 1504 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1505 data_len = min_t(size_t,
1506 len - SKB_MAX_ALLOC,
1507 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1508 data_len = PAGE_ALIGN(data_len);
1509
1510 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1511 }
eb6a2481
ED
1512
1513 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1514 msg->msg_flags & MSG_DONTWAIT, &err,
1515 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1516 if (skb == NULL)
1da177e4
LT
1517 goto out;
1518
7cc05662 1519 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1520 if (err < 0)
7361c36c 1521 goto out_free;
25888e30 1522 max_level = err + 1;
877ce7c1 1523
eb6a2481
ED
1524 skb_put(skb, len - data_len);
1525 skb->data_len = data_len;
1526 skb->len = len;
c0371da6 1527 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1528 if (err)
1529 goto out_free;
1530
1531 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1532
1533restart:
1534 if (!other) {
1535 err = -ECONNRESET;
1536 if (sunaddr == NULL)
1537 goto out_free;
1538
097e66c5 1539 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1540 hash, &err);
e27dfcea 1541 if (other == NULL)
1da177e4
LT
1542 goto out_free;
1543 }
1544
d6ae3bae
AC
1545 if (sk_filter(other, skb) < 0) {
1546 /* Toss the packet but do not return any error to the sender */
1547 err = len;
1548 goto out_free;
1549 }
1550
1c92b4e5 1551 unix_state_lock(other);
1da177e4
LT
1552 err = -EPERM;
1553 if (!unix_may_send(sk, other))
1554 goto out_unlock;
1555
1556 if (sock_flag(other, SOCK_DEAD)) {
1557 /*
1558 * Check with 1003.1g - what should
1559 * datagram error
1560 */
1c92b4e5 1561 unix_state_unlock(other);
1da177e4
LT
1562 sock_put(other);
1563
1564 err = 0;
1c92b4e5 1565 unix_state_lock(sk);
1da177e4 1566 if (unix_peer(sk) == other) {
e27dfcea 1567 unix_peer(sk) = NULL;
1c92b4e5 1568 unix_state_unlock(sk);
1da177e4
LT
1569
1570 unix_dgram_disconnected(sk, other);
1571 sock_put(other);
1572 err = -ECONNREFUSED;
1573 } else {
1c92b4e5 1574 unix_state_unlock(sk);
1da177e4
LT
1575 }
1576
1577 other = NULL;
1578 if (err)
1579 goto out_free;
1580 goto restart;
1581 }
1582
1583 err = -EPIPE;
1584 if (other->sk_shutdown & RCV_SHUTDOWN)
1585 goto out_unlock;
1586
1587 if (sk->sk_type != SOCK_SEQPACKET) {
1588 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1589 if (err)
1590 goto out_unlock;
1591 }
1592
3c73419c 1593 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1da177e4
LT
1594 if (!timeo) {
1595 err = -EAGAIN;
1596 goto out_unlock;
1597 }
1598
1599 timeo = unix_wait_for_peer(other, timeo);
1600
1601 err = sock_intr_errno(timeo);
1602 if (signal_pending(current))
1603 goto out_free;
1604
1605 goto restart;
1606 }
1607
3f66116e
AC
1608 if (sock_flag(other, SOCK_RCVTSTAMP))
1609 __net_timestamp(skb);
16e57262 1610 maybe_add_creds(skb, sock, other);
1da177e4 1611 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1612 if (max_level > unix_sk(other)->recursion_level)
1613 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1614 unix_state_unlock(other);
676d2369 1615 other->sk_data_ready(other);
1da177e4 1616 sock_put(other);
7cc05662 1617 scm_destroy(&scm);
1da177e4
LT
1618 return len;
1619
1620out_unlock:
1c92b4e5 1621 unix_state_unlock(other);
1da177e4
LT
1622out_free:
1623 kfree_skb(skb);
1624out:
1625 if (other)
1626 sock_put(other);
7cc05662 1627 scm_destroy(&scm);
1da177e4
LT
1628 return err;
1629}
1630
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ	(PAGE_SIZE << get_order(32768))
ac7bfa62 1635
1b784140
YX
1636static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1637 size_t len)
1da177e4 1638{
1da177e4
LT
1639 struct sock *sk = sock->sk;
1640 struct sock *other = NULL;
6eba6a37 1641 int err, size;
f78a5fda 1642 struct sk_buff *skb;
e27dfcea 1643 int sent = 0;
7cc05662 1644 struct scm_cookie scm;
8ba69ba6 1645 bool fds_sent = false;
25888e30 1646 int max_level;
e370a723 1647 int data_len;
1da177e4 1648
5f23b734 1649 wait_for_unix_gc();
7cc05662 1650 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1651 if (err < 0)
1652 return err;
1653
1654 err = -EOPNOTSUPP;
1655 if (msg->msg_flags&MSG_OOB)
1656 goto out_err;
1657
1658 if (msg->msg_namelen) {
1659 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1660 goto out_err;
1661 } else {
1da177e4 1662 err = -ENOTCONN;
830a1e5c 1663 other = unix_peer(sk);
1da177e4
LT
1664 if (!other)
1665 goto out_err;
1666 }
1667
1668 if (sk->sk_shutdown & SEND_SHUTDOWN)
1669 goto pipe_err;
1670
6eba6a37 1671 while (sent < len) {
e370a723 1672 size = len - sent;
1da177e4
LT
1673
1674 /* Keep two messages in the pipe so it schedules better */
e370a723 1675 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 1676
e370a723
ED
1677 /* allow fallback to order-0 allocations */
1678 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 1679
e370a723 1680 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 1681
31ff6aa5
KT
1682 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1683
e370a723 1684 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
1685 msg->msg_flags & MSG_DONTWAIT, &err,
1686 get_order(UNIX_SKB_FRAGS_SZ));
e370a723 1687 if (!skb)
1da177e4
LT
1688 goto out_err;
1689
f78a5fda 1690 /* Only send the fds in the first buffer */
7cc05662 1691 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 1692 if (err < 0) {
7361c36c 1693 kfree_skb(skb);
f78a5fda 1694 goto out_err;
6209344f 1695 }
25888e30 1696 max_level = err + 1;
7361c36c 1697 fds_sent = true;
1da177e4 1698
e370a723
ED
1699 skb_put(skb, size - data_len);
1700 skb->data_len = data_len;
1701 skb->len = size;
c0371da6 1702 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 1703 if (err) {
1da177e4 1704 kfree_skb(skb);
f78a5fda 1705 goto out_err;
1da177e4
LT
1706 }
1707
1c92b4e5 1708 unix_state_lock(other);
1da177e4
LT
1709
1710 if (sock_flag(other, SOCK_DEAD) ||
1711 (other->sk_shutdown & RCV_SHUTDOWN))
1712 goto pipe_err_free;
1713
16e57262 1714 maybe_add_creds(skb, sock, other);
1da177e4 1715 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1716 if (max_level > unix_sk(other)->recursion_level)
1717 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1718 unix_state_unlock(other);
676d2369 1719 other->sk_data_ready(other);
e27dfcea 1720 sent += size;
1da177e4 1721 }
1da177e4 1722
7cc05662 1723 scm_destroy(&scm);
1da177e4
LT
1724
1725 return sent;
1726
1727pipe_err_free:
1c92b4e5 1728 unix_state_unlock(other);
1da177e4
LT
1729 kfree_skb(skb);
1730pipe_err:
6eba6a37
ED
1731 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1732 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1733 err = -EPIPE;
1734out_err:
7cc05662 1735 scm_destroy(&scm);
1da177e4
LT
1736 return sent ? : err;
1737}
1738
869e7c62
HFS
1739static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1740 int offset, size_t size, int flags)
1741{
1742 int err = 0;
1743 bool send_sigpipe = true;
1744 struct sock *other, *sk = socket->sk;
1745 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1746
1747 if (flags & MSG_OOB)
1748 return -EOPNOTSUPP;
1749
1750 other = unix_peer(sk);
1751 if (!other || sk->sk_state != TCP_ESTABLISHED)
1752 return -ENOTCONN;
1753
1754 if (false) {
1755alloc_skb:
1756 unix_state_unlock(other);
1757 mutex_unlock(&unix_sk(other)->readlock);
1758 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1759 &err, 0);
1760 if (!newskb)
1761 return err;
1762 }
1763
1764 /* we must acquire readlock as we modify already present
1765 * skbs in the sk_receive_queue and mess with skb->len
1766 */
1767 err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1768 if (err) {
1769 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1770 send_sigpipe = false;
1771 goto err;
1772 }
1773
1774 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1775 err = -EPIPE;
1776 goto err_unlock;
1777 }
1778
1779 unix_state_lock(other);
1780
1781 if (sock_flag(other, SOCK_DEAD) ||
1782 other->sk_shutdown & RCV_SHUTDOWN) {
1783 err = -EPIPE;
1784 goto err_state_unlock;
1785 }
1786
1787 skb = skb_peek_tail(&other->sk_receive_queue);
1788 if (tail && tail == skb) {
1789 skb = newskb;
1790 } else if (!skb) {
1791 if (newskb)
1792 skb = newskb;
1793 else
1794 goto alloc_skb;
1795 } else if (newskb) {
1796 /* this is fast path, we don't necessarily need to
1797 * call to kfree_skb even though with newskb == NULL
1798 * this - does no harm
1799 */
1800 consume_skb(newskb);
1801 }
1802
1803 if (skb_append_pagefrags(skb, page, offset, size)) {
1804 tail = skb;
1805 goto alloc_skb;
1806 }
1807
1808 skb->len += size;
1809 skb->data_len += size;
1810 skb->truesize += size;
1811 atomic_add(size, &sk->sk_wmem_alloc);
1812
1813 if (newskb)
1814 __skb_queue_tail(&other->sk_receive_queue, newskb);
1815
1816 unix_state_unlock(other);
1817 mutex_unlock(&unix_sk(other)->readlock);
1818
1819 other->sk_data_ready(other);
1820
1821 return size;
1822
1823err_state_unlock:
1824 unix_state_unlock(other);
1825err_unlock:
1826 mutex_unlock(&unix_sk(other)->readlock);
1827err:
1828 kfree_skb(newskb);
1829 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1830 send_sig(SIGPIPE, current, 0);
1831 return err;
1832}
1833
1b784140
YX
1834static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1835 size_t len)
1da177e4
LT
1836{
1837 int err;
1838 struct sock *sk = sock->sk;
ac7bfa62 1839
1da177e4
LT
1840 err = sock_error(sk);
1841 if (err)
1842 return err;
1843
1844 if (sk->sk_state != TCP_ESTABLISHED)
1845 return -ENOTCONN;
1846
1847 if (msg->msg_namelen)
1848 msg->msg_namelen = 0;
1849
1b784140 1850 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 1851}
ac7bfa62 1852
1b784140
YX
1853static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
1854 size_t size, int flags)
a05d2ad1
EB
1855{
1856 struct sock *sk = sock->sk;
1857
1858 if (sk->sk_state != TCP_ESTABLISHED)
1859 return -ENOTCONN;
1860
1b784140 1861 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
1862}
1863
1da177e4
LT
1864static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1865{
1866 struct unix_sock *u = unix_sk(sk);
1867
1da177e4
LT
1868 if (u->addr) {
1869 msg->msg_namelen = u->addr->len;
1870 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1871 }
1872}
1873
1b784140
YX
1874static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1875 size_t size, int flags)
1da177e4 1876{
7cc05662 1877 struct scm_cookie scm;
1da177e4
LT
1878 struct sock *sk = sock->sk;
1879 struct unix_sock *u = unix_sk(sk);
1880 int noblock = flags & MSG_DONTWAIT;
1881 struct sk_buff *skb;
1882 int err;
f55bb7f9 1883 int peeked, skip;
1884
1885 err = -EOPNOTSUPP;
1886 if (flags&MSG_OOB)
1887 goto out;
1888
b3ca9b02 1889 err = mutex_lock_interruptible(&u->readlock);
1890 if (unlikely(err)) {
1891 /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
1892 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1893 */
1894 err = noblock ? -EAGAIN : -ERESTARTSYS;
1895 goto out;
1896 }
1da177e4 1897
1898 skip = sk_peek_offset(sk, flags);
1899
1900 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1901 if (!skb) {
1902 unix_state_lock(sk);
1903 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1904 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1905 (sk->sk_shutdown & RCV_SHUTDOWN))
1906 err = 0;
1907 unix_state_unlock(sk);
1da177e4 1908 goto out_unlock;
0a112258 1909 }
1da177e4 1910
1911 wake_up_interruptible_sync_poll(&u->peer_wait,
1912 POLLOUT | POLLWRNORM | POLLWRBAND);
1913
1914 if (msg->msg_name)
1915 unix_copy_addr(msg, skb->sk);
1916
1917 if (size > skb->len - skip)
1918 size = skb->len - skip;
1919 else if (size < skb->len - skip)
1920 msg->msg_flags |= MSG_TRUNC;
1921
51f3d02b 1922 err = skb_copy_datagram_msg(skb, skip, msg, size);
1923 if (err)
1924 goto out_free;
1925
1926 if (sock_flag(sk, SOCK_RCVTSTAMP))
1927 __sock_recv_timestamp(msg, sk, skb);
1928
1929 memset(&scm, 0, sizeof(scm));
1930
1931 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1932 unix_set_secdata(&scm, skb);
1da177e4 1933
6eba6a37 1934 if (!(flags & MSG_PEEK)) {
1da177e4 1935 if (UNIXCB(skb).fp)
7cc05662 1936 unix_detach_fds(&scm, skb);
1937
1938 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 1939 } else {
1940 /* It is questionable what to do on PEEK; we could:
1941 - not return fds: good, but too simple 8)
1942 - return fds, and not return them on read: the old strategy,
1943 apparently wrong
1944 - clone fds: chosen for now, as the most universal
1945 solution
1946
1947 POSIX 1003.1g does not actually define this clearly
1948 at all. POSIX 1003.1g doesn't define a lot of things
1949 clearly, however!
1950
1da177e4 1951 */
1952
1953 sk_peek_offset_fwd(sk, size);
1954
1da177e4 1955 if (UNIXCB(skb).fp)
7cc05662 1956 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 1957 }
9f6f9af7 1958 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 1959
7cc05662 1960 scm_recv(sock, msg, &scm, flags);
1961
1962out_free:
6eba6a37 1963 skb_free_datagram(sk, skb);
1da177e4 1964out_unlock:
57b47a53 1965 mutex_unlock(&u->readlock);
1966out:
1967 return err;
1968}
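
/*
 * Illustrative userspace sketch (not part of this file): because the
 * function above returns the full datagram length when MSG_TRUNC is
 * passed, a receiver can combine MSG_PEEK | MSG_TRUNC to learn the
 * size of the next datagram without consuming it, then fetch it with
 * a correctly sized buffer.  The helper name is made up and error
 * handling is omitted.
 *
 *     #include <sys/types.h>
 *     #include <sys/socket.h>
 *     #include <stdlib.h>
 *
 *     static void *recv_whole_dgram(int fd, ssize_t *len)
 *     {
 *             char probe;
 *             void *buf;
 *             // Returns the real datagram size; the datagram stays queued.
 *             ssize_t sz = recv(fd, &probe, 1, MSG_PEEK | MSG_TRUNC);
 *
 *             if (sz < 0)
 *                     return NULL;
 *             buf = malloc(sz ? sz : 1);
 *             *len = recv(fd, buf, sz, 0);    // now actually consume it
 *             return buf;
 *     }
 */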
1969
1970/*
79f632c7 1971 * Sleep until more data has arrived. But check for races.
1da177e4 1972 */
79f632c7 1973static long unix_stream_data_wait(struct sock *sk, long timeo,
2b514574 1974 struct sk_buff *last, unsigned int last_len)
1da177e4 1975{
2b514574 1976 struct sk_buff *tail;
1977 DEFINE_WAIT(wait);
1978
1c92b4e5 1979 unix_state_lock(sk);
1980
1981 for (;;) {
aa395145 1982 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 1983
1984 tail = skb_peek_tail(&sk->sk_receive_queue);
1985 if (tail != last ||
1986 (tail && tail->len != last_len) ||
1987 sk->sk_err ||
1988 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1989 signal_pending(current) ||
1990 !timeo)
1991 break;
1992
1993 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1c92b4e5 1994 unix_state_unlock(sk);
2b15af6f 1995 timeo = freezable_schedule_timeout(timeo);
1c92b4e5 1996 unix_state_lock(sk);
1997
1998 if (sock_flag(sk, SOCK_DEAD))
1999 break;
2000
2001 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2002 }
2003
aa395145 2004 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2005 unix_state_unlock(sk);
2006 return timeo;
2007}
2008
2009static unsigned int unix_skb_len(const struct sk_buff *skb)
2010{
2011 return skb->len - UNIXCB(skb).consumed;
2012}
2013
2014struct unix_stream_read_state {
2015 int (*recv_actor)(struct sk_buff *, int, int,
2016 struct unix_stream_read_state *);
2017 struct socket *socket;
2018 struct msghdr *msg;
2019 struct pipe_inode_info *pipe;
2020 size_t size;
2021 int flags;
2022 unsigned int splice_flags;
2023};
2024
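/* unix_stream_read_generic() below implements the receive loop once;
 * unix_stream_recvmsg() and unix_stream_splice_read() differ only in
 * the recv_actor callback, which copies each chunk either into the
 * caller's msghdr or into a pipe.
 */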
2025static int unix_stream_read_generic(struct unix_stream_read_state *state)
1da177e4 2026{
7cc05662 2027 struct scm_cookie scm;
2b514574 2028 struct socket *sock = state->socket;
2029 struct sock *sk = sock->sk;
2030 struct unix_sock *u = unix_sk(sk);
1da177e4 2031 int copied = 0;
2b514574 2032 int flags = state->flags;
de144391 2033 int noblock = flags & MSG_DONTWAIT;
2b514574 2034 bool check_creds = false;
2035 int target;
2036 int err = 0;
2037 long timeo;
fc0d7536 2038 int skip;
2039 size_t size = state->size;
2040 unsigned int last_len;
2041
2042 err = -EINVAL;
2043 if (sk->sk_state != TCP_ESTABLISHED)
2044 goto out;
2045
2046 err = -EOPNOTSUPP;
2b514574 2047 if (flags & MSG_OOB)
2048 goto out;
2049
2b514574 2050 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2051 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2052
2053 memset(&scm, 0, sizeof(scm));
2054
2055 /* Lock the socket to prevent queue disordering
2056 * while we sleep copying data out to user space
2057 */
b3ca9b02 2058 err = mutex_lock_interruptible(&u->readlock);
2059 if (unlikely(err)) {
2060 /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2061 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2062 */
2063 err = noblock ? -EAGAIN : -ERESTARTSYS;
2064 goto out;
2065 }
1da177e4 2066
6eba6a37 2067 do {
1da177e4 2068 int chunk;
79f632c7 2069 struct sk_buff *skb, *last;
1da177e4 2070
3c0d2f37 2071 unix_state_lock(sk);
2072 if (sock_flag(sk, SOCK_DEAD)) {
2073 err = -ECONNRESET;
2074 goto unlock;
2075 }
79f632c7 2076 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2077 last_len = last ? last->len : 0;
fc0d7536 2078again:
6eba6a37 2079 if (skb == NULL) {
25888e30 2080 unix_sk(sk)->recursion_level = 0;
1da177e4 2081 if (copied >= target)
3c0d2f37 2082 goto unlock;
2083
2084 /*
2085 * POSIX 1003.1g mandates this order.
2086 */
ac7bfa62 2087
2088 err = sock_error(sk);
2089 if (err)
3c0d2f37 2090 goto unlock;
1da177e4 2091 if (sk->sk_shutdown & RCV_SHUTDOWN)
2092 goto unlock;
2093
2094 unix_state_unlock(sk);
2095 err = -EAGAIN;
2096 if (!timeo)
2097 break;
57b47a53 2098 mutex_unlock(&u->readlock);
1da177e4 2099
2100 timeo = unix_stream_data_wait(sk, timeo, last,
2101 last_len);
1da177e4 2102
2103 if (signal_pending(current) ||
2104 mutex_lock_interruptible(&u->readlock)) {
2105 err = sock_intr_errno(timeo);
2106 goto out;
2107 }
b3ca9b02 2108
1da177e4 2109 continue;
2b514574 2110unlock:
2111 unix_state_unlock(sk);
2112 break;
1da177e4 2113 }
fc0d7536 2114
79f632c7 2115 skip = sk_peek_offset(sk, flags);
2116 while (skip >= unix_skb_len(skb)) {
2117 skip -= unix_skb_len(skb);
79f632c7 2118 last = skb;
2b514574 2119 last_len = skb->len;
fc0d7536 2120 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2121 if (!skb)
2122 goto again;
2123 }
2124
3c0d2f37 2125 unix_state_unlock(sk);
2126
2127 if (check_creds) {
2128 /* Never glue messages from different writers */
2129 if ((UNIXCB(skb).pid != scm.pid) ||
2130 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2131 !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
2132 !unix_secdata_eq(&scm, skb))
1da177e4 2133 break;
0e82e7f6 2134 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2135 /* Copy credentials */
7cc05662 2136 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2137 unix_set_secdata(&scm, skb);
2b514574 2138 check_creds = true;
2139 }
2140
2141 /* Copy address just once */
2142 if (state->msg && state->msg->msg_name) {
2143 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2144 state->msg->msg_name);
2145 unix_copy_addr(state->msg, skb->sk);
2146 sunaddr = NULL;
2147 }
2148
e370a723 2149 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2150 chunk = state->recv_actor(skb, skip, chunk, state);
2151 if (chunk < 0) {
2152 if (copied == 0)
2153 copied = -EFAULT;
2154 break;
2155 }
2156 copied += chunk;
2157 size -= chunk;
2158
2159 /* Mark read part of skb as used */
6eba6a37 2160 if (!(flags & MSG_PEEK)) {
e370a723 2161 UNIXCB(skb).consumed += chunk;
1da177e4 2162
2163 sk_peek_offset_bwd(sk, chunk);
2164
1da177e4 2165 if (UNIXCB(skb).fp)
7cc05662 2166 unix_detach_fds(&scm, skb);
1da177e4 2167
e370a723 2168 if (unix_skb_len(skb))
1da177e4 2169 break;
1da177e4 2170
6f01fd6e 2171 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2172 consume_skb(skb);
1da177e4 2173
7cc05662 2174 if (scm.fp)
1da177e4 2175 break;
6eba6a37 2176 } else {
2177 /* It is questionable, see note in unix_dgram_recvmsg.
2178 */
2179 if (UNIXCB(skb).fp)
7cc05662 2180 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 2181
2182 sk_peek_offset_fwd(sk, chunk);
2183
2184 break;
2185 }
2186 } while (size);
2187
57b47a53 2188 mutex_unlock(&u->readlock);
2189 if (state->msg)
2190 scm_recv(sock, state->msg, &scm, flags);
2191 else
2192 scm_destroy(&scm);
2193out:
2194 return copied ? : err;
2195}
2196
2197static int unix_stream_read_actor(struct sk_buff *skb,
2198 int skip, int chunk,
2199 struct unix_stream_read_state *state)
2200{
2201 int ret;
2202
2203 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2204 state->msg, chunk);
2205 return ret ?: chunk;
2206}
2207
2208static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2209 size_t size, int flags)
2210{
2211 struct unix_stream_read_state state = {
2212 .recv_actor = unix_stream_read_actor,
2213 .socket = sock,
2214 .msg = msg,
2215 .size = size,
2216 .flags = flags
2217 };
2218
2219 return unix_stream_read_generic(&state);
2220}
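
/*
 * Illustrative userspace sketch (not part of this file): the
 * SOCK_PASSCRED handling in the generic read path above attaches the
 * sender's credentials to every message; with SO_PASSCRED enabled a
 * receiver can read them back as an SCM_CREDENTIALS control message.
 * The helper name is made up and error handling is omitted.
 *
 *     #define _GNU_SOURCE             // for struct ucred
 *     #include <sys/socket.h>
 *     #include <string.h>
 *
 *     static ssize_t recv_with_creds(int fd, void *buf, size_t len,
 *                                    struct ucred *peer)
 *     {
 *             char cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *             struct iovec iov = { .iov_base = buf, .iov_len = len };
 *             struct msghdr msg = {
 *                     .msg_iov = &iov, .msg_iovlen = 1,
 *                     .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *             };
 *             struct cmsghdr *c;
 *             ssize_t n;
 *             int one = 1;
 *
 *             // Normally done once at socket setup time.
 *             setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *             n = recvmsg(fd, &msg, 0);
 *             c = CMSG_FIRSTHDR(&msg);
 *             if (c && c->cmsg_level == SOL_SOCKET &&
 *                 c->cmsg_type == SCM_CREDENTIALS)
 *                     memcpy(peer, CMSG_DATA(c), sizeof(*peer));
 *             return n;
 *     }
 */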
2221
2222static ssize_t skb_unix_socket_splice(struct sock *sk,
2223 struct pipe_inode_info *pipe,
2224 struct splice_pipe_desc *spd)
2225{
2226 int ret;
2227 struct unix_sock *u = unix_sk(sk);
2228
2229 mutex_unlock(&u->readlock);
2230 ret = splice_to_pipe(pipe, spd);
2231 mutex_lock(&u->readlock);
2232
2233 return ret;
2234}
2235
2236static int unix_stream_splice_actor(struct sk_buff *skb,
2237 int skip, int chunk,
2238 struct unix_stream_read_state *state)
2239{
2240 return skb_splice_bits(skb, state->socket->sk,
2241 UNIXCB(skb).consumed + skip,
2242 state->pipe, chunk, state->splice_flags,
2243 skb_unix_socket_splice);
2244}
2245
2246static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2247 struct pipe_inode_info *pipe,
2248 size_t size, unsigned int flags)
2249{
2250 struct unix_stream_read_state state = {
2251 .recv_actor = unix_stream_splice_actor,
2252 .socket = sock,
2253 .pipe = pipe,
2254 .size = size,
2255 .splice_flags = flags,
2256 };
2257
2258 if (unlikely(*ppos))
2259 return -ESPIPE;
2260
2261 if (sock->file->f_flags & O_NONBLOCK ||
2262 flags & SPLICE_F_NONBLOCK)
2263 state.flags = MSG_DONTWAIT;
2264
2265 return unix_stream_read_generic(&state);
2266}
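
/*
 * Illustrative userspace sketch (not part of this file):
 * unix_stream_splice_read() above is what allows splice() to move
 * queued bytes from a connected AF_UNIX stream socket straight into a
 * pipe.  The helper name is made up and error handling is omitted.
 *
 *     #define _GNU_SOURCE             // for splice()
 *     #include <sys/types.h>
 *     #include <fcntl.h>
 *
 *     static ssize_t sock_to_pipe(int sock_fd, int pipe_wr_fd, size_t len)
 *     {
 *             // Returns the number of bytes moved, 0 on EOF, or -1 with
 *             // errno == EAGAIN when no data is queued on the socket.
 *             return splice(sock_fd, NULL, pipe_wr_fd, NULL, len,
 *                           SPLICE_F_NONBLOCK);
 *     }
 */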
2267
2268static int unix_shutdown(struct socket *sock, int mode)
2269{
2270 struct sock *sk = sock->sk;
2271 struct sock *other;
2272
2273 if (mode < SHUT_RD || mode > SHUT_RDWR)
2274 return -EINVAL;
2275 /* This maps:
2276 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2277 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2278 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2279 */
2280 ++mode;
2281
2282 unix_state_lock(sk);
2283 sk->sk_shutdown |= mode;
2284 other = unix_peer(sk);
2285 if (other)
2286 sock_hold(other);
2287 unix_state_unlock(sk);
2288 sk->sk_state_change(sk);
2289
2290 if (other &&
2291 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2292
2293 int peer_mode = 0;
2294
2295 if (mode&RCV_SHUTDOWN)
2296 peer_mode |= SEND_SHUTDOWN;
2297 if (mode&SEND_SHUTDOWN)
2298 peer_mode |= RCV_SHUTDOWN;
2299 unix_state_lock(other);
2300 other->sk_shutdown |= peer_mode;
2301 unix_state_unlock(other);
2302 other->sk_state_change(other);
2303 if (peer_mode == SHUTDOWN_MASK)
2304 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2305 else if (peer_mode & RCV_SHUTDOWN)
2306 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2307 }
2308 if (other)
2309 sock_put(other);
2310
2311 return 0;
2312}
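
/*
 * Illustrative userspace sketch (not part of this file) of the
 * shutdown semantics implemented above: SHUT_WR on one end is
 * propagated to the peer as a receive shutdown, so the peer sees
 * end-of-file while traffic in the other direction keeps working.
 * Error handling is omitted.
 *
 *     #include <sys/socket.h>
 *     #include <unistd.h>
 *
 *     int main(void)
 *     {
 *             int fds[2];
 *             char c;
 *
 *             socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
 *             shutdown(fds[0], SHUT_WR);
 *
 *             read(fds[1], &c, 1);                 // returns 0: EOF
 *             send(fds[0], "x", 1, MSG_NOSIGNAL);  // fails with EPIPE
 *             write(fds[1], "y", 1);               // other direction still works
 *             return 0;
 *     }
 */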
2313
2314long unix_inq_len(struct sock *sk)
2315{
2316 struct sk_buff *skb;
2317 long amount = 0;
2318
2319 if (sk->sk_state == TCP_LISTEN)
2320 return -EINVAL;
2321
2322 spin_lock(&sk->sk_receive_queue.lock);
2323 if (sk->sk_type == SOCK_STREAM ||
2324 sk->sk_type == SOCK_SEQPACKET) {
2325 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2326 amount += unix_skb_len(skb);
2327 } else {
2328 skb = skb_peek(&sk->sk_receive_queue);
2329 if (skb)
2330 amount = skb->len;
2331 }
2332 spin_unlock(&sk->sk_receive_queue.lock);
2333
2334 return amount;
2335}
2336EXPORT_SYMBOL_GPL(unix_inq_len);
2337
2338long unix_outq_len(struct sock *sk)
2339{
2340 return sk_wmem_alloc_get(sk);
2341}
2342EXPORT_SYMBOL_GPL(unix_outq_len);
2343
2344static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2345{
2346 struct sock *sk = sock->sk;
e27dfcea 2347 long amount = 0;
2348 int err;
2349
2350 switch (cmd) {
2351 case SIOCOUTQ:
885ee74d 2352 amount = unix_outq_len(sk);
2353 err = put_user(amount, (int __user *)arg);
2354 break;
2355 case SIOCINQ:
2356 amount = unix_inq_len(sk);
2357 if (amount < 0)
2358 err = amount;
2359 else
1da177e4 2360 err = put_user(amount, (int __user *)arg);
885ee74d 2361 break;
2362 default:
2363 err = -ENOIOCTLCMD;
2364 break;
2365 }
2366 return err;
2367}
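
/*
 * Illustrative userspace sketch (not part of this file): the ioctls
 * handled above let a process ask how much data is queued on an
 * AF_UNIX socket, SIOCINQ for unread bytes in its receive queue and
 * SIOCOUTQ for bytes it has sent that the peer has not consumed yet.
 * The helper name is made up and error handling is omitted.
 *
 *     #include <sys/ioctl.h>
 *     #include <linux/sockios.h>
 *     #include <stdio.h>
 *
 *     static void print_queues(int fd)
 *     {
 *             int inq = 0, outq = 0;
 *
 *             ioctl(fd, SIOCINQ, &inq);    // bytes waiting to be read
 *             ioctl(fd, SIOCOUTQ, &outq);  // bytes not yet taken by the peer
 *             printf("inq=%d outq=%d\n", inq, outq);
 *     }
 */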
2368
6eba6a37 2369static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2370{
2371 struct sock *sk = sock->sk;
2372 unsigned int mask;
2373
aa395145 2374 sock_poll_wait(file, sk_sleep(sk), wait);
2375 mask = 0;
2376
2377 /* exceptional events? */
2378 if (sk->sk_err)
2379 mask |= POLLERR;
2380 if (sk->sk_shutdown == SHUTDOWN_MASK)
2381 mask |= POLLHUP;
f348d70a 2382 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2383 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2384
2385 /* readable? */
db40980f 2386 if (!skb_queue_empty(&sk->sk_receive_queue))
2387 mask |= POLLIN | POLLRDNORM;
2388
2389 /* Connection-based need to check for termination and startup */
2390 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2391 sk->sk_state == TCP_CLOSE)
2392 mask |= POLLHUP;
2393
2394 /*
2395 * we set writable also when the other side has shut down the
2396 * connection. This prevents stuck sockets.
2397 */
2398 if (unix_writable(sk))
2399 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2400
2401 return mask;
2402}
2403
2404static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2405 poll_table *wait)
3c73419c 2406{
2407 struct sock *sk = sock->sk, *other;
2408 unsigned int mask, writable;
3c73419c 2409
aa395145 2410 sock_poll_wait(file, sk_sleep(sk), wait);
2411 mask = 0;
2412
2413 /* exceptional events? */
2414 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
7d4c04fc 2415 mask |= POLLERR |
8facd5fb 2416 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
7d4c04fc 2417
3c73419c 2418 if (sk->sk_shutdown & RCV_SHUTDOWN)
5456f09a 2419 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2420 if (sk->sk_shutdown == SHUTDOWN_MASK)
2421 mask |= POLLHUP;
2422
2423 /* readable? */
5456f09a 2424 if (!skb_queue_empty(&sk->sk_receive_queue))
2425 mask |= POLLIN | POLLRDNORM;
2426
2427 /* Connection-based need to check for termination and startup */
2428 if (sk->sk_type == SOCK_SEQPACKET) {
2429 if (sk->sk_state == TCP_CLOSE)
2430 mask |= POLLHUP;
2431 /* connection hasn't started yet? */
2432 if (sk->sk_state == TCP_SYN_SENT)
2433 return mask;
2434 }
2435
973a34aa 2436 /* No write status requested, avoid expensive OUT tests. */
626cf236 2437 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2438 return mask;
2439
ec0d215f 2440 writable = unix_writable(sk);
2441 other = unix_peer_get(sk);
2442 if (other) {
2443 if (unix_peer(other) != sk) {
2444 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2445 if (unix_recvq_full(other))
2446 writable = 0;
ec0d215f 2447 }
5456f09a 2448 sock_put(other);
2449 }
2450
2451 if (writable)
2452 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2453 else
2454 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2455
2456 return mask;
2457}
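
/*
 * Illustrative userspace sketch (not part of this file): for a
 * connected AF_UNIX datagram socket the poll logic above reports
 * POLLOUT only while the peer's receive queue has room, so a writer
 * can block in poll() instead of spinning on -EAGAIN.  The helper
 * name is made up and error handling is omitted.
 *
 *     #include <poll.h>
 *
 *     static int wait_writable(int fd, int timeout_ms)
 *     {
 *             struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *             // Returns 1 when the socket is writable, 0 on timeout.
 *             return poll(&pfd, 1, timeout_ms);
 *     }
 */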
2458
2459#ifdef CONFIG_PROC_FS
a53eb3fe 2460
2461#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2462
2463#define get_bucket(x) ((x) >> BUCKET_SPACE)
2464#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2465#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2466
7123aaa3 2467static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2468{
2469 unsigned long offset = get_offset(*pos);
2470 unsigned long bucket = get_bucket(*pos);
2471 struct sock *sk;
2472 unsigned long count = 0;
1da177e4 2473
2474 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2475 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2476 continue;
2477 if (++count == offset)
2478 break;
2479 }
2480
2481 return sk;
2482}
2483
2484static struct sock *unix_next_socket(struct seq_file *seq,
2485 struct sock *sk,
2486 loff_t *pos)
2487{
2488 unsigned long bucket;
2489
2490 while (sk > (struct sock *)SEQ_START_TOKEN) {
2491 sk = sk_next(sk);
2492 if (!sk)
2493 goto next_bucket;
2494 if (sock_net(sk) == seq_file_net(seq))
2495 return sk;
1da177e4 2496 }
2497
2498 do {
2499 sk = unix_from_bucket(seq, pos);
2500 if (sk)
2501 return sk;
2502
2503next_bucket:
2504 bucket = get_bucket(*pos) + 1;
2505 *pos = set_bucket_offset(bucket, 1);
2506 } while (bucket < ARRAY_SIZE(unix_socket_table));
2507
2508 return NULL;
2509}
2510
1da177e4 2511static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2512 __acquires(unix_table_lock)
1da177e4 2513{
fbe9cc4a 2514 spin_lock(&unix_table_lock);
2515
2516 if (!*pos)
2517 return SEQ_START_TOKEN;
2518
2519 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2520 return NULL;
2521
2522 return unix_next_socket(seq, NULL, pos);
2523}
2524
2525static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2526{
2527 ++*pos;
7123aaa3 2528 return unix_next_socket(seq, v, pos);
2529}
2530
2531static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2532 __releases(unix_table_lock)
1da177e4 2533{
fbe9cc4a 2534 spin_unlock(&unix_table_lock);
2535}
2536
2537static int unix_seq_show(struct seq_file *seq, void *v)
2538{
ac7bfa62 2539
b9f3124f 2540 if (v == SEQ_START_TOKEN)
2541 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2542 "Inode Path\n");
2543 else {
2544 struct sock *s = v;
2545 struct unix_sock *u = unix_sk(s);
1c92b4e5 2546 unix_state_lock(s);
1da177e4 2547
71338aa7 2548 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2549 s,
2550 atomic_read(&s->sk_refcnt),
2551 0,
2552 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2553 s->sk_type,
2554 s->sk_socket ?
2555 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2556 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2557 sock_i_ino(s));
2558
2559 if (u->addr) {
2560 int i, len;
2561 seq_putc(seq, ' ');
2562
2563 i = 0;
2564 len = u->addr->len - sizeof(short);
2565 if (!UNIX_ABSTRACT(s))
2566 len--;
2567 else {
2568 seq_putc(seq, '@');
2569 i++;
2570 }
2571 for ( ; i < len; i++)
2572 seq_putc(seq, u->addr->name->sun_path[i]);
2573 }
1c92b4e5 2574 unix_state_unlock(s);
2575 seq_putc(seq, '\n');
2576 }
2577
2578 return 0;
2579}
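
/*
 * Illustrative userspace sketch (not part of this file): the seq_file
 * handler above produces /proc/net/unix, one line per socket with the
 * columns "Num RefCount Protocol Flags Type St Inode Path"; abstract
 * sockets show a path beginning with '@'.  A reader can simply stream
 * the file.  The helper name is made up and error handling is omitted.
 *
 *     #include <stdio.h>
 *
 *     static void dump_unix_sockets(void)
 *     {
 *             char line[512];
 *             FILE *f = fopen("/proc/net/unix", "r");
 *
 *             if (!f)
 *                     return;
 *             while (fgets(line, sizeof(line), f))
 *                     fputs(line, stdout);
 *             fclose(f);
 *     }
 */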
2580
56b3d975 2581static const struct seq_operations unix_seq_ops = {
2582 .start = unix_seq_start,
2583 .next = unix_seq_next,
2584 .stop = unix_seq_stop,
2585 .show = unix_seq_show,
2586};
2587
2588static int unix_seq_open(struct inode *inode, struct file *file)
2589{
e372c414 2590 return seq_open_net(inode, file, &unix_seq_ops,
8b51b064 2591 sizeof(struct seq_net_private));
2592}
2593
da7071d7 2594static const struct file_operations unix_seq_fops = {
2595 .owner = THIS_MODULE,
2596 .open = unix_seq_open,
2597 .read = seq_read,
2598 .llseek = seq_lseek,
e372c414 2599 .release = seq_release_net,
2600};
2601
2602#endif
2603
ec1b4cf7 2604static const struct net_proto_family unix_family_ops = {
2605 .family = PF_UNIX,
2606 .create = unix_create,
2607 .owner = THIS_MODULE,
2608};
2609
097e66c5 2610
2c8c1e72 2611static int __net_init unix_net_init(struct net *net)
2612{
2613 int error = -ENOMEM;
2614
a0a53c8b 2615 net->unx.sysctl_max_dgram_qlen = 10;
2616 if (unix_sysctl_register(net))
2617 goto out;
d392e497 2618
097e66c5 2619#ifdef CONFIG_PROC_FS
d4beaa66 2620 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
1597fbc0 2621 unix_sysctl_unregister(net);
097e66c5 2622 goto out;
1597fbc0 2623 }
2624#endif
2625 error = 0;
2626out:
48dcc33e 2627 return error;
2628}
2629
2c8c1e72 2630static void __net_exit unix_net_exit(struct net *net)
097e66c5 2631{
1597fbc0 2632 unix_sysctl_unregister(net);
ece31ffd 2633 remove_proc_entry("unix", net->proc_net);
2634}
2635
2636static struct pernet_operations unix_net_ops = {
2637 .init = unix_net_init,
2638 .exit = unix_net_exit,
2639};
2640
2641static int __init af_unix_init(void)
2642{
2643 int rc = -1;
1da177e4 2644
b4fff5f8 2645 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2646
2647 rc = proto_register(&unix_proto, 1);
ac7bfa62 2648 if (rc != 0) {
5cc208be 2649 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2650 goto out;
2651 }
2652
2653 sock_register(&unix_family_ops);
097e66c5 2654 register_pernet_subsys(&unix_net_ops);
2655out:
2656 return rc;
2657}
2658
2659static void __exit af_unix_exit(void)
2660{
2661 sock_unregister(PF_UNIX);
1da177e4 2662 proto_unregister(&unix_proto);
097e66c5 2663 unregister_pernet_subsys(&unix_net_ops);
2664}
2665
2666/* Earlier than device_initcall() so that other drivers invoking
2667 request_module() don't end up in a loop when modprobe tries
2668 to use a UNIX socket. But later than subsys_initcall() because
2669 we depend on stuff initialised there */
2670fs_initcall(af_unix_init);
2671module_exit(af_unix_exit);
2672
2673MODULE_LICENSE("GPL");
2674MODULE_ALIAS_NETPROTO(PF_UNIX);