Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
[linux-block.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid a huge amount
38 * of socks hashed (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
83#include <linux/module.h>
1da177e4 84#include <linux/kernel.h>
1da177e4
LT
85#include <linux/signal.h>
86#include <linux/sched.h>
87#include <linux/errno.h>
88#include <linux/string.h>
89#include <linux/stat.h>
90#include <linux/dcache.h>
91#include <linux/namei.h>
92#include <linux/socket.h>
93#include <linux/un.h>
94#include <linux/fcntl.h>
95#include <linux/termios.h>
96#include <linux/sockios.h>
97#include <linux/net.h>
98#include <linux/in.h>
99#include <linux/fs.h>
100#include <linux/slab.h>
101#include <asm/uaccess.h>
102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
457c4cbc 104#include <net/net_namespace.h>
1da177e4 105#include <net/sock.h>
c752f073 106#include <net/tcp_states.h>
1da177e4
LT
107#include <net/af_unix.h>
108#include <linux/proc_fs.h>
109#include <linux/seq_file.h>
110#include <net/scm.h>
111#include <linux/init.h>
112#include <linux/poll.h>
1da177e4
LT
113#include <linux/rtnetlink.h>
114#include <linux/mount.h>
115#include <net/checksum.h>
116#include <linux/security.h>
117
13111698
AB
118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119static DEFINE_SPINLOCK(unix_table_lock);
1da177e4
LT
120static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121
122#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
123
124#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Store the security id carried by the scm cookie into the skb's
 * UNIXSID() slot.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/*
 * Load the security id saved in the skb's UNIXSID() slot back into the
 * scm cookie.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are no-ops. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
143
1da177e4
LT
144/*
145 * SMP locking strategy:
fbe9cc4a 146 * hash table is protected with spinlock unix_table_lock
1da177e4
LT
147 * each socket state is protected by a separate per-socket lock
147 * (see unix_state_lock()).
148 */
149
44bb9363 150static inline unsigned unix_hash_fold(__wsum n)
1da177e4 151{
44bb9363 152 unsigned hash = (__force unsigned)n;
1da177e4
LT
153 hash ^= hash>>16;
154 hash ^= hash>>8;
155 return hash&(UNIX_HASH_SIZE-1);
156}
157
158#define unix_peer(sk) (unix_sk(sk)->peer)
159
160static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161{
162 return unix_peer(osk) == sk;
163}
164
165static inline int unix_may_send(struct sock *sk, struct sock *osk)
166{
6eba6a37 167 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
168}
169
3c73419c
RW
170static inline int unix_recvq_full(struct sock const *sk)
171{
172 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173}
174
1da177e4
LT
175static struct sock *unix_peer_get(struct sock *s)
176{
177 struct sock *peer;
178
1c92b4e5 179 unix_state_lock(s);
1da177e4
LT
180 peer = unix_peer(s);
181 if (peer)
182 sock_hold(peer);
1c92b4e5 183 unix_state_unlock(s);
1da177e4
LT
184 return peer;
185}
186
187static inline void unix_release_addr(struct unix_address *addr)
188{
189 if (atomic_dec_and_test(&addr->refcnt))
190 kfree(addr);
191}
192
193/*
194 * Check unix socket name:
195 * - should be not zero length.
196 * - if started by not zero, should be NULL terminated (FS object)
197 * - if started by zero, it is abstract name.
198 */
ac7bfa62 199
6eba6a37 200static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
1da177e4
LT
201{
202 if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 return -EINVAL;
204 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 return -EINVAL;
206 if (sunaddr->sun_path[0]) {
207 /*
208 * This may look like an off by one error but it is a bit more
209 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 * sun_path[108] doesnt as such exist. However in kernel space
211 * we are guaranteed that it is a valid memory location in our
212 * kernel address buffer.
213 */
e27dfcea 214 ((char *)sunaddr)[len] = 0;
1da177e4
LT
215 len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 return len;
217 }
218
07f0757a 219 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
220 return len;
221}
222
/*
 * Unlink @sk from whichever table list it is on.  Callers in this file
 * hold unix_table_lock around this.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227
/*
 * Link @sk onto @list.  Warns if the socket is still hashed somewhere.
 * Callers in this file hold unix_table_lock around this.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233
234static inline void unix_remove_socket(struct sock *sk)
235{
fbe9cc4a 236 spin_lock(&unix_table_lock);
1da177e4 237 __unix_remove_socket(sk);
fbe9cc4a 238 spin_unlock(&unix_table_lock);
1da177e4
LT
239}
240
241static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242{
fbe9cc4a 243 spin_lock(&unix_table_lock);
1da177e4 244 __unix_insert_socket(list, sk);
fbe9cc4a 245 spin_unlock(&unix_table_lock);
1da177e4
LT
246}
247
097e66c5
DL
248static struct sock *__unix_find_socket_byname(struct net *net,
249 struct sockaddr_un *sunname,
1da177e4
LT
250 int len, int type, unsigned hash)
251{
252 struct sock *s;
253 struct hlist_node *node;
254
255 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 struct unix_sock *u = unix_sk(s);
257
878628fb 258 if (!net_eq(sock_net(s), net))
097e66c5
DL
259 continue;
260
1da177e4
LT
261 if (u->addr->len == len &&
262 !memcmp(u->addr->name, sunname, len))
263 goto found;
264 }
265 s = NULL;
266found:
267 return s;
268}
269
097e66c5
DL
270static inline struct sock *unix_find_socket_byname(struct net *net,
271 struct sockaddr_un *sunname,
1da177e4
LT
272 int len, int type,
273 unsigned hash)
274{
275 struct sock *s;
276
fbe9cc4a 277 spin_lock(&unix_table_lock);
097e66c5 278 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
279 if (s)
280 sock_hold(s);
fbe9cc4a 281 spin_unlock(&unix_table_lock);
1da177e4
LT
282 return s;
283}
284
097e66c5 285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
1da177e4
LT
286{
287 struct sock *s;
288 struct hlist_node *node;
289
fbe9cc4a 290 spin_lock(&unix_table_lock);
1da177e4
LT
291 sk_for_each(s, node,
292 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 struct dentry *dentry = unix_sk(s)->dentry;
294
878628fb 295 if (!net_eq(sock_net(s), net))
097e66c5
DL
296 continue;
297
6eba6a37 298 if (dentry && dentry->d_inode == i) {
1da177e4
LT
299 sock_hold(s);
300 goto found;
301 }
302 }
303 s = NULL;
304found:
fbe9cc4a 305 spin_unlock(&unix_table_lock);
1da177e4
LT
306 return s;
307}
308
309static inline int unix_writable(struct sock *sk)
310{
311 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312}
313
314static void unix_write_space(struct sock *sk)
315{
316 read_lock(&sk->sk_callback_lock);
317 if (unix_writable(sk)) {
318 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
71e20f18 319 wake_up_interruptible_sync(sk->sk_sleep);
8d8ad9d7 320 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4
LT
321 }
322 read_unlock(&sk->sk_callback_lock);
323}
324
325/* When dgram socket disconnects (or changes its peer), we clear its receive
326 * queue of packets arrived from previous peer. First, it allows to do
327 * flow control based only on wmem_alloc; second, sk connected to peer
328 * may receive messages only from that peer. */
329static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330{
b03efcfb 331 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
332 skb_queue_purge(&sk->sk_receive_queue);
333 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334
335 /* If one link of bidirectional dgram pipe is disconnected,
336 * we signal error. Messages are lost. Do not make this,
337 * when peer was not connected to us.
338 */
339 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 other->sk_err = ECONNRESET;
341 other->sk_error_report(other);
342 }
343 }
344}
345
346static void unix_sock_destructor(struct sock *sk)
347{
348 struct unix_sock *u = unix_sk(sk);
349
350 skb_queue_purge(&sk->sk_receive_queue);
351
547b792c
IJ
352 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353 WARN_ON(!sk_unhashed(sk));
354 WARN_ON(sk->sk_socket);
1da177e4 355 if (!sock_flag(sk, SOCK_DEAD)) {
6b41e7dd 356 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
357 return;
358 }
359
360 if (u->addr)
361 unix_release_addr(u->addr);
362
363 atomic_dec(&unix_nr_socks);
6f756a8c 364 local_bh_disable();
a8076d8d 365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 366 local_bh_enable();
1da177e4 367#ifdef UNIX_REFCNT_DEBUG
6eba6a37
ED
368 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369 atomic_read(&unix_nr_socks));
1da177e4
LT
370#endif
371}
372
6eba6a37 373static int unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
374{
375 struct unix_sock *u = unix_sk(sk);
376 struct dentry *dentry;
377 struct vfsmount *mnt;
378 struct sock *skpair;
379 struct sk_buff *skb;
380 int state;
381
382 unix_remove_socket(sk);
383
384 /* Clear state */
1c92b4e5 385 unix_state_lock(sk);
1da177e4
LT
386 sock_orphan(sk);
387 sk->sk_shutdown = SHUTDOWN_MASK;
388 dentry = u->dentry;
389 u->dentry = NULL;
390 mnt = u->mnt;
391 u->mnt = NULL;
392 state = sk->sk_state;
393 sk->sk_state = TCP_CLOSE;
1c92b4e5 394 unix_state_unlock(sk);
1da177e4
LT
395
396 wake_up_interruptible_all(&u->peer_wait);
397
e27dfcea 398 skpair = unix_peer(sk);
1da177e4 399
e27dfcea 400 if (skpair != NULL) {
1da177e4 401 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 402 unix_state_lock(skpair);
1da177e4
LT
403 /* No more writes */
404 skpair->sk_shutdown = SHUTDOWN_MASK;
405 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406 skpair->sk_err = ECONNRESET;
1c92b4e5 407 unix_state_unlock(skpair);
1da177e4
LT
408 skpair->sk_state_change(skpair);
409 read_lock(&skpair->sk_callback_lock);
8d8ad9d7 410 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4
LT
411 read_unlock(&skpair->sk_callback_lock);
412 }
413 sock_put(skpair); /* It may now die */
414 unix_peer(sk) = NULL;
415 }
416
417 /* Try to flush out this socket. Throw out buffers at least */
418
419 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 420 if (state == TCP_LISTEN)
1da177e4
LT
421 unix_release_sock(skb->sk, 1);
422 /* passed fds are erased in the kfree_skb hook */
423 kfree_skb(skb);
424 }
425
426 if (dentry) {
427 dput(dentry);
428 mntput(mnt);
429 }
430
431 sock_put(sk);
432
433 /* ---- Socket is dead now and most probably destroyed ---- */
434
435 /*
436 * Fixme: BSD difference: In BSD all sockets connected to use get
437 * ECONNRESET and we die on the spot. In Linux we behave
438 * like files and pipes do and wait for the last
439 * dereference.
440 *
441 * Can't we simply set sock->err?
442 *
443 * What the above comment does talk about? --ANK(980817)
444 */
445
9305cfa4 446 if (unix_tot_inflight)
ac7bfa62 447 unix_gc(); /* Garbage collect fds */
1da177e4
LT
448
449 return 0;
450}
451
452static int unix_listen(struct socket *sock, int backlog)
453{
454 int err;
455 struct sock *sk = sock->sk;
456 struct unix_sock *u = unix_sk(sk);
457
458 err = -EOPNOTSUPP;
6eba6a37
ED
459 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
461 err = -EINVAL;
462 if (!u->addr)
6eba6a37 463 goto out; /* No listens on an unbound socket */
1c92b4e5 464 unix_state_lock(sk);
1da177e4
LT
465 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466 goto out_unlock;
467 if (backlog > sk->sk_max_ack_backlog)
468 wake_up_interruptible_all(&u->peer_wait);
469 sk->sk_max_ack_backlog = backlog;
470 sk->sk_state = TCP_LISTEN;
471 /* set credentials so connect can copy them */
b488893a 472 sk->sk_peercred.pid = task_tgid_vnr(current);
19d65624 473 current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
1da177e4
LT
474 err = 0;
475
476out_unlock:
1c92b4e5 477 unix_state_unlock(sk);
1da177e4
LT
478out:
479 return err;
480}
481
482static int unix_release(struct socket *);
483static int unix_bind(struct socket *, struct sockaddr *, int);
484static int unix_stream_connect(struct socket *, struct sockaddr *,
485 int addr_len, int flags);
486static int unix_socketpair(struct socket *, struct socket *);
487static int unix_accept(struct socket *, struct socket *, int);
488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
490static unsigned int unix_dgram_poll(struct file *, struct socket *,
491 poll_table *);
1da177e4
LT
492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493static int unix_shutdown(struct socket *, int);
494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495 struct msghdr *, size_t);
496static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497 struct msghdr *, size_t, int);
498static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499 struct msghdr *, size_t);
500static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501 struct msghdr *, size_t, int);
502static int unix_dgram_connect(struct socket *, struct sockaddr *,
503 int, int);
504static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505 struct msghdr *, size_t);
506
90ddc4f0 507static const struct proto_ops unix_stream_ops = {
1da177e4
LT
508 .family = PF_UNIX,
509 .owner = THIS_MODULE,
510 .release = unix_release,
511 .bind = unix_bind,
512 .connect = unix_stream_connect,
513 .socketpair = unix_socketpair,
514 .accept = unix_accept,
515 .getname = unix_getname,
516 .poll = unix_poll,
517 .ioctl = unix_ioctl,
518 .listen = unix_listen,
519 .shutdown = unix_shutdown,
520 .setsockopt = sock_no_setsockopt,
521 .getsockopt = sock_no_getsockopt,
522 .sendmsg = unix_stream_sendmsg,
523 .recvmsg = unix_stream_recvmsg,
524 .mmap = sock_no_mmap,
525 .sendpage = sock_no_sendpage,
526};
527
90ddc4f0 528static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
529 .family = PF_UNIX,
530 .owner = THIS_MODULE,
531 .release = unix_release,
532 .bind = unix_bind,
533 .connect = unix_dgram_connect,
534 .socketpair = unix_socketpair,
535 .accept = sock_no_accept,
536 .getname = unix_getname,
ec0d215f 537 .poll = unix_dgram_poll,
1da177e4
LT
538 .ioctl = unix_ioctl,
539 .listen = sock_no_listen,
540 .shutdown = unix_shutdown,
541 .setsockopt = sock_no_setsockopt,
542 .getsockopt = sock_no_getsockopt,
543 .sendmsg = unix_dgram_sendmsg,
544 .recvmsg = unix_dgram_recvmsg,
545 .mmap = sock_no_mmap,
546 .sendpage = sock_no_sendpage,
547};
548
90ddc4f0 549static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
550 .family = PF_UNIX,
551 .owner = THIS_MODULE,
552 .release = unix_release,
553 .bind = unix_bind,
554 .connect = unix_stream_connect,
555 .socketpair = unix_socketpair,
556 .accept = unix_accept,
557 .getname = unix_getname,
ec0d215f 558 .poll = unix_dgram_poll,
1da177e4
LT
559 .ioctl = unix_ioctl,
560 .listen = unix_listen,
561 .shutdown = unix_shutdown,
562 .setsockopt = sock_no_setsockopt,
563 .getsockopt = sock_no_getsockopt,
564 .sendmsg = unix_seqpacket_sendmsg,
565 .recvmsg = unix_dgram_recvmsg,
566 .mmap = sock_no_mmap,
567 .sendpage = sock_no_sendpage,
568};
569
570static struct proto unix_proto = {
248969ae
ED
571 .name = "UNIX",
572 .owner = THIS_MODULE,
248969ae 573 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
574};
575
a09785a2
IM
576/*
577 * AF_UNIX sockets do not interact with hardware, hence they
578 * dont trigger interrupts - so it's safe for them to have
579 * bh-unsafe locking for their sk_receive_queue.lock. Split off
580 * this special lock-class by reinitializing the spinlock key:
581 */
582static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583
6eba6a37 584static struct sock *unix_create1(struct net *net, struct socket *sock)
1da177e4
LT
585{
586 struct sock *sk = NULL;
587 struct unix_sock *u;
588
284b327b
PE
589 atomic_inc(&unix_nr_socks);
590 if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
591 goto out;
592
6257ff21 593 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
1da177e4
LT
594 if (!sk)
595 goto out;
596
6eba6a37 597 sock_init_data(sock, sk);
a09785a2
IM
598 lockdep_set_class(&sk->sk_receive_queue.lock,
599 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
600
601 sk->sk_write_space = unix_write_space;
a0a53c8b 602 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
603 sk->sk_destruct = unix_sock_destructor;
604 u = unix_sk(sk);
605 u->dentry = NULL;
606 u->mnt = NULL;
fd19f329 607 spin_lock_init(&u->lock);
516e0cc5 608 atomic_long_set(&u->inflight, 0);
1fd05ba5 609 INIT_LIST_HEAD(&u->link);
57b47a53 610 mutex_init(&u->readlock); /* single task reading lock */
1da177e4
LT
611 init_waitqueue_head(&u->peer_wait);
612 unix_insert_socket(unix_sockets_unbound, sk);
613out:
284b327b
PE
614 if (sk == NULL)
615 atomic_dec(&unix_nr_socks);
920de804
ED
616 else {
617 local_bh_disable();
a8076d8d 618 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
619 local_bh_enable();
620 }
1da177e4
LT
621 return sk;
622}
623
1b8d7ae4 624static int unix_create(struct net *net, struct socket *sock, int protocol)
1da177e4
LT
625{
626 if (protocol && protocol != PF_UNIX)
627 return -EPROTONOSUPPORT;
628
629 sock->state = SS_UNCONNECTED;
630
631 switch (sock->type) {
632 case SOCK_STREAM:
633 sock->ops = &unix_stream_ops;
634 break;
635 /*
636 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
637 * nothing uses it.
638 */
639 case SOCK_RAW:
e27dfcea 640 sock->type = SOCK_DGRAM;
1da177e4
LT
641 case SOCK_DGRAM:
642 sock->ops = &unix_dgram_ops;
643 break;
644 case SOCK_SEQPACKET:
645 sock->ops = &unix_seqpacket_ops;
646 break;
647 default:
648 return -ESOCKTNOSUPPORT;
649 }
650
1b8d7ae4 651 return unix_create1(net, sock) ? 0 : -ENOMEM;
1da177e4
LT
652}
653
654static int unix_release(struct socket *sock)
655{
656 struct sock *sk = sock->sk;
657
658 if (!sk)
659 return 0;
660
661 sock->sk = NULL;
662
6eba6a37 663 return unix_release_sock(sk, 0);
1da177e4
LT
664}
665
666static int unix_autobind(struct socket *sock)
667{
668 struct sock *sk = sock->sk;
3b1e0a65 669 struct net *net = sock_net(sk);
1da177e4
LT
670 struct unix_sock *u = unix_sk(sk);
671 static u32 ordernum = 1;
6eba6a37 672 struct unix_address *addr;
1da177e4
LT
673 int err;
674
57b47a53 675 mutex_lock(&u->readlock);
1da177e4
LT
676
677 err = 0;
678 if (u->addr)
679 goto out;
680
681 err = -ENOMEM;
0da974f4 682 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
683 if (!addr)
684 goto out;
685
1da177e4
LT
686 addr->name->sun_family = AF_UNIX;
687 atomic_set(&addr->refcnt, 1);
688
689retry:
690 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 691 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 692
fbe9cc4a 693 spin_lock(&unix_table_lock);
1da177e4
LT
694 ordernum = (ordernum+1)&0xFFFFF;
695
097e66c5 696 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 697 addr->hash)) {
fbe9cc4a 698 spin_unlock(&unix_table_lock);
1da177e4
LT
699 /* Sanity yield. It is unusual case, but yet... */
700 if (!(ordernum&0xFF))
701 yield();
702 goto retry;
703 }
704 addr->hash ^= sk->sk_type;
705
706 __unix_remove_socket(sk);
707 u->addr = addr;
708 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 709 spin_unlock(&unix_table_lock);
1da177e4
LT
710 err = 0;
711
57b47a53 712out: mutex_unlock(&u->readlock);
1da177e4
LT
713 return err;
714}
715
097e66c5
DL
/*
 * Resolve the destination socket named by @sunname.
 *
 * A name starting with a non-zero byte is a filesystem path: it is looked
 * up, must be writable and a socket inode, and is matched by inode.  Any
 * other name is abstract and is matched by name and hash.
 *
 * Returns the socket with a reference held.  On failure returns NULL and
 * stores a negative errno in *@error: the path lookup or permission
 * error, -ECONNREFUSED when no such socket exists, or -EPROTOTYPE when a
 * path resolves to a socket of a different type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		int type_matches;

		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;

		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;

		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		/* Only a usable match counts as an access to the path. */
		type_matches = (u->sk_type == type);
		if (type_matches)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		if (!type_matches) {
			err = -EPROTOTYPE;
			sock_put(u);
			goto fail;
		}
	} else {
		struct dentry *dentry;

		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!u)
			goto fail;

		dentry = unix_sk(u)->dentry;
		if (dentry)
			touch_atime(unix_sk(u)->mnt, dentry);
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
770
771
772static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773{
774 struct sock *sk = sock->sk;
3b1e0a65 775 struct net *net = sock_net(sk);
1da177e4 776 struct unix_sock *u = unix_sk(sk);
e27dfcea 777 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
6eba6a37 778 struct dentry *dentry = NULL;
1da177e4
LT
779 struct nameidata nd;
780 int err;
781 unsigned hash;
782 struct unix_address *addr;
783 struct hlist_head *list;
784
785 err = -EINVAL;
786 if (sunaddr->sun_family != AF_UNIX)
787 goto out;
788
e27dfcea 789 if (addr_len == sizeof(short)) {
1da177e4
LT
790 err = unix_autobind(sock);
791 goto out;
792 }
793
794 err = unix_mkname(sunaddr, addr_len, &hash);
795 if (err < 0)
796 goto out;
797 addr_len = err;
798
57b47a53 799 mutex_lock(&u->readlock);
1da177e4
LT
800
801 err = -EINVAL;
802 if (u->addr)
803 goto out_up;
804
805 err = -ENOMEM;
806 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807 if (!addr)
808 goto out_up;
809
810 memcpy(addr->name, sunaddr, addr_len);
811 addr->len = addr_len;
812 addr->hash = hash ^ sk->sk_type;
813 atomic_set(&addr->refcnt, 1);
814
815 if (sunaddr->sun_path[0]) {
816 unsigned int mode;
817 err = 0;
818 /*
819 * Get the parent directory, calculate the hash for last
820 * component.
821 */
822 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823 if (err)
824 goto out_mknod_parent;
f81a0bff
CH
825
826 dentry = lookup_create(&nd, 0);
1da177e4
LT
827 err = PTR_ERR(dentry);
828 if (IS_ERR(dentry))
829 goto out_mknod_unlock;
f81a0bff 830
1da177e4
LT
831 /*
832 * All right, let's create it.
833 */
834 mode = S_IFSOCK |
835 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
463c3197
DH
836 err = mnt_want_write(nd.path.mnt);
837 if (err)
838 goto out_mknod_dput;
4ac91378 839 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
463c3197 840 mnt_drop_write(nd.path.mnt);
1da177e4
LT
841 if (err)
842 goto out_mknod_dput;
4ac91378
JB
843 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
844 dput(nd.path.dentry);
845 nd.path.dentry = dentry;
1da177e4
LT
846
847 addr->hash = UNIX_HASH_SIZE;
848 }
849
fbe9cc4a 850 spin_lock(&unix_table_lock);
1da177e4
LT
851
852 if (!sunaddr->sun_path[0]) {
853 err = -EADDRINUSE;
097e66c5 854 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
855 sk->sk_type, hash)) {
856 unix_release_addr(addr);
857 goto out_unlock;
858 }
859
860 list = &unix_socket_table[addr->hash];
861 } else {
862 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
4ac91378
JB
863 u->dentry = nd.path.dentry;
864 u->mnt = nd.path.mnt;
1da177e4
LT
865 }
866
867 err = 0;
868 __unix_remove_socket(sk);
869 u->addr = addr;
870 __unix_insert_socket(list, sk);
871
872out_unlock:
fbe9cc4a 873 spin_unlock(&unix_table_lock);
1da177e4 874out_up:
57b47a53 875 mutex_unlock(&u->readlock);
1da177e4
LT
876out:
877 return err;
878
879out_mknod_dput:
880 dput(dentry);
881out_mknod_unlock:
4ac91378 882 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1d957f9b 883 path_put(&nd.path);
1da177e4 884out_mknod_parent:
e27dfcea
JK
885 if (err == -EEXIST)
886 err = -EADDRINUSE;
1da177e4
LT
887 unix_release_addr(addr);
888 goto out_up;
889}
890
278a3de5
DM
/*
 * Take the state locks of two sockets.  The lower address is always
 * locked first, so two tasks locking the same pair in opposite argument
 * order still agree on the order.  When @sk2 is NULL or identical to
 * @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
905
/*
 * Counterpart of unix_state_double_lock(): release @sk1 and, when @sk2
 * is a distinct non-NULL socket, @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
915
1da177e4
LT
916static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
917 int alen, int flags)
918{
919 struct sock *sk = sock->sk;
3b1e0a65 920 struct net *net = sock_net(sk);
e27dfcea 921 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4
LT
922 struct sock *other;
923 unsigned hash;
924 int err;
925
926 if (addr->sa_family != AF_UNSPEC) {
927 err = unix_mkname(sunaddr, alen, &hash);
928 if (err < 0)
929 goto out;
930 alen = err;
931
932 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
933 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
934 goto out;
935
278a3de5 936restart:
e27dfcea 937 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
938 if (!other)
939 goto out;
940
278a3de5
DM
941 unix_state_double_lock(sk, other);
942
943 /* Apparently VFS overslept socket death. Retry. */
944 if (sock_flag(other, SOCK_DEAD)) {
945 unix_state_double_unlock(sk, other);
946 sock_put(other);
947 goto restart;
948 }
1da177e4
LT
949
950 err = -EPERM;
951 if (!unix_may_send(sk, other))
952 goto out_unlock;
953
954 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
955 if (err)
956 goto out_unlock;
957
958 } else {
959 /*
960 * 1003.1g breaking connected state with AF_UNSPEC
961 */
962 other = NULL;
278a3de5 963 unix_state_double_lock(sk, other);
1da177e4
LT
964 }
965
966 /*
967 * If it was connected, reconnect.
968 */
969 if (unix_peer(sk)) {
970 struct sock *old_peer = unix_peer(sk);
e27dfcea 971 unix_peer(sk) = other;
278a3de5 972 unix_state_double_unlock(sk, other);
1da177e4
LT
973
974 if (other != old_peer)
975 unix_dgram_disconnected(sk, old_peer);
976 sock_put(old_peer);
977 } else {
e27dfcea 978 unix_peer(sk) = other;
278a3de5 979 unix_state_double_unlock(sk, other);
1da177e4 980 }
ac7bfa62 981 return 0;
1da177e4
LT
982
983out_unlock:
278a3de5 984 unix_state_double_unlock(sk, other);
1da177e4
LT
985 sock_put(other);
986out:
987 return err;
988}
989
990static long unix_wait_for_peer(struct sock *other, long timeo)
991{
992 struct unix_sock *u = unix_sk(other);
993 int sched;
994 DEFINE_WAIT(wait);
995
996 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
997
998 sched = !sock_flag(other, SOCK_DEAD) &&
999 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1000 unix_recvq_full(other);
1da177e4 1001
1c92b4e5 1002 unix_state_unlock(other);
1da177e4
LT
1003
1004 if (sched)
1005 timeo = schedule_timeout(timeo);
1006
1007 finish_wait(&u->peer_wait, &wait);
1008 return timeo;
1009}
1010
/*
 * unix_stream_connect - connect @sock to the listening socket named by @uaddr.
 *
 * Outline of what the code below does:
 *  - validates the address with unix_mkname() and autobinds the caller when
 *    SOCK_PASSCRED is set and it has no address yet;
 *  - pre-allocates the server-side sock (newsk) and a 1-byte skb before any
 *    state lock is taken;
 *  - looks the listener up, locks its state, and retries the lookup if the
 *    socket found is already SOCK_DEAD;
 *  - fails with -ECONNREFUSED if the target is not in TCP_LISTEN;
 *  - if the listener's queue is full, fails with -EAGAIN when the send
 *    timeout is zero, otherwise waits and restarts (a pending signal ends
 *    the wait with sock_intr_errno());
 *  - requires our own state to be TCP_CLOSE (-EISCONN if TCP_ESTABLISHED,
 *    -EINVAL otherwise), takes our state lock nested inside the listener's
 *    and re-checks the state;
 *  - after the security hook passes, wires sk <-> newsk as peers, copies the
 *    listener's address/dentry/mnt onto newsk, and queues the skb on the
 *    listener's receive queue for unix_accept() to pick up.
 *
 * Returns 0 on success or a negative errno.  On every failure path the
 * pre-allocated skb and newsk are released and the reference on the listener
 * (if one was found) is dropped.
 */
1011static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1012 int addr_len, int flags)
1013{
e27dfcea 1014 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4 1015 struct sock *sk = sock->sk;
3b1e0a65 1016 struct net *net = sock_net(sk);
1da177e4
LT
1017 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1018 struct sock *newsk = NULL;
1019 struct sock *other = NULL;
1020 struct sk_buff *skb = NULL;
1021 unsigned hash;
1022 int st;
1023 int err;
1024 long timeo;
1025
1026 err = unix_mkname(sunaddr, addr_len, &hash);
1027 if (err < 0)
1028 goto out;
/* A non-negative return from unix_mkname() is used as the address length. */
1029 addr_len = err;
1030
1031 if (test_bit(SOCK_PASSCRED, &sock->flags)
1032 && !u->addr && (err = unix_autobind(sock)) != 0)
1033 goto out;
1034
1035 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1036
1037 /* First of all allocate resources.
1038 If we will make it after state is locked,
1039 we will have to recheck all again in any case.
1040 */
1041
1042 err = -ENOMEM;
1043
1044 /* create new sock for complete connection */
3b1e0a65 1045 newsk = unix_create1(sock_net(sk), NULL);
1da177e4
LT
1046 if (newsk == NULL)
1047 goto out;
1048
1049 /* Allocate skb for sending to listening sock */
/* NOTE(review): presumably sock_wmalloc() sets skb->sk to newsk, which is how unix_accept() later finds it -- confirm. */
1050 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1051 if (skb == NULL)
1052 goto out;
1053
1054restart:
1055 /* Find listening sock. */
097e66c5 1056 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1da177e4
LT
1057 if (!other)
1058 goto out;
1059
1060 /* Latch state of peer */
1c92b4e5 1061 unix_state_lock(other);
1da177e4
LT
1062
1063 /* Apparently VFS overslept socket death. Retry. */
1064 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1065 unix_state_unlock(other);
1da177e4
LT
1066 sock_put(other);
1067 goto restart;
1068 }
1069
1070 err = -ECONNREFUSED;
1071 if (other->sk_state != TCP_LISTEN)
1072 goto out_unlock;
1073
3c73419c 1074 if (unix_recvq_full(other)) {
1da177e4
LT
1075 err = -EAGAIN;
1076 if (!timeo)
1077 goto out_unlock;
1078
/* unix_wait_for_peer() releases other's state lock, hence "out" (not "out_unlock") below. */
1079 timeo = unix_wait_for_peer(other, timeo);
1080
1081 err = sock_intr_errno(timeo);
1082 if (signal_pending(current))
1083 goto out;
1084 sock_put(other);
1085 goto restart;
ac7bfa62 1086 }
1da177e4
LT
1087
1088 /* Latch our state.
1089
1090 It is tricky place. We need to grab write lock and cannot
1091 drop lock on peer. It is dangerous because deadlock is
1092 possible. Connect to self case and simultaneous
1093 attempt to connect are eliminated by checking socket
1094 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1095 check this before attempt to grab lock.
1096
1097 Well, and we have to recheck the state after socket locked.
1098 */
1099 st = sk->sk_state;
1100
1101 switch (st) {
1102 case TCP_CLOSE:
1103 /* This is ok... continue with connect */
1104 break;
1105 case TCP_ESTABLISHED:
1106 /* Socket is already connected */
1107 err = -EISCONN;
1108 goto out_unlock;
1109 default:
1110 err = -EINVAL;
1111 goto out_unlock;
1112 }
1113
1c92b4e5 1114 unix_state_lock_nested(sk);
1da177e4
LT
1115
/* Our state changed between the unlocked read above and taking the lock: drop everything and start over. */
1116 if (sk->sk_state != st) {
1c92b4e5
DM
1117 unix_state_unlock(sk);
1118 unix_state_unlock(other);
1da177e4
LT
1119 sock_put(other);
1120 goto restart;
1121 }
1122
1123 err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1124 if (err) {
1c92b4e5 1125 unix_state_unlock(sk);
1da177e4
LT
1126 goto out_unlock;
1127 }
1128
1129 /* The way is open! Fastly set all the necessary fields... */
1130
/* newsk holds a reference on sk for as long as sk is its peer. */
1131 sock_hold(sk);
1132 unix_peer(newsk) = sk;
1133 newsk->sk_state = TCP_ESTABLISHED;
1134 newsk->sk_type = sk->sk_type;
b488893a 1135 newsk->sk_peercred.pid = task_tgid_vnr(current);
19d65624 1136 current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1da177e4
LT
1137 newu = unix_sk(newsk);
1138 newsk->sk_sleep = &newu->peer_wait;
1139 otheru = unix_sk(other);
1140
1141 /* copy address information from listening to new sock*/
1142 if (otheru->addr) {
1143 atomic_inc(&otheru->addr->refcnt);
1144 newu->addr = otheru->addr;
1145 }
1146 if (otheru->dentry) {
1147 newu->dentry = dget(otheru->dentry);
1148 newu->mnt = mntget(otheru->mnt);
1149 }
1150
1151 /* Set credentials */
1152 sk->sk_peercred = other->sk_peercred;
1153
1da177e4
LT
1154 sock->state = SS_CONNECTED;
1155 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1156 sock_hold(newsk);
1157
1158 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1159 unix_peer(sk) = newsk;
1da177e4 1160
1c92b4e5 1161 unix_state_unlock(sk);
1da177e4
LT
1162
1163 /* take ten and send info to listening sock */
1164 spin_lock(&other->sk_receive_queue.lock);
1165 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1166 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1167 unix_state_unlock(other);
1da177e4
LT
1168 other->sk_data_ready(other, 0);
1169 sock_put(other);
1170 return 0;
1171
/* Error exit with other's state lock still held. */
1172out_unlock:
1173 if (other)
1c92b4e5 1174 unix_state_unlock(other);
1da177e4
LT
1175
/* Error exit with no state lock held: free whatever was pre-allocated. */
1176out:
1177 if (skb)
1178 kfree_skb(skb);
1179 if (newsk)
1180 unix_release_sock(newsk, 0);
1181 if (other)
1182 sock_put(other);
1183 return err;
1184}
1185
1186static int unix_socketpair(struct socket *socka, struct socket *sockb)
1187{
e27dfcea 1188 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1189
1190 /* Join our sockets back to back */
1191 sock_hold(ska);
1192 sock_hold(skb);
e27dfcea
JK
1193 unix_peer(ska) = skb;
1194 unix_peer(skb) = ska;
b488893a 1195 ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
19d65624
DH
1196 current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1197 ska->sk_peercred.uid = skb->sk_peercred.uid;
1198 ska->sk_peercred.gid = skb->sk_peercred.gid;
1da177e4
LT
1199
1200 if (ska->sk_type != SOCK_DGRAM) {
1201 ska->sk_state = TCP_ESTABLISHED;
1202 skb->sk_state = TCP_ESTABLISHED;
1203 socka->state = SS_CONNECTED;
1204 sockb->state = SS_CONNECTED;
1205 }
1206 return 0;
1207}
1208
1209static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1210{
1211 struct sock *sk = sock->sk;
1212 struct sock *tsk;
1213 struct sk_buff *skb;
1214 int err;
1215
1216 err = -EOPNOTSUPP;
6eba6a37 1217 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1218 goto out;
1219
1220 err = -EINVAL;
1221 if (sk->sk_state != TCP_LISTEN)
1222 goto out;
1223
1224 /* If socket state is TCP_LISTEN it cannot change (for now...),
1225 * so that no locks are necessary.
1226 */
1227
1228 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1229 if (!skb) {
1230 /* This means receive shutdown. */
1231 if (err == 0)
1232 err = -EINVAL;
1233 goto out;
1234 }
1235
1236 tsk = skb->sk;
1237 skb_free_datagram(sk, skb);
1238 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1239
1240 /* attach accepted sock to socket */
1c92b4e5 1241 unix_state_lock(tsk);
1da177e4
LT
1242 newsock->state = SS_CONNECTED;
1243 sock_graft(tsk, newsock);
1c92b4e5 1244 unix_state_unlock(tsk);
1da177e4
LT
1245 return 0;
1246
1247out:
1248 return err;
1249}
1250
1251
1252static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1253{
1254 struct sock *sk = sock->sk;
1255 struct unix_sock *u;
e27dfcea 1256 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4
LT
1257 int err = 0;
1258
1259 if (peer) {
1260 sk = unix_peer_get(sk);
1261
1262 err = -ENOTCONN;
1263 if (!sk)
1264 goto out;
1265 err = 0;
1266 } else {
1267 sock_hold(sk);
1268 }
1269
1270 u = unix_sk(sk);
1c92b4e5 1271 unix_state_lock(sk);
1da177e4
LT
1272 if (!u->addr) {
1273 sunaddr->sun_family = AF_UNIX;
1274 sunaddr->sun_path[0] = 0;
1275 *uaddr_len = sizeof(short);
1276 } else {
1277 struct unix_address *addr = u->addr;
1278
1279 *uaddr_len = addr->len;
1280 memcpy(sunaddr, addr->name, *uaddr_len);
1281 }
1c92b4e5 1282 unix_state_unlock(sk);
1da177e4
LT
1283 sock_put(sk);
1284out:
1285 return err;
1286}
1287
1288static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1289{
1290 int i;
1291
1292 scm->fp = UNIXCB(skb).fp;
1293 skb->destructor = sock_wfree;
1294 UNIXCB(skb).fp = NULL;
1295
6eba6a37 1296 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4
LT
1297 unix_notinflight(scm->fp->fp[i]);
1298}
1299
1300static void unix_destruct_fds(struct sk_buff *skb)
1301{
1302 struct scm_cookie scm;
1303 memset(&scm, 0, sizeof(scm));
1304 unix_detach_fds(&scm, skb);
1305
1306 /* Alas, it calls VFS */
1307 /* So fscking what? fput() had been SMP-safe since the last Summer */
1308 scm_destroy(&scm);
1309 sock_wfree(skb);
1310}
1311
6209344f 1312static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1da177e4
LT
1313{
1314 int i;
6209344f
MS
1315
1316 /*
1317 * Need to duplicate file references for the sake of garbage
1318 * collection. Otherwise a socket in the fps might become a
1319 * candidate for GC while the skb is not yet queued.
1320 */
1321 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1322 if (!UNIXCB(skb).fp)
1323 return -ENOMEM;
1324
6eba6a37 1325 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4 1326 unix_inflight(scm->fp->fp[i]);
1da177e4 1327 skb->destructor = unix_destruct_fds;
6209344f 1328 return 0;
1da177e4
LT
1329}
1330
1331/*
1332 * Send AF_UNIX data.
1333 */
1334
/*
 * unix_dgram_sendmsg - send one datagram of @len bytes.
 *
 * Outline of what the code below does:
 *  - parses ancillary data with scm_send(); MSG_OOB is rejected with
 *    -EOPNOTSUPP;
 *  - the destination is either the address in @msg (validated by
 *    unix_mkname()) or, without an address, the connected peer (-ENOTCONN
 *    if there is none);
 *  - autobinds the sender when SOCK_PASSCRED is set and it has no address;
 *  - rejects payloads larger than sk_sndbuf - 32 with -EMSGSIZE;
 *  - builds the skb (credentials, optional fds, security data, payload)
 *    before any state lock is taken;
 *  - in the restart loop: resolves the destination if needed, locks it,
 *    checks unix_may_send() (-EPERM), handles a dead destination, checks
 *    RCV_SHUTDOWN (-EPIPE) and the security hook, and waits or fails with
 *    -EAGAIN when the destination queue is full;
 *  - finally queues the skb on the destination and signals data ready.
 *
 * Returns @len on success or a negative errno.  siocb->scm is destroyed on
 * every exit after scm_send() succeeded.
 */
1335static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1336 struct msghdr *msg, size_t len)
1337{
1338 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1339 struct sock *sk = sock->sk;
3b1e0a65 1340 struct net *net = sock_net(sk);
1da177e4 1341 struct unix_sock *u = unix_sk(sk);
e27dfcea 1342 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4
LT
1343 struct sock *other = NULL;
1344 int namelen = 0; /* fake GCC */
1345 int err;
1346 unsigned hash;
1347 struct sk_buff *skb;
1348 long timeo;
1349 struct scm_cookie tmp_scm;
1350
1351 if (NULL == siocb->scm)
1352 siocb->scm = &tmp_scm;
5f23b734 1353 wait_for_unix_gc();
1da177e4
LT
1354 err = scm_send(sock, msg, siocb->scm);
1355 if (err < 0)
1356 return err;
1357
1358 err = -EOPNOTSUPP;
1359 if (msg->msg_flags&MSG_OOB)
1360 goto out;
1361
1362 if (msg->msg_namelen) {
1363 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1364 if (err < 0)
1365 goto out;
1366 namelen = err;
1367 } else {
/* No address supplied: sunaddr == NULL marks "send to the connected peer" for the restart loop. */
1368 sunaddr = NULL;
1369 err = -ENOTCONN;
1370 other = unix_peer_get(sk);
1371 if (!other)
1372 goto out;
1373 }
1374
1375 if (test_bit(SOCK_PASSCRED, &sock->flags)
1376 && !u->addr && (err = unix_autobind(sock)) != 0)
1377 goto out;
1378
1379 err = -EMSGSIZE;
1380 if (len > sk->sk_sndbuf - 32)
1381 goto out;
1382
1383 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
e27dfcea 1384 if (skb == NULL)
1da177e4
LT
1385 goto out;
1386
1387 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
6209344f
MS
1388 if (siocb->scm->fp) {
1389 err = unix_attach_fds(siocb->scm, skb);
1390 if (err)
1391 goto out_free;
1392 }
dc49c1f9 1393 unix_get_secdata(siocb->scm, skb);
877ce7c1 1394
badff6d0 1395 skb_reset_transport_header(skb);
6eba6a37 1396 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1da177e4
LT
1397 if (err)
1398 goto out_free;
1399
1400 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1401
1402restart:
1403 if (!other) {
/* Reached with other == NULL and no address only after the connected peer was found dead and dropped below. */
1404 err = -ECONNRESET;
1405 if (sunaddr == NULL)
1406 goto out_free;
1407
097e66c5 1408 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1409 hash, &err);
e27dfcea 1410 if (other == NULL)
1da177e4
LT
1411 goto out_free;
1412 }
1413
1c92b4e5 1414 unix_state_lock(other);
1da177e4
LT
1415 err = -EPERM;
1416 if (!unix_may_send(sk, other))
1417 goto out_unlock;
1418
1419 if (sock_flag(other, SOCK_DEAD)) {
1420 /*
1421 * Check with 1003.1g - what should
1422 * datagram error
1423 */
1c92b4e5 1424 unix_state_unlock(other);
1da177e4
LT
1425 sock_put(other);
1426
1427 err = 0;
1c92b4e5 1428 unix_state_lock(sk);
/* If the dead socket was our connected peer, disconnect from it and report -ECONNREFUSED. */
1da177e4 1429 if (unix_peer(sk) == other) {
e27dfcea 1430 unix_peer(sk) = NULL;
1c92b4e5 1431 unix_state_unlock(sk);
1da177e4
LT
1432
1433 unix_dgram_disconnected(sk, other);
/* This put drops the reference that the unix_peer(sk) pointer was holding. */
1434 sock_put(other);
1435 err = -ECONNREFUSED;
1436 } else {
1c92b4e5 1437 unix_state_unlock(sk);
1da177e4
LT
1438 }
1439
1440 other = NULL;
1441 if (err)
1442 goto out_free;
1443 goto restart;
1444 }
1445
1446 err = -EPIPE;
1447 if (other->sk_shutdown & RCV_SHUTDOWN)
1448 goto out_unlock;
1449
1450 if (sk->sk_type != SOCK_SEQPACKET) {
1451 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1452 if (err)
1453 goto out_unlock;
1454 }
1455
/* The queue limit is not applied when the destination is connected back to us. */
3c73419c 1456 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1da177e4
LT
1457 if (!timeo) {
1458 err = -EAGAIN;
1459 goto out_unlock;
1460 }
1461
/* unix_wait_for_peer() releases other's state lock; the reference on other is kept across the restart. */
1462 timeo = unix_wait_for_peer(other, timeo);
1463
1464 err = sock_intr_errno(timeo);
1465 if (signal_pending(current))
1466 goto out_free;
1467
1468 goto restart;
1469 }
1470
1471 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 1472 unix_state_unlock(other);
1da177e4
LT
1473 other->sk_data_ready(other, len);
1474 sock_put(other);
1475 scm_destroy(siocb->scm);
1476 return len;
1477
1478out_unlock:
1c92b4e5 1479 unix_state_unlock(other);
1da177e4
LT
1480out_free:
1481 kfree_skb(skb);
1482out:
1483 if (other)
1484 sock_put(other);
1485 scm_destroy(siocb->scm);
1486 return err;
1487}
1488
ac7bfa62 1489
1da177e4
LT
1490static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1491 struct msghdr *msg, size_t len)
1492{
1493 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1494 struct sock *sk = sock->sk;
1495 struct sock *other = NULL;
e27dfcea 1496 struct sockaddr_un *sunaddr = msg->msg_name;
6eba6a37 1497 int err, size;
1da177e4 1498 struct sk_buff *skb;
e27dfcea 1499 int sent = 0;
1da177e4
LT
1500 struct scm_cookie tmp_scm;
1501
1502 if (NULL == siocb->scm)
1503 siocb->scm = &tmp_scm;
5f23b734 1504 wait_for_unix_gc();
1da177e4
LT
1505 err = scm_send(sock, msg, siocb->scm);
1506 if (err < 0)
1507 return err;
1508
1509 err = -EOPNOTSUPP;
1510 if (msg->msg_flags&MSG_OOB)
1511 goto out_err;
1512
1513 if (msg->msg_namelen) {
1514 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1515 goto out_err;
1516 } else {
1517 sunaddr = NULL;
1518 err = -ENOTCONN;
830a1e5c 1519 other = unix_peer(sk);
1da177e4
LT
1520 if (!other)
1521 goto out_err;
1522 }
1523
1524 if (sk->sk_shutdown & SEND_SHUTDOWN)
1525 goto pipe_err;
1526
6eba6a37 1527 while (sent < len) {
1da177e4 1528 /*
e9df7d7f
BL
1529 * Optimisation for the fact that under 0.01% of X
1530 * messages typically need breaking up.
1da177e4
LT
1531 */
1532
e9df7d7f 1533 size = len-sent;
1da177e4
LT
1534
1535 /* Keep two messages in the pipe so it schedules better */
e9df7d7f
BL
1536 if (size > ((sk->sk_sndbuf >> 1) - 64))
1537 size = (sk->sk_sndbuf >> 1) - 64;
1da177e4
LT
1538
1539 if (size > SKB_MAX_ALLOC)
1540 size = SKB_MAX_ALLOC;
ac7bfa62 1541
1da177e4
LT
1542 /*
1543 * Grab a buffer
1544 */
ac7bfa62 1545
6eba6a37
ED
1546 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1547 &err);
1da177e4 1548
e27dfcea 1549 if (skb == NULL)
1da177e4
LT
1550 goto out_err;
1551
1552 /*
1553 * If you pass two values to the sock_alloc_send_skb
1554 * it tries to grab the large buffer with GFP_NOFS
1555 * (which can fail easily), and if it fails grab the
1556 * fallback size buffer which is under a page and will
1557 * succeed. [Alan]
1558 */
1559 size = min_t(int, size, skb_tailroom(skb));
1560
1561 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
6209344f
MS
1562 if (siocb->scm->fp) {
1563 err = unix_attach_fds(siocb->scm, skb);
1564 if (err) {
1565 kfree_skb(skb);
1566 goto out_err;
1567 }
1568 }
1da177e4 1569
6eba6a37
ED
1570 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1571 if (err) {
1da177e4
LT
1572 kfree_skb(skb);
1573 goto out_err;
1574 }
1575
1c92b4e5 1576 unix_state_lock(other);
1da177e4
LT
1577
1578 if (sock_flag(other, SOCK_DEAD) ||
1579 (other->sk_shutdown & RCV_SHUTDOWN))
1580 goto pipe_err_free;
1581
1582 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 1583 unix_state_unlock(other);
1da177e4 1584 other->sk_data_ready(other, size);
e27dfcea 1585 sent += size;
1da177e4 1586 }
1da177e4
LT
1587
1588 scm_destroy(siocb->scm);
1589 siocb->scm = NULL;
1590
1591 return sent;
1592
1593pipe_err_free:
1c92b4e5 1594 unix_state_unlock(other);
1da177e4
LT
1595 kfree_skb(skb);
1596pipe_err:
6eba6a37
ED
1597 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1598 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1599 err = -EPIPE;
1600out_err:
1da177e4
LT
1601 scm_destroy(siocb->scm);
1602 siocb->scm = NULL;
1603 return sent ? : err;
1604}
1605
1606static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1607 struct msghdr *msg, size_t len)
1608{
1609 int err;
1610 struct sock *sk = sock->sk;
ac7bfa62 1611
1da177e4
LT
1612 err = sock_error(sk);
1613 if (err)
1614 return err;
1615
1616 if (sk->sk_state != TCP_ESTABLISHED)
1617 return -ENOTCONN;
1618
1619 if (msg->msg_namelen)
1620 msg->msg_namelen = 0;
1621
1622 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1623}
ac7bfa62 1624
1da177e4
LT
1625static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1626{
1627 struct unix_sock *u = unix_sk(sk);
1628
1629 msg->msg_namelen = 0;
1630 if (u->addr) {
1631 msg->msg_namelen = u->addr->len;
1632 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1633 }
1634}
1635
1636static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1637 struct msghdr *msg, size_t size,
1638 int flags)
1639{
1640 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1641 struct scm_cookie tmp_scm;
1642 struct sock *sk = sock->sk;
1643 struct unix_sock *u = unix_sk(sk);
1644 int noblock = flags & MSG_DONTWAIT;
1645 struct sk_buff *skb;
1646 int err;
1647
1648 err = -EOPNOTSUPP;
1649 if (flags&MSG_OOB)
1650 goto out;
1651
1652 msg->msg_namelen = 0;
1653
57b47a53 1654 mutex_lock(&u->readlock);
1da177e4
LT
1655
1656 skb = skb_recv_datagram(sk, flags, noblock, &err);
0a112258
FZ
1657 if (!skb) {
1658 unix_state_lock(sk);
1659 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1660 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1661 (sk->sk_shutdown & RCV_SHUTDOWN))
1662 err = 0;
1663 unix_state_unlock(sk);
1da177e4 1664 goto out_unlock;
0a112258 1665 }
1da177e4 1666
71e20f18 1667 wake_up_interruptible_sync(&u->peer_wait);
1da177e4
LT
1668
1669 if (msg->msg_name)
1670 unix_copy_addr(msg, skb->sk);
1671
1672 if (size > skb->len)
1673 size = skb->len;
1674 else if (size < skb->len)
1675 msg->msg_flags |= MSG_TRUNC;
1676
1677 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1678 if (err)
1679 goto out_free;
1680
1681 if (!siocb->scm) {
1682 siocb->scm = &tmp_scm;
1683 memset(&tmp_scm, 0, sizeof(tmp_scm));
1684 }
1685 siocb->scm->creds = *UNIXCREDS(skb);
877ce7c1 1686 unix_set_secdata(siocb->scm, skb);
1da177e4 1687
6eba6a37 1688 if (!(flags & MSG_PEEK)) {
1da177e4
LT
1689 if (UNIXCB(skb).fp)
1690 unix_detach_fds(siocb->scm, skb);
6eba6a37 1691 } else {
1da177e4
LT
1692 /* It is questionable: on PEEK we could:
1693 - do not return fds - good, but too simple 8)
1694 - return fds, and do not return them on read (old strategy,
1695 apparently wrong)
1696 - clone fds (I chose it for now, it is the most universal
1697 solution)
ac7bfa62
YH
1698
1699 POSIX 1003.1g does not actually define this clearly
1700 at all. POSIX 1003.1g doesn't define a lot of things
1701 clearly however!
1702
1da177e4
LT
1703 */
1704 if (UNIXCB(skb).fp)
1705 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1706 }
1707 err = size;
1708
1709 scm_recv(sock, msg, siocb->scm, flags);
1710
1711out_free:
6eba6a37 1712 skb_free_datagram(sk, skb);
1da177e4 1713out_unlock:
57b47a53 1714 mutex_unlock(&u->readlock);
1da177e4
LT
1715out:
1716 return err;
1717}
1718
1719/*
1720 * Sleep until data has arrived. But check for races..
1721 */
ac7bfa62 1722
6eba6a37 1723static long unix_stream_data_wait(struct sock *sk, long timeo)
1da177e4
LT
1724{
1725 DEFINE_WAIT(wait);
1726
1c92b4e5 1727 unix_state_lock(sk);
1da177e4
LT
1728
1729 for (;;) {
1730 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1731
b03efcfb 1732 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1da177e4
LT
1733 sk->sk_err ||
1734 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1735 signal_pending(current) ||
1736 !timeo)
1737 break;
1738
1739 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1c92b4e5 1740 unix_state_unlock(sk);
1da177e4 1741 timeo = schedule_timeout(timeo);
1c92b4e5 1742 unix_state_lock(sk);
1da177e4
LT
1743 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1744 }
1745
1746 finish_wait(sk->sk_sleep, &wait);
1c92b4e5 1747 unix_state_unlock(sk);
1da177e4
LT
1748 return timeo;
1749}
1750
1751
1752
/*
 * unix_stream_recvmsg - receive up to @size bytes from a stream socket.
 *
 * Outline of what the code below does:
 *  - fails with -EINVAL unless the socket is TCP_ESTABLISHED and with
 *    -EOPNOTSUPP for MSG_OOB;
 *  - loops under u->readlock, taking one skb at a time off the receive
 *    queue;
 *  - on an empty queue it stops if at least "target" bytes (from
 *    sock_rcvlowat()) were copied, a socket error is pending, or receiving
 *    is shut down; otherwise it fails with -EAGAIN when the timeout is zero
 *    or drops the readlock and sleeps in unix_stream_data_wait();
 *  - data from skbs with different sender credentials is never merged into
 *    one read: the first skb's credentials are recorded and a later skb
 *    with different ones is put back and ends the read;
 *  - the sender address is copied out once, from the first skb;
 *  - without MSG_PEEK the consumed bytes are pulled from the skb, passed
 *    files are detached into the scm cookie, a partially consumed skb is
 *    put back at the head of the queue, and the read stops after an skb
 *    that carried files;
 *  - with MSG_PEEK the files are duplicated, the skb is put back and the
 *    read stops after that single skb.
 *
 * Returns the number of bytes copied if non-zero (-EFAULT if the very first
 * copy to user space failed), otherwise the error code.
 */
1753static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1754 struct msghdr *msg, size_t size,
1755 int flags)
1756{
1757 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1758 struct scm_cookie tmp_scm;
1759 struct sock *sk = sock->sk;
1760 struct unix_sock *u = unix_sk(sk);
e27dfcea 1761 struct sockaddr_un *sunaddr = msg->msg_name;
1da177e4
LT
1762 int copied = 0;
1763 int check_creds = 0;
1764 int target;
1765 int err = 0;
1766 long timeo;
1767
1768 err = -EINVAL;
1769 if (sk->sk_state != TCP_ESTABLISHED)
1770 goto out;
1771
1772 err = -EOPNOTSUPP;
1773 if (flags&MSG_OOB)
1774 goto out;
1775
1776 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1777 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1778
1779 msg->msg_namelen = 0;
1780
1781 /* Lock the socket to prevent queue disordering
1782 * while sleeps in memcpy_tomsg
1783 */
1784
1785 if (!siocb->scm) {
1786 siocb->scm = &tmp_scm;
1787 memset(&tmp_scm, 0, sizeof(tmp_scm));
1788 }
1789
57b47a53 1790 mutex_lock(&u->readlock);
1da177e4 1791
6eba6a37 1792 do {
1da177e4
LT
1793 int chunk;
1794 struct sk_buff *skb;
1795
3c0d2f37 1796 unix_state_lock(sk);
1da177e4 1797 skb = skb_dequeue(&sk->sk_receive_queue);
6eba6a37 1798 if (skb == NULL) {
1da177e4 1799 if (copied >= target)
3c0d2f37 1800 goto unlock;
1da177e4
LT
1801
1802 /*
1803 * POSIX 1003.1g mandates this order.
1804 */
ac7bfa62 1805
6eba6a37
ED
1806 err = sock_error(sk);
1807 if (err)
3c0d2f37 1808 goto unlock;
1da177e4 1809 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
1810 goto unlock;
1811
1812 unix_state_unlock(sk);
1da177e4
LT
1813 err = -EAGAIN;
1814 if (!timeo)
1815 break;
/* Drop the readlock while sleeping; it is retaken below before retrying. */
57b47a53 1816 mutex_unlock(&u->readlock);
1da177e4
LT
1817
1818 timeo = unix_stream_data_wait(sk, timeo);
1819
1820 if (signal_pending(current)) {
1821 err = sock_intr_errno(timeo);
/* The readlock is not held here, so jump past the mutex_unlock() at the end. */
1822 goto out;
1823 }
57b47a53 1824 mutex_lock(&u->readlock);
1da177e4 1825 continue;
/* Reached only via goto, with the state lock still held. */
3c0d2f37
MS
1826 unlock:
1827 unix_state_unlock(sk);
1828 break;
1da177e4 1829 }
3c0d2f37 1830 unix_state_unlock(sk);
1da177e4
LT
1831
1832 if (check_creds) {
1833 /* Never glue messages from different writers */
6eba6a37
ED
1834 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1835 sizeof(siocb->scm->creds)) != 0) {
1da177e4
LT
1836 skb_queue_head(&sk->sk_receive_queue, skb);
1837 break;
1838 }
1839 } else {
1840 /* Copy credentials */
1841 siocb->scm->creds = *UNIXCREDS(skb);
1842 check_creds = 1;
1843 }
1844
1845 /* Copy address just once */
6eba6a37 1846 if (sunaddr) {
1da177e4
LT
1847 unix_copy_addr(msg, skb->sk);
1848 sunaddr = NULL;
1849 }
1850
1851 chunk = min_t(unsigned int, skb->len, size);
1852 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
/* Copy to user space failed: put the skb back untouched. */
1853 skb_queue_head(&sk->sk_receive_queue, skb);
1854 if (copied == 0)
1855 copied = -EFAULT;
1856 break;
1857 }
1858 copied += chunk;
1859 size -= chunk;
1860
1861 /* Mark read part of skb as used */
6eba6a37 1862 if (!(flags & MSG_PEEK)) {
1da177e4
LT
1863 skb_pull(skb, chunk);
1864
1865 if (UNIXCB(skb).fp)
1866 unix_detach_fds(siocb->scm, skb);
1867
1868 /* put the skb back if we didn't use it up.. */
6eba6a37 1869 if (skb->len) {
1da177e4
LT
1870 skb_queue_head(&sk->sk_receive_queue, skb);
1871 break;
1872 }
1873
1874 kfree_skb(skb);
1875
/* Stop after an skb that carried files, so they are delivered with this read. */
1876 if (siocb->scm->fp)
1877 break;
6eba6a37 1878 } else {
1da177e4
LT
1879 /* It is questionable, see note in unix_dgram_recvmsg.
1880 */
1881 if (UNIXCB(skb).fp)
1882 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1883
1884 /* put message back and return */
1885 skb_queue_head(&sk->sk_receive_queue, skb);
1886 break;
1887 }
1888 } while (size);
1889
57b47a53 1890 mutex_unlock(&u->readlock);
1da177e4
LT
1891 scm_recv(sock, msg, siocb->scm, flags);
1892out:
1893 return copied ? : err;
1894}
1895
1896static int unix_shutdown(struct socket *sock, int mode)
1897{
1898 struct sock *sk = sock->sk;
1899 struct sock *other;
1900
1901 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1902
1903 if (mode) {
1c92b4e5 1904 unix_state_lock(sk);
1da177e4 1905 sk->sk_shutdown |= mode;
e27dfcea 1906 other = unix_peer(sk);
1da177e4
LT
1907 if (other)
1908 sock_hold(other);
1c92b4e5 1909 unix_state_unlock(sk);
1da177e4
LT
1910 sk->sk_state_change(sk);
1911
1912 if (other &&
1913 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1914
1915 int peer_mode = 0;
1916
1917 if (mode&RCV_SHUTDOWN)
1918 peer_mode |= SEND_SHUTDOWN;
1919 if (mode&SEND_SHUTDOWN)
1920 peer_mode |= RCV_SHUTDOWN;
1c92b4e5 1921 unix_state_lock(other);
1da177e4 1922 other->sk_shutdown |= peer_mode;
1c92b4e5 1923 unix_state_unlock(other);
1da177e4
LT
1924 other->sk_state_change(other);
1925 read_lock(&other->sk_callback_lock);
1926 if (peer_mode == SHUTDOWN_MASK)
8d8ad9d7 1927 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 1928 else if (peer_mode & RCV_SHUTDOWN)
8d8ad9d7 1929 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4
LT
1930 read_unlock(&other->sk_callback_lock);
1931 }
1932 if (other)
1933 sock_put(other);
1934 }
1935 return 0;
1936}
1937
1938static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1939{
1940 struct sock *sk = sock->sk;
e27dfcea 1941 long amount = 0;
1da177e4
LT
1942 int err;
1943
6eba6a37
ED
1944 switch (cmd) {
1945 case SIOCOUTQ:
1946 amount = atomic_read(&sk->sk_wmem_alloc);
1947 err = put_user(amount, (int __user *)arg);
1948 break;
1949 case SIOCINQ:
1da177e4
LT
1950 {
1951 struct sk_buff *skb;
1952
1953 if (sk->sk_state == TCP_LISTEN) {
1954 err = -EINVAL;
1955 break;
1956 }
1957
1958 spin_lock(&sk->sk_receive_queue.lock);
1959 if (sk->sk_type == SOCK_STREAM ||
1960 sk->sk_type == SOCK_SEQPACKET) {
1961 skb_queue_walk(&sk->sk_receive_queue, skb)
1962 amount += skb->len;
1963 } else {
1964 skb = skb_peek(&sk->sk_receive_queue);
1965 if (skb)
e27dfcea 1966 amount = skb->len;
1da177e4
LT
1967 }
1968 spin_unlock(&sk->sk_receive_queue.lock);
1969 err = put_user(amount, (int __user *)arg);
1970 break;
1971 }
1972
6eba6a37
ED
1973 default:
1974 err = -ENOIOCTLCMD;
1975 break;
1da177e4
LT
1976 }
1977 return err;
1978}
1979
6eba6a37 1980static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
1981{
1982 struct sock *sk = sock->sk;
1983 unsigned int mask;
1984
1985 poll_wait(file, sk->sk_sleep, wait);
1986 mask = 0;
1987
1988 /* exceptional events? */
1989 if (sk->sk_err)
1990 mask |= POLLERR;
1991 if (sk->sk_shutdown == SHUTDOWN_MASK)
1992 mask |= POLLHUP;
f348d70a
DL
1993 if (sk->sk_shutdown & RCV_SHUTDOWN)
1994 mask |= POLLRDHUP;
1da177e4
LT
1995
1996 /* readable? */
1997 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1998 (sk->sk_shutdown & RCV_SHUTDOWN))
1999 mask |= POLLIN | POLLRDNORM;
2000
2001 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2002 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2003 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2004 mask |= POLLHUP;
2005
2006 /*
2007 * we set writable also when the other side has shut down the
2008 * connection. This prevents stuck sockets.
2009 */
2010 if (unix_writable(sk))
2011 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2012
2013 return mask;
2014}
2015
ec0d215f
RW
2016static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2017 poll_table *wait)
3c73419c 2018{
ec0d215f
RW
2019 struct sock *sk = sock->sk, *other;
2020 unsigned int mask, writable;
3c73419c
RW
2021
2022 poll_wait(file, sk->sk_sleep, wait);
3c73419c
RW
2023 mask = 0;
2024
2025 /* exceptional events? */
2026 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2027 mask |= POLLERR;
2028 if (sk->sk_shutdown & RCV_SHUTDOWN)
2029 mask |= POLLRDHUP;
2030 if (sk->sk_shutdown == SHUTDOWN_MASK)
2031 mask |= POLLHUP;
2032
2033 /* readable? */
2034 if (!skb_queue_empty(&sk->sk_receive_queue) ||
2035 (sk->sk_shutdown & RCV_SHUTDOWN))
2036 mask |= POLLIN | POLLRDNORM;
2037
2038 /* Connection-based need to check for termination and startup */
2039 if (sk->sk_type == SOCK_SEQPACKET) {
2040 if (sk->sk_state == TCP_CLOSE)
2041 mask |= POLLHUP;
2042 /* connection hasn't started yet? */
2043 if (sk->sk_state == TCP_SYN_SENT)
2044 return mask;
2045 }
2046
2047 /* writable? */
ec0d215f
RW
2048 writable = unix_writable(sk);
2049 if (writable) {
2050 other = unix_peer_get(sk);
2051 if (other) {
2052 if (unix_peer(other) != sk) {
2053 poll_wait(file, &unix_sk(other)->peer_wait,
2054 wait);
2055 if (unix_recvq_full(other))
2056 writable = 0;
2057 }
2058
2059 sock_put(other);
2060 }
2061 }
2062
2063 if (writable)
3c73419c
RW
2064 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2065 else
2066 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2067
3c73419c
RW
2068 return mask;
2069}
1da177e4
LT
2070
2071#ifdef CONFIG_PROC_FS
a53eb3fe
PE
2072static struct sock *first_unix_socket(int *i)
2073{
2074 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2075 if (!hlist_empty(&unix_socket_table[*i]))
2076 return __sk_head(&unix_socket_table[*i]);
2077 }
2078 return NULL;
2079}
2080
2081static struct sock *next_unix_socket(int *i, struct sock *s)
2082{
2083 struct sock *next = sk_next(s);
2084 /* More in this chain? */
2085 if (next)
2086 return next;
2087 /* Look for next non-empty chain. */
2088 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2089 if (!hlist_empty(&unix_socket_table[*i]))
2090 return __sk_head(&unix_socket_table[*i]);
2091 }
2092 return NULL;
2093}
2094
/*
 * Per-open iterator state for the "unix" proc file; unix_seq_open() passes
 * the size of this structure to seq_open_net().
 *
 * p: network-namespace bookkeeping for the seq_file helpers
 *    (NOTE(review): presumably has to stay the first member so that
 *    seq_file_net() can find it -- confirm).
 * i: index of the unix_socket_table bucket the walk is currently in,
 *    maintained by first_unix_socket()/next_unix_socket().
 */
097e66c5 2095struct unix_iter_state {
e372c414 2096 struct seq_net_private p;
097e66c5
DL
2097 int i;
2098};
e27dfcea 2099
1218854a 2100static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
1da177e4 2101{
1218854a 2102 struct unix_iter_state *iter = seq->private;
1da177e4
LT
2103 loff_t off = 0;
2104 struct sock *s;
2105
097e66c5 2106 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
1218854a 2107 if (sock_net(s) != seq_file_net(seq))
097e66c5 2108 continue;
ac7bfa62 2109 if (off == pos)
1da177e4
LT
2110 return s;
2111 ++off;
2112 }
2113 return NULL;
2114}
2115
1da177e4 2116static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2117 __acquires(unix_table_lock)
1da177e4 2118{
fbe9cc4a 2119 spin_lock(&unix_table_lock);
b9f3124f 2120 return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1da177e4
LT
2121}
2122
2123static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2124{
097e66c5
DL
2125 struct unix_iter_state *iter = seq->private;
2126 struct sock *sk = v;
1da177e4
LT
2127 ++*pos;
2128
b9f3124f 2129 if (v == SEQ_START_TOKEN)
097e66c5
DL
2130 sk = first_unix_socket(&iter->i);
2131 else
2132 sk = next_unix_socket(&iter->i, sk);
1218854a 2133 while (sk && (sock_net(sk) != seq_file_net(seq)))
097e66c5
DL
2134 sk = next_unix_socket(&iter->i, sk);
2135 return sk;
1da177e4
LT
2136}
2137
/*
 * seq_file ->stop: release unix_table_lock, which unix_seq_start() took.
 * Neither argument is used.
 */
2138static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2139 __releases(unix_table_lock)
1da177e4 2140{
fbe9cc4a 2141 spin_unlock(&unix_table_lock);
1da177e4
LT
2142}
2143
/*
 * seq_file ->show: emit one line of the "unix" proc file.
 *
 * For SEQ_START_TOKEN the column header is printed.  For a socket, the
 * line holds (under the socket's state lock): the sock pointer, its
 * reference count, a constant 0 protocol, __SO_ACCEPTCON if listening, the
 * socket type, a state value derived from sk_state and whether a struct
 * socket is attached, and the inode number.  The bound path, if any,
 * follows.
 *
 * Always returns 0.
 */
2144static int unix_seq_show(struct seq_file *seq, void *v)
2145{
ac7bfa62 2146
b9f3124f 2147 if (v == SEQ_START_TOKEN)
1da177e4
LT
2148 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2149 "Inode Path\n");
2150 else {
2151 struct sock *s = v;
2152 struct unix_sock *u = unix_sk(s);
1c92b4e5 2153 unix_state_lock(s);
1da177e4
LT
2154
2155 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2156 s,
2157 atomic_read(&s->sk_refcnt),
2158 0,
2159 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2160 s->sk_type,
2161 s->sk_socket ?
2162 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2163 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2164 sock_i_ino(s));
2165
2166 if (u->addr) {
2167 int i, len;
2168 seq_putc(seq, ' ');
2169
2170 i = 0;
/* Length of the path part only: total address length minus the sizeof(short) subtracted here. */
2171 len = u->addr->len - sizeof(short);
/* Filesystem names: print one byte less (NOTE(review): presumably the trailing NUL -- confirm). */
2172 if (!UNIX_ABSTRACT(s))
2173 len--;
2174 else {
/* Abstract names: print '@' in place of the first path byte. */
2175 seq_putc(seq, '@');
2176 i++;
2177 }
2178 for ( ; i < len; i++)
2179 seq_putc(seq, u->addr->name->sun_path[i]);
2180 }
1c92b4e5 2181 unix_state_unlock(s);
1da177e4
LT
2182 seq_putc(seq, '\n');
2183 }
2184
2185 return 0;
2186}
2187
/*
 * seq_file iterator callbacks for the "unix" proc file.  ->start takes
 * unix_table_lock and ->stop releases it.
 */
56b3d975 2188static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2189 .start = unix_seq_start,
2190 .next = unix_seq_next,
2191 .stop = unix_seq_stop,
2192 .show = unix_seq_show,
2193};
2194
1da177e4
LT
2195static int unix_seq_open(struct inode *inode, struct file *file)
2196{
e372c414
DL
2197 return seq_open_net(inode, file, &unix_seq_ops,
2198 sizeof(struct unix_iter_state));
1da177e4
LT
2199}
2200
/*
 * File operations for the "unix" proc file: opened through
 * unix_seq_open(), read and seeked with the generic seq_file helpers, and
 * released with seq_release_net() to match the seq_open_net() done on open.
 */
da7071d7 2201static const struct file_operations unix_seq_fops = {
1da177e4
LT
2202 .owner = THIS_MODULE,
2203 .open = unix_seq_open,
2204 .read = seq_read,
2205 .llseek = seq_lseek,
e372c414 2206 .release = seq_release_net,
1da177e4
LT
2207};
2208
2209#endif
2210
2211static struct net_proto_family unix_family_ops = {
2212 .family = PF_UNIX,
2213 .create = unix_create,
2214 .owner = THIS_MODULE,
2215};
2216
097e66c5
DL
2217
2218static int unix_net_init(struct net *net)
2219{
2220 int error = -ENOMEM;
2221
a0a53c8b 2222 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2223 if (unix_sysctl_register(net))
2224 goto out;
d392e497 2225
097e66c5 2226#ifdef CONFIG_PROC_FS
1597fbc0
PE
2227 if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2228 unix_sysctl_unregister(net);
097e66c5 2229 goto out;
1597fbc0 2230 }
097e66c5
DL
2231#endif
2232 error = 0;
2233out:
48dcc33e 2234 return error;
097e66c5
DL
2235}
2236
/*
 * Per-namespace exit hook (see unix_net_ops): unregisters the sysctl
 * table and removes the "unix" proc entry for @net.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2242
2243static struct pernet_operations unix_net_ops = {
2244 .init = unix_net_init,
2245 .exit = unix_net_exit,
2246};
2247
1da177e4
LT
2248static int __init af_unix_init(void)
2249{
2250 int rc = -1;
2251 struct sk_buff *dummy_skb;
2252
ef047f5e 2253 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
1da177e4
LT
2254
2255 rc = proto_register(&unix_proto, 1);
ac7bfa62
YH
2256 if (rc != 0) {
2257 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
0dc47877 2258 __func__);
1da177e4
LT
2259 goto out;
2260 }
2261
2262 sock_register(&unix_family_ops);
097e66c5 2263 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2264out:
2265 return rc;
2266}
2267
2268static void __exit af_unix_exit(void)
2269{
2270 sock_unregister(PF_UNIX);
1da177e4 2271 proto_unregister(&unix_proto);
097e66c5 2272 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2273}
2274
3d366960
DW
2275/* Earlier than device_initcall() so that other drivers invoking
2276 request_module() don't end up in a loop when modprobe tries
2277 to use a UNIX socket. But later than subsys_initcall() because
2278 we depend on stuff initialised there */
2279fs_initcall(af_unix_init);
1da177e4
LT
2280module_exit(af_unix_exit);
2281
2282MODULE_LICENSE("GPL");
2283MODULE_ALIAS_NETPROTO(PF_UNIX);