[NET]: Consolidate net namespace related proc files creation.
[linux-2.6-block.git] / net / unix / af_unix.c
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan.cox@linux.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by the above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid a huge amount
40 * of socks hashed (this is for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 * [TO FIX]
58 * ECONNREFUSED is not returned from one end of a connected() socket to the
59 * other the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 * [NOT TO FIX]
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
75 *
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
82 * with BSD names.
83 */
84
85#include <linux/module.h>
86#include <linux/kernel.h>
87#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
106#include <net/net_namespace.h>
107#include <net/sock.h>
108#include <net/tcp_states.h>
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
119
120int sysctl_unix_max_dgram_qlen __read_mostly = 10;
121
122static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123static DEFINE_SPINLOCK(unix_table_lock);
124static atomic_t unix_nr_socks = ATOMIC_INIT(0);
125
126#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
127
128#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
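/* The table holds UNIX_HASH_SIZE chains for bound sockets plus one extra
 * chain (index UNIX_HASH_SIZE) for sockets that have no address yet;
 * unix_bind()/unix_autobind() move a socket onto its proper chain.
 * A filesystem bind stores addr->hash == UNIX_HASH_SIZE, so UNIX_ABSTRACT()
 * is true only for abstract-namespace names. */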
130static struct sock *first_unix_socket(int *i)
131{
132 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
133 if (!hlist_empty(&unix_socket_table[*i]))
134 return __sk_head(&unix_socket_table[*i]);
135 }
136 return NULL;
137}
138
139static struct sock *next_unix_socket(int *i, struct sock *s)
140{
141 struct sock *next = sk_next(s);
142 /* More in this chain? */
143 if (next)
144 return next;
145 /* Look for next non-empty chain. */
146 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
147 if (!hlist_empty(&unix_socket_table[*i]))
148 return __sk_head(&unix_socket_table[*i]);
149 }
150 return NULL;
151}
152
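/* Walks every chain, including the unbound one; unix_table_lock must be
 * held across the walk (see the SMP locking note below). */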
153#define forall_unix_sockets(i, s) \
154 for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
155
156#ifdef CONFIG_SECURITY_NETWORK
157static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{
159 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
160}
161
162static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
163{
164 scm->secid = *UNIXSID(skb);
165}
166#else
167static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
168{ }
169
170static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
171{ }
172#endif /* CONFIG_SECURITY_NETWORK */
173
174/*
175 * SMP locking strategy:
176 * hash table is protected with spinlock unix_table_lock
177 * each socket state is protected by separate rwlock.
178 */
179
180static inline unsigned unix_hash_fold(__wsum n)
181{
182 unsigned hash = (__force unsigned)n;
183 hash ^= hash>>16;
184 hash ^= hash>>8;
185 return hash&(UNIX_HASH_SIZE-1);
186}
187
188#define unix_peer(sk) (unix_sk(sk)->peer)
189
190static inline int unix_our_peer(struct sock *sk, struct sock *osk)
191{
192 return unix_peer(osk) == sk;
193}
194
195static inline int unix_may_send(struct sock *sk, struct sock *osk)
196{
197 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
198}
199
200static struct sock *unix_peer_get(struct sock *s)
201{
202 struct sock *peer;
203
204 unix_state_lock(s);
205 peer = unix_peer(s);
206 if (peer)
207 sock_hold(peer);
208 unix_state_unlock(s);
209 return peer;
210}
211
212static inline void unix_release_addr(struct unix_address *addr)
213{
214 if (atomic_dec_and_test(&addr->refcnt))
215 kfree(addr);
216}
217
218/*
219 * Check unix socket name:
220 * - should be not zero length.
221 * - if started by not zero, should be NULL terminated (FS object)
222 * - if started by zero, it is abstract name.
223 */
224
225static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
226{
227 if (len <= sizeof(short) || len > sizeof(*sunaddr))
228 return -EINVAL;
229 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230 return -EINVAL;
231 if (sunaddr->sun_path[0]) {
232 /*
233 * This may look like an off by one error but it is a bit more
234 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235 * sun_path[108] doesn't as such exist. However in kernel space
236 * we are guaranteed that it is a valid memory location in our
237 * kernel address buffer.
238 */
239 ((char *)sunaddr)[len]=0;
240 len = strlen(sunaddr->sun_path)+1+sizeof(short);
241 return len;
242 }
243
244 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
245 return len;
246}
247
248static void __unix_remove_socket(struct sock *sk)
249{
250 sk_del_node_init(sk);
251}
252
253static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254{
255 BUG_TRAP(sk_unhashed(sk));
256 sk_add_node(sk, list);
257}
258
259static inline void unix_remove_socket(struct sock *sk)
260{
261 spin_lock(&unix_table_lock);
262 __unix_remove_socket(sk);
263 spin_unlock(&unix_table_lock);
264}
265
266static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267{
268 spin_lock(&unix_table_lock);
269 __unix_insert_socket(list, sk);
270 spin_unlock(&unix_table_lock);
271}
272
273static struct sock *__unix_find_socket_byname(struct net *net,
274 struct sockaddr_un *sunname,
275 int len, int type, unsigned hash)
276{
277 struct sock *s;
278 struct hlist_node *node;
279
280 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
281 struct unix_sock *u = unix_sk(s);
282
283 if (s->sk_net != net)
284 continue;
285
286 if (u->addr->len == len &&
287 !memcmp(u->addr->name, sunname, len))
288 goto found;
289 }
290 s = NULL;
291found:
292 return s;
293}
294
295static inline struct sock *unix_find_socket_byname(struct net *net,
296 struct sockaddr_un *sunname,
297 int len, int type,
298 unsigned hash)
299{
300 struct sock *s;
301
302 spin_lock(&unix_table_lock);
303 s = __unix_find_socket_byname(net, sunname, len, type, hash);
304 if (s)
305 sock_hold(s);
306 spin_unlock(&unix_table_lock);
307 return s;
308}
309
310static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
311{
312 struct sock *s;
313 struct hlist_node *node;
314
315 spin_lock(&unix_table_lock);
316 sk_for_each(s, node,
317 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
318 struct dentry *dentry = unix_sk(s)->dentry;
319
320 if (s->sk_net != net)
321 continue;
322
323 if(dentry && dentry->d_inode == i)
324 {
325 sock_hold(s);
326 goto found;
327 }
328 }
329 s = NULL;
330found:
331 spin_unlock(&unix_table_lock);
332 return s;
333}
334
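/* A socket counts as writable while less than a quarter of its send buffer is consumed. */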
335static inline int unix_writable(struct sock *sk)
336{
337 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
338}
339
340static void unix_write_space(struct sock *sk)
341{
342 read_lock(&sk->sk_callback_lock);
343 if (unix_writable(sk)) {
344 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
345 wake_up_interruptible_sync(sk->sk_sleep);
346 sk_wake_async(sk, 2, POLL_OUT);
347 }
348 read_unlock(&sk->sk_callback_lock);
349}
350
351/* When dgram socket disconnects (or changes its peer), we clear its receive
352 * queue of packets arrived from previous peer. First, it allows to do
353 * flow control based only on wmem_alloc; second, sk connected to peer
354 * may receive messages only from that peer. */
355static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
356{
357 if (!skb_queue_empty(&sk->sk_receive_queue)) {
358 skb_queue_purge(&sk->sk_receive_queue);
359 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
360
361 /* If one link of bidirectional dgram pipe is disconnected,
362 * we signal error. Messages are lost. Do not make this,
363 * when peer was not connected to us.
364 */
365 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
366 other->sk_err = ECONNRESET;
367 other->sk_error_report(other);
368 }
369 }
370}
371
372static void unix_sock_destructor(struct sock *sk)
373{
374 struct unix_sock *u = unix_sk(sk);
375
376 skb_queue_purge(&sk->sk_receive_queue);
377
378 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
379 BUG_TRAP(sk_unhashed(sk));
380 BUG_TRAP(!sk->sk_socket);
381 if (!sock_flag(sk, SOCK_DEAD)) {
382 printk("Attempt to release alive unix socket: %p\n", sk);
383 return;
384 }
385
386 if (u->addr)
387 unix_release_addr(u->addr);
388
389 atomic_dec(&unix_nr_socks);
390#ifdef UNIX_REFCNT_DEBUG
391 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
392#endif
393}
394
395static int unix_release_sock (struct sock *sk, int embrion)
396{
397 struct unix_sock *u = unix_sk(sk);
398 struct dentry *dentry;
399 struct vfsmount *mnt;
400 struct sock *skpair;
401 struct sk_buff *skb;
402 int state;
403
404 unix_remove_socket(sk);
405
406 /* Clear state */
407 unix_state_lock(sk);
408 sock_orphan(sk);
409 sk->sk_shutdown = SHUTDOWN_MASK;
410 dentry = u->dentry;
411 u->dentry = NULL;
412 mnt = u->mnt;
413 u->mnt = NULL;
414 state = sk->sk_state;
415 sk->sk_state = TCP_CLOSE;
416 unix_state_unlock(sk);
417
418 wake_up_interruptible_all(&u->peer_wait);
419
420 skpair=unix_peer(sk);
421
422 if (skpair!=NULL) {
423 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
424 unix_state_lock(skpair);
425 /* No more writes */
426 skpair->sk_shutdown = SHUTDOWN_MASK;
427 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
428 skpair->sk_err = ECONNRESET;
429 unix_state_unlock(skpair);
430 skpair->sk_state_change(skpair);
431 read_lock(&skpair->sk_callback_lock);
432 sk_wake_async(skpair,1,POLL_HUP);
433 read_unlock(&skpair->sk_callback_lock);
434 }
435 sock_put(skpair); /* It may now die */
436 unix_peer(sk) = NULL;
437 }
438
439 /* Try to flush out this socket. Throw out buffers at least */
440
441 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
442 if (state==TCP_LISTEN)
443 unix_release_sock(skb->sk, 1);
444 /* passed fds are erased in the kfree_skb hook */
445 kfree_skb(skb);
446 }
447
448 if (dentry) {
449 dput(dentry);
450 mntput(mnt);
451 }
452
453 sock_put(sk);
454
455 /* ---- Socket is dead now and most probably destroyed ---- */
456
457 /*
458 * Fixme: BSD difference: In BSD all sockets connected to us get
459 * ECONNRESET and we die on the spot. In Linux we behave
460 * like files and pipes do and wait for the last
461 * dereference.
462 *
463 * Can't we simply set sock->err?
464 *
465 * What the above comment does talk about? --ANK(980817)
466 */
467
468 if (unix_tot_inflight)
469 unix_gc(); /* Garbage collect fds */
470
471 return 0;
472}
473
474static int unix_listen(struct socket *sock, int backlog)
475{
476 int err;
477 struct sock *sk = sock->sk;
478 struct unix_sock *u = unix_sk(sk);
479
480 err = -EOPNOTSUPP;
481 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
482 goto out; /* Only stream/seqpacket sockets accept */
483 err = -EINVAL;
484 if (!u->addr)
485 goto out; /* No listens on an unbound socket */
486 unix_state_lock(sk);
487 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
488 goto out_unlock;
489 if (backlog > sk->sk_max_ack_backlog)
490 wake_up_interruptible_all(&u->peer_wait);
491 sk->sk_max_ack_backlog = backlog;
492 sk->sk_state = TCP_LISTEN;
493 /* set credentials so connect can copy them */
494 sk->sk_peercred.pid = task_tgid_vnr(current);
495 sk->sk_peercred.uid = current->euid;
496 sk->sk_peercred.gid = current->egid;
497 err = 0;
498
499out_unlock:
500 unix_state_unlock(sk);
501out:
502 return err;
503}
504
505static int unix_release(struct socket *);
506static int unix_bind(struct socket *, struct sockaddr *, int);
507static int unix_stream_connect(struct socket *, struct sockaddr *,
508 int addr_len, int flags);
509static int unix_socketpair(struct socket *, struct socket *);
510static int unix_accept(struct socket *, struct socket *, int);
511static int unix_getname(struct socket *, struct sockaddr *, int *, int);
512static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
513static int unix_ioctl(struct socket *, unsigned int, unsigned long);
514static int unix_shutdown(struct socket *, int);
515static int unix_stream_sendmsg(struct kiocb *, struct socket *,
516 struct msghdr *, size_t);
517static int unix_stream_recvmsg(struct kiocb *, struct socket *,
518 struct msghdr *, size_t, int);
519static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
520 struct msghdr *, size_t);
521static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
522 struct msghdr *, size_t, int);
523static int unix_dgram_connect(struct socket *, struct sockaddr *,
524 int, int);
525static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
526 struct msghdr *, size_t);
527
528static const struct proto_ops unix_stream_ops = {
529 .family = PF_UNIX,
530 .owner = THIS_MODULE,
531 .release = unix_release,
532 .bind = unix_bind,
533 .connect = unix_stream_connect,
534 .socketpair = unix_socketpair,
535 .accept = unix_accept,
536 .getname = unix_getname,
537 .poll = unix_poll,
538 .ioctl = unix_ioctl,
539 .listen = unix_listen,
540 .shutdown = unix_shutdown,
541 .setsockopt = sock_no_setsockopt,
542 .getsockopt = sock_no_getsockopt,
543 .sendmsg = unix_stream_sendmsg,
544 .recvmsg = unix_stream_recvmsg,
545 .mmap = sock_no_mmap,
546 .sendpage = sock_no_sendpage,
547};
548
549static const struct proto_ops unix_dgram_ops = {
550 .family = PF_UNIX,
551 .owner = THIS_MODULE,
552 .release = unix_release,
553 .bind = unix_bind,
554 .connect = unix_dgram_connect,
555 .socketpair = unix_socketpair,
556 .accept = sock_no_accept,
557 .getname = unix_getname,
558 .poll = datagram_poll,
559 .ioctl = unix_ioctl,
560 .listen = sock_no_listen,
561 .shutdown = unix_shutdown,
562 .setsockopt = sock_no_setsockopt,
563 .getsockopt = sock_no_getsockopt,
564 .sendmsg = unix_dgram_sendmsg,
565 .recvmsg = unix_dgram_recvmsg,
566 .mmap = sock_no_mmap,
567 .sendpage = sock_no_sendpage,
568};
569
570static const struct proto_ops unix_seqpacket_ops = {
571 .family = PF_UNIX,
572 .owner = THIS_MODULE,
573 .release = unix_release,
574 .bind = unix_bind,
575 .connect = unix_stream_connect,
576 .socketpair = unix_socketpair,
577 .accept = unix_accept,
578 .getname = unix_getname,
579 .poll = datagram_poll,
580 .ioctl = unix_ioctl,
581 .listen = unix_listen,
582 .shutdown = unix_shutdown,
583 .setsockopt = sock_no_setsockopt,
584 .getsockopt = sock_no_getsockopt,
585 .sendmsg = unix_seqpacket_sendmsg,
586 .recvmsg = unix_dgram_recvmsg,
587 .mmap = sock_no_mmap,
588 .sendpage = sock_no_sendpage,
589};
590
591static struct proto unix_proto = {
592 .name = "UNIX",
593 .owner = THIS_MODULE,
594 .obj_size = sizeof(struct unix_sock),
595};
596
597/*
598 * AF_UNIX sockets do not interact with hardware, hence they
599 * dont trigger interrupts - so it's safe for them to have
600 * bh-unsafe locking for their sk_receive_queue.lock. Split off
601 * this special lock-class by reinitializing the spinlock key:
602 */
603static struct lock_class_key af_unix_sk_receive_queue_lock_key;
604
605static struct sock * unix_create1(struct net *net, struct socket *sock)
606{
607 struct sock *sk = NULL;
608 struct unix_sock *u;
609
610 atomic_inc(&unix_nr_socks);
611 if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
612 goto out;
613
614 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
615 if (!sk)
616 goto out;
617
618 sock_init_data(sock,sk);
619 lockdep_set_class(&sk->sk_receive_queue.lock,
620 &af_unix_sk_receive_queue_lock_key);
621
622 sk->sk_write_space = unix_write_space;
623 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
624 sk->sk_destruct = unix_sock_destructor;
625 u = unix_sk(sk);
626 u->dentry = NULL;
627 u->mnt = NULL;
628 spin_lock_init(&u->lock);
629 atomic_set(&u->inflight, 0);
630 INIT_LIST_HEAD(&u->link);
631 mutex_init(&u->readlock); /* single task reading lock */
632 init_waitqueue_head(&u->peer_wait);
633 unix_insert_socket(unix_sockets_unbound, sk);
634out:
635 if (sk == NULL)
636 atomic_dec(&unix_nr_socks);
637 return sk;
638}
639
640static int unix_create(struct net *net, struct socket *sock, int protocol)
641{
642 if (protocol && protocol != PF_UNIX)
643 return -EPROTONOSUPPORT;
644
645 sock->state = SS_UNCONNECTED;
646
647 switch (sock->type) {
648 case SOCK_STREAM:
649 sock->ops = &unix_stream_ops;
650 break;
651 /*
652 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
653 * nothing uses it.
654 */
655 case SOCK_RAW:
656 sock->type=SOCK_DGRAM;
657 case SOCK_DGRAM:
658 sock->ops = &unix_dgram_ops;
659 break;
660 case SOCK_SEQPACKET:
661 sock->ops = &unix_seqpacket_ops;
662 break;
663 default:
664 return -ESOCKTNOSUPPORT;
665 }
666
667 return unix_create1(net, sock) ? 0 : -ENOMEM;
668}
669
670static int unix_release(struct socket *sock)
671{
672 struct sock *sk = sock->sk;
673
674 if (!sk)
675 return 0;
676
677 sock->sk = NULL;
678
679 return unix_release_sock (sk, 0);
680}
681
682static int unix_autobind(struct socket *sock)
683{
684 struct sock *sk = sock->sk;
685 struct net *net = sk->sk_net;
686 struct unix_sock *u = unix_sk(sk);
687 static u32 ordernum = 1;
688 struct unix_address * addr;
689 int err;
690
691 mutex_lock(&u->readlock);
692
693 err = 0;
694 if (u->addr)
695 goto out;
696
697 err = -ENOMEM;
698 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
699 if (!addr)
700 goto out;
701
702 addr->name->sun_family = AF_UNIX;
703 atomic_set(&addr->refcnt, 1);
704
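/* Autobind: pick an unused abstract name of the form "\0XXXXX" (five hex
 * digits), retrying while the generated name is already taken. */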
705retry:
706 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
707 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
708
709 spin_lock(&unix_table_lock);
710 ordernum = (ordernum+1)&0xFFFFF;
711
712 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
713 addr->hash)) {
714 spin_unlock(&unix_table_lock);
715 /* Sanity yield. It is unusual case, but yet... */
716 if (!(ordernum&0xFF))
717 yield();
718 goto retry;
719 }
720 addr->hash ^= sk->sk_type;
721
722 __unix_remove_socket(sk);
723 u->addr = addr;
724 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
725 spin_unlock(&unix_table_lock);
726 err = 0;
727
728out: mutex_unlock(&u->readlock);
729 return err;
730}
731
732static struct sock *unix_find_other(struct net *net,
733 struct sockaddr_un *sunname, int len,
734 int type, unsigned hash, int *error)
735{
736 struct sock *u;
737 struct nameidata nd;
738 int err = 0;
739
740 if (sunname->sun_path[0]) {
741 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
742 if (err)
743 goto fail;
744 err = vfs_permission(&nd, MAY_WRITE);
745 if (err)
746 goto put_fail;
747
748 err = -ECONNREFUSED;
749 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
750 goto put_fail;
751 u=unix_find_socket_byinode(net, nd.dentry->d_inode);
752 if (!u)
753 goto put_fail;
754
755 if (u->sk_type == type)
756 touch_atime(nd.mnt, nd.dentry);
757
758 path_release(&nd);
759
760 err=-EPROTOTYPE;
761 if (u->sk_type != type) {
762 sock_put(u);
763 goto fail;
764 }
765 } else {
766 err = -ECONNREFUSED;
767 u=unix_find_socket_byname(net, sunname, len, type, hash);
768 if (u) {
769 struct dentry *dentry;
770 dentry = unix_sk(u)->dentry;
771 if (dentry)
772 touch_atime(unix_sk(u)->mnt, dentry);
773 } else
774 goto fail;
775 }
776 return u;
777
778put_fail:
779 path_release(&nd);
780fail:
781 *error=err;
782 return NULL;
783}
784
785
786static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
787{
788 struct sock *sk = sock->sk;
789 struct net *net = sk->sk_net;
790 struct unix_sock *u = unix_sk(sk);
791 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
792 struct dentry * dentry = NULL;
793 struct nameidata nd;
794 int err;
795 unsigned hash;
796 struct unix_address *addr;
797 struct hlist_head *list;
798
799 err = -EINVAL;
800 if (sunaddr->sun_family != AF_UNIX)
801 goto out;
802
803 if (addr_len==sizeof(short)) {
804 err = unix_autobind(sock);
805 goto out;
806 }
807
808 err = unix_mkname(sunaddr, addr_len, &hash);
809 if (err < 0)
810 goto out;
811 addr_len = err;
812
813 mutex_lock(&u->readlock);
814
815 err = -EINVAL;
816 if (u->addr)
817 goto out_up;
818
819 err = -ENOMEM;
820 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
821 if (!addr)
822 goto out_up;
823
824 memcpy(addr->name, sunaddr, addr_len);
825 addr->len = addr_len;
826 addr->hash = hash ^ sk->sk_type;
827 atomic_set(&addr->refcnt, 1);
828
829 if (sunaddr->sun_path[0]) {
830 unsigned int mode;
831 err = 0;
832 /*
833 * Get the parent directory, calculate the hash for last
834 * component.
835 */
836 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
837 if (err)
838 goto out_mknod_parent;
839
840 dentry = lookup_create(&nd, 0);
841 err = PTR_ERR(dentry);
842 if (IS_ERR(dentry))
843 goto out_mknod_unlock;
844
845 /*
846 * All right, let's create it.
847 */
848 mode = S_IFSOCK |
849 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
850 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
851 if (err)
852 goto out_mknod_dput;
853 mutex_unlock(&nd.dentry->d_inode->i_mutex);
854 dput(nd.dentry);
855 nd.dentry = dentry;
856
857 addr->hash = UNIX_HASH_SIZE;
858 }
859
860 spin_lock(&unix_table_lock);
861
862 if (!sunaddr->sun_path[0]) {
863 err = -EADDRINUSE;
864 if (__unix_find_socket_byname(net, sunaddr, addr_len,
865 sk->sk_type, hash)) {
866 unix_release_addr(addr);
867 goto out_unlock;
868 }
869
870 list = &unix_socket_table[addr->hash];
871 } else {
872 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
873 u->dentry = nd.dentry;
874 u->mnt = nd.mnt;
875 }
876
877 err = 0;
878 __unix_remove_socket(sk);
879 u->addr = addr;
880 __unix_insert_socket(list, sk);
881
882out_unlock:
883 spin_unlock(&unix_table_lock);
884out_up:
885 mutex_unlock(&u->readlock);
886out:
887 return err;
888
889out_mknod_dput:
890 dput(dentry);
891out_mknod_unlock:
892 mutex_unlock(&nd.dentry->d_inode->i_mutex);
893 path_release(&nd);
894out_mknod_parent:
895 if (err==-EEXIST)
896 err=-EADDRINUSE;
897 unix_release_addr(addr);
898 goto out_up;
899}
900
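/* Take two socket state locks without deadlocking: always lock the
 * lower-addressed socket first; the nested annotation tells lockdep the
 * second acquisition is intentional. sk2 may be NULL (AF_UNSPEC
 * disconnect), in which case only sk1 is locked. */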
901static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
902{
903 if (unlikely(sk1 == sk2) || !sk2) {
904 unix_state_lock(sk1);
905 return;
906 }
907 if (sk1 < sk2) {
908 unix_state_lock(sk1);
909 unix_state_lock_nested(sk2);
910 } else {
911 unix_state_lock(sk2);
912 unix_state_lock_nested(sk1);
913 }
914}
915
916static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
917{
918 if (unlikely(sk1 == sk2) || !sk2) {
919 unix_state_unlock(sk1);
920 return;
921 }
922 unix_state_unlock(sk1);
923 unix_state_unlock(sk2);
924}
925
926static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
927 int alen, int flags)
928{
929 struct sock *sk = sock->sk;
930 struct net *net = sk->sk_net;
931 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
932 struct sock *other;
933 unsigned hash;
934 int err;
935
936 if (addr->sa_family != AF_UNSPEC) {
937 err = unix_mkname(sunaddr, alen, &hash);
938 if (err < 0)
939 goto out;
940 alen = err;
941
942 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
943 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
944 goto out;
945
946restart:
947 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
948 if (!other)
949 goto out;
950
951 unix_state_double_lock(sk, other);
952
953 /* Apparently VFS overslept socket death. Retry. */
954 if (sock_flag(other, SOCK_DEAD)) {
955 unix_state_double_unlock(sk, other);
956 sock_put(other);
957 goto restart;
958 }
959
960 err = -EPERM;
961 if (!unix_may_send(sk, other))
962 goto out_unlock;
963
964 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
965 if (err)
966 goto out_unlock;
967
968 } else {
969 /*
970 * 1003.1g breaking connected state with AF_UNSPEC
971 */
972 other = NULL;
973 unix_state_double_lock(sk, other);
974 }
975
976 /*
977 * If it was connected, reconnect.
978 */
979 if (unix_peer(sk)) {
980 struct sock *old_peer = unix_peer(sk);
981 unix_peer(sk)=other;
982 unix_state_double_unlock(sk, other);
983
984 if (other != old_peer)
985 unix_dgram_disconnected(sk, old_peer);
986 sock_put(old_peer);
987 } else {
988 unix_peer(sk)=other;
989 unix_state_double_unlock(sk, other);
990 }
991 return 0;
992
993out_unlock:
994 unix_state_double_unlock(sk, other);
995 sock_put(other);
996out:
997 return err;
998}
999
1000static long unix_wait_for_peer(struct sock *other, long timeo)
1001{
1002 struct unix_sock *u = unix_sk(other);
1003 int sched;
1004 DEFINE_WAIT(wait);
1005
1006 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1007
1008 sched = !sock_flag(other, SOCK_DEAD) &&
1009 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1010 (skb_queue_len(&other->sk_receive_queue) >
1011 other->sk_max_ack_backlog);
1012
1013 unix_state_unlock(other);
1014
1015 if (sched)
1016 timeo = schedule_timeout(timeo);
1017
1018 finish_wait(&u->peer_wait, &wait);
1019 return timeo;
1020}
1021
1022static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1023 int addr_len, int flags)
1024{
1025 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1026 struct sock *sk = sock->sk;
1027 struct net *net = sk->sk_net;
1028 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1029 struct sock *newsk = NULL;
1030 struct sock *other = NULL;
1031 struct sk_buff *skb = NULL;
1032 unsigned hash;
1033 int st;
1034 int err;
1035 long timeo;
1036
1037 err = unix_mkname(sunaddr, addr_len, &hash);
1038 if (err < 0)
1039 goto out;
1040 addr_len = err;
1041
1042 if (test_bit(SOCK_PASSCRED, &sock->flags)
1043 && !u->addr && (err = unix_autobind(sock)) != 0)
1044 goto out;
1045
1046 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1047
1048 /* First of all allocate resources.
1049 If we will make it after state is locked,
1050 we will have to recheck all again in any case.
1051 */
1052
1053 err = -ENOMEM;
1054
1055 /* create new sock for complete connection */
1056 newsk = unix_create1(sk->sk_net, NULL);
1057 if (newsk == NULL)
1058 goto out;
1059
1060 /* Allocate skb for sending to listening sock */
1061 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1062 if (skb == NULL)
1063 goto out;
1064
1065restart:
1066 /* Find listening sock. */
1067 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1068 if (!other)
1069 goto out;
1070
1071 /* Latch state of peer */
1072 unix_state_lock(other);
1073
1074 /* Apparently VFS overslept socket death. Retry. */
1075 if (sock_flag(other, SOCK_DEAD)) {
1076 unix_state_unlock(other);
1077 sock_put(other);
1078 goto restart;
1079 }
1080
1081 err = -ECONNREFUSED;
1082 if (other->sk_state != TCP_LISTEN)
1083 goto out_unlock;
1084
1085 if (skb_queue_len(&other->sk_receive_queue) >
1086 other->sk_max_ack_backlog) {
1087 err = -EAGAIN;
1088 if (!timeo)
1089 goto out_unlock;
1090
1091 timeo = unix_wait_for_peer(other, timeo);
1092
1093 err = sock_intr_errno(timeo);
1094 if (signal_pending(current))
1095 goto out;
1096 sock_put(other);
1097 goto restart;
1098 }
1099
1100 /* Latch our state.
1101
1102 It is tricky place. We need to grab write lock and cannot
1103 drop lock on peer. It is dangerous because deadlock is
1104 possible. Connect to self case and simultaneous
1105 attempt to connect are eliminated by checking socket
1106 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1107 check this before attempt to grab lock.
1108
1109 Well, and we have to recheck the state after socket locked.
1110 */
1111 st = sk->sk_state;
1112
1113 switch (st) {
1114 case TCP_CLOSE:
1115 /* This is ok... continue with connect */
1116 break;
1117 case TCP_ESTABLISHED:
1118 /* Socket is already connected */
1119 err = -EISCONN;
1120 goto out_unlock;
1121 default:
1122 err = -EINVAL;
1123 goto out_unlock;
1124 }
1125
1126 unix_state_lock_nested(sk);
1127
1128 if (sk->sk_state != st) {
1129 unix_state_unlock(sk);
1130 unix_state_unlock(other);
1131 sock_put(other);
1132 goto restart;
1133 }
1134
1135 err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1136 if (err) {
1137 unix_state_unlock(sk);
1138 goto out_unlock;
1139 }
1140
1141 /* The way is open! Fastly set all the necessary fields... */
1142
1143 sock_hold(sk);
1144 unix_peer(newsk) = sk;
1145 newsk->sk_state = TCP_ESTABLISHED;
1146 newsk->sk_type = sk->sk_type;
1147 newsk->sk_peercred.pid = task_tgid_vnr(current);
1148 newsk->sk_peercred.uid = current->euid;
1149 newsk->sk_peercred.gid = current->egid;
1150 newu = unix_sk(newsk);
1151 newsk->sk_sleep = &newu->peer_wait;
1152 otheru = unix_sk(other);
1153
1154 /* copy address information from listening to new sock*/
1155 if (otheru->addr) {
1156 atomic_inc(&otheru->addr->refcnt);
1157 newu->addr = otheru->addr;
1158 }
1159 if (otheru->dentry) {
1160 newu->dentry = dget(otheru->dentry);
1161 newu->mnt = mntget(otheru->mnt);
1162 }
1163
1164 /* Set credentials */
1165 sk->sk_peercred = other->sk_peercred;
1166
1167 sock->state = SS_CONNECTED;
1168 sk->sk_state = TCP_ESTABLISHED;
1169 sock_hold(newsk);
1170
1171 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1172 unix_peer(sk) = newsk;
1173
1174 unix_state_unlock(sk);
1175
1176 /* take ten and send info to listening sock */
1177 spin_lock(&other->sk_receive_queue.lock);
1178 __skb_queue_tail(&other->sk_receive_queue, skb);
1179 spin_unlock(&other->sk_receive_queue.lock);
1180 unix_state_unlock(other);
1181 other->sk_data_ready(other, 0);
1182 sock_put(other);
1183 return 0;
1184
1185out_unlock:
1186 if (other)
1187 unix_state_unlock(other);
1188
1189out:
1190 if (skb)
1191 kfree_skb(skb);
1192 if (newsk)
1193 unix_release_sock(newsk, 0);
1194 if (other)
1195 sock_put(other);
1196 return err;
1197}
1198
1199static int unix_socketpair(struct socket *socka, struct socket *sockb)
1200{
1201 struct sock *ska=socka->sk, *skb = sockb->sk;
1202
1203 /* Join our sockets back to back */
1204 sock_hold(ska);
1205 sock_hold(skb);
1206 unix_peer(ska)=skb;
1207 unix_peer(skb)=ska;
1208 ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1209 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1210 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1211
1212 if (ska->sk_type != SOCK_DGRAM) {
1213 ska->sk_state = TCP_ESTABLISHED;
1214 skb->sk_state = TCP_ESTABLISHED;
1215 socka->state = SS_CONNECTED;
1216 sockb->state = SS_CONNECTED;
1217 }
1218 return 0;
1219}
1220
1221static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1222{
1223 struct sock *sk = sock->sk;
1224 struct sock *tsk;
1225 struct sk_buff *skb;
1226 int err;
1227
1228 err = -EOPNOTSUPP;
1229 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1230 goto out;
1231
1232 err = -EINVAL;
1233 if (sk->sk_state != TCP_LISTEN)
1234 goto out;
1235
1236 /* If socket state is TCP_LISTEN it cannot change (for now...),
1237 * so that no locks are necessary.
1238 */
1239
1240 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1241 if (!skb) {
1242 /* This means receive shutdown. */
1243 if (err == 0)
1244 err = -EINVAL;
1245 goto out;
1246 }
1247
1248 tsk = skb->sk;
1249 skb_free_datagram(sk, skb);
1250 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1251
1252 /* attach accepted sock to socket */
1253 unix_state_lock(tsk);
1254 newsock->state = SS_CONNECTED;
1255 sock_graft(tsk, newsock);
1256 unix_state_unlock(tsk);
1257 return 0;
1258
1259out:
1260 return err;
1261}
1262
1263
1264static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1265{
1266 struct sock *sk = sock->sk;
1267 struct unix_sock *u;
1268 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1269 int err = 0;
1270
1271 if (peer) {
1272 sk = unix_peer_get(sk);
1273
1274 err = -ENOTCONN;
1275 if (!sk)
1276 goto out;
1277 err = 0;
1278 } else {
1279 sock_hold(sk);
1280 }
1281
1282 u = unix_sk(sk);
1283 unix_state_lock(sk);
1284 if (!u->addr) {
1285 sunaddr->sun_family = AF_UNIX;
1286 sunaddr->sun_path[0] = 0;
1287 *uaddr_len = sizeof(short);
1288 } else {
1289 struct unix_address *addr = u->addr;
1290
1291 *uaddr_len = addr->len;
1292 memcpy(sunaddr, addr->name, *uaddr_len);
1293 }
1294 unix_state_unlock(sk);
1295 sock_put(sk);
1296out:
1297 return err;
1298}
1299
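/* SCM_RIGHTS descriptor passing: every fd attached to an skb is marked
 * in flight for the garbage collector; detaching it (on receive, or when
 * the skb is destroyed) drops that mark again. */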
1300static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1301{
1302 int i;
1303
1304 scm->fp = UNIXCB(skb).fp;
1305 skb->destructor = sock_wfree;
1306 UNIXCB(skb).fp = NULL;
1307
1308 for (i=scm->fp->count-1; i>=0; i--)
1309 unix_notinflight(scm->fp->fp[i]);
1310}
1311
1312static void unix_destruct_fds(struct sk_buff *skb)
1313{
1314 struct scm_cookie scm;
1315 memset(&scm, 0, sizeof(scm));
1316 unix_detach_fds(&scm, skb);
1317
1318 /* Alas, it calls VFS */
1319 /* So fscking what? fput() had been SMP-safe since the last Summer */
1320 scm_destroy(&scm);
1321 sock_wfree(skb);
1322}
1323
1324static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1325{
1326 int i;
1327 for (i=scm->fp->count-1; i>=0; i--)
1328 unix_inflight(scm->fp->fp[i]);
1329 UNIXCB(skb).fp = scm->fp;
1330 skb->destructor = unix_destruct_fds;
1331 scm->fp = NULL;
1332}
1333
1334/*
1335 * Send AF_UNIX data.
1336 */
1337
1338static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1339 struct msghdr *msg, size_t len)
1340{
1341 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1342 struct sock *sk = sock->sk;
1343 struct net *net = sk->sk_net;
1344 struct unix_sock *u = unix_sk(sk);
1345 struct sockaddr_un *sunaddr=msg->msg_name;
1346 struct sock *other = NULL;
1347 int namelen = 0; /* fake GCC */
1348 int err;
1349 unsigned hash;
1350 struct sk_buff *skb;
1351 long timeo;
1352 struct scm_cookie tmp_scm;
1353
1354 if (NULL == siocb->scm)
1355 siocb->scm = &tmp_scm;
1356 err = scm_send(sock, msg, siocb->scm);
1357 if (err < 0)
1358 return err;
1359
1360 err = -EOPNOTSUPP;
1361 if (msg->msg_flags&MSG_OOB)
1362 goto out;
1363
1364 if (msg->msg_namelen) {
1365 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1366 if (err < 0)
1367 goto out;
1368 namelen = err;
1369 } else {
1370 sunaddr = NULL;
1371 err = -ENOTCONN;
1372 other = unix_peer_get(sk);
1373 if (!other)
1374 goto out;
1375 }
1376
1377 if (test_bit(SOCK_PASSCRED, &sock->flags)
1378 && !u->addr && (err = unix_autobind(sock)) != 0)
1379 goto out;
1380
1381 err = -EMSGSIZE;
1382 if (len > sk->sk_sndbuf - 32)
1383 goto out;
1384
1385 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1386 if (skb==NULL)
1387 goto out;
1388
1389 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1390 if (siocb->scm->fp)
1391 unix_attach_fds(siocb->scm, skb);
1392 unix_get_secdata(siocb->scm, skb);
1393
1394 skb_reset_transport_header(skb);
1395 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1396 if (err)
1397 goto out_free;
1398
1399 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1400
1401restart:
1402 if (!other) {
1403 err = -ECONNRESET;
1404 if (sunaddr == NULL)
1405 goto out_free;
1406
1407 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1408 hash, &err);
1409 if (other==NULL)
1410 goto out_free;
1411 }
1412
1413 unix_state_lock(other);
1414 err = -EPERM;
1415 if (!unix_may_send(sk, other))
1416 goto out_unlock;
1417
1418 if (sock_flag(other, SOCK_DEAD)) {
1419 /*
1420 * Check with 1003.1g - what should
1421 * datagram error
1422 */
1423 unix_state_unlock(other);
1424 sock_put(other);
1425
1426 err = 0;
1427 unix_state_lock(sk);
1428 if (unix_peer(sk) == other) {
1429 unix_peer(sk)=NULL;
1430 unix_state_unlock(sk);
1431
1432 unix_dgram_disconnected(sk, other);
1433 sock_put(other);
1434 err = -ECONNREFUSED;
1435 } else {
1436 unix_state_unlock(sk);
1437 }
1438
1439 other = NULL;
1440 if (err)
1441 goto out_free;
1442 goto restart;
1443 }
1444
1445 err = -EPIPE;
1446 if (other->sk_shutdown & RCV_SHUTDOWN)
1447 goto out_unlock;
1448
1449 if (sk->sk_type != SOCK_SEQPACKET) {
1450 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1451 if (err)
1452 goto out_unlock;
1453 }
1454
1455 if (unix_peer(other) != sk &&
1456 (skb_queue_len(&other->sk_receive_queue) >
1457 other->sk_max_ack_backlog)) {
1458 if (!timeo) {
1459 err = -EAGAIN;
1460 goto out_unlock;
1461 }
1462
1463 timeo = unix_wait_for_peer(other, timeo);
1464
1465 err = sock_intr_errno(timeo);
1466 if (signal_pending(current))
1467 goto out_free;
1468
1469 goto restart;
1470 }
1471
1472 skb_queue_tail(&other->sk_receive_queue, skb);
1473 unix_state_unlock(other);
1474 other->sk_data_ready(other, len);
1475 sock_put(other);
1476 scm_destroy(siocb->scm);
1477 return len;
1478
1479out_unlock:
1480 unix_state_unlock(other);
1481out_free:
1482 kfree_skb(skb);
1483out:
1484 if (other)
1485 sock_put(other);
1486 scm_destroy(siocb->scm);
1487 return err;
1488}
1489
1490
1491static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1492 struct msghdr *msg, size_t len)
1493{
1494 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1495 struct sock *sk = sock->sk;
1496 struct sock *other = NULL;
1497 struct sockaddr_un *sunaddr=msg->msg_name;
1498 int err,size;
1499 struct sk_buff *skb;
1500 int sent=0;
1501 struct scm_cookie tmp_scm;
1502
1503 if (NULL == siocb->scm)
1504 siocb->scm = &tmp_scm;
1505 err = scm_send(sock, msg, siocb->scm);
1506 if (err < 0)
1507 return err;
1508
1509 err = -EOPNOTSUPP;
1510 if (msg->msg_flags&MSG_OOB)
1511 goto out_err;
1512
1513 if (msg->msg_namelen) {
1514 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1515 goto out_err;
1516 } else {
1517 sunaddr = NULL;
1518 err = -ENOTCONN;
1519 other = unix_peer(sk);
1520 if (!other)
1521 goto out_err;
1522 }
1523
1524 if (sk->sk_shutdown & SEND_SHUTDOWN)
1525 goto pipe_err;
1526
1527 while(sent < len)
1528 {
1529 /*
1530 * Optimisation for the fact that under 0.01% of X
1531 * messages typically need breaking up.
1532 */
1533
1534 size = len-sent;
1535
1536 /* Keep two messages in the pipe so it schedules better */
1537 if (size > ((sk->sk_sndbuf >> 1) - 64))
1538 size = (sk->sk_sndbuf >> 1) - 64;
1539
1540 if (size > SKB_MAX_ALLOC)
1541 size = SKB_MAX_ALLOC;
1542
1543 /*
1544 * Grab a buffer
1545 */
1546
1547 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1548
1549 if (skb==NULL)
1550 goto out_err;
1551
1552 /*
1553 * If you pass two values to the sock_alloc_send_skb
1554 * it tries to grab the large buffer with GFP_NOFS
1555 * (which can fail easily), and if it fails grab the
1556 * fallback size buffer which is under a page and will
1557 * succeed. [Alan]
1558 */
1559 size = min_t(int, size, skb_tailroom(skb));
1560
1561 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1562 if (siocb->scm->fp)
1563 unix_attach_fds(siocb->scm, skb);
1564
1565 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1566 kfree_skb(skb);
1567 goto out_err;
1568 }
1569
1570 unix_state_lock(other);
1571
1572 if (sock_flag(other, SOCK_DEAD) ||
1573 (other->sk_shutdown & RCV_SHUTDOWN))
1574 goto pipe_err_free;
1575
1576 skb_queue_tail(&other->sk_receive_queue, skb);
1577 unix_state_unlock(other);
1578 other->sk_data_ready(other, size);
1579 sent+=size;
1580 }
1581
1582 scm_destroy(siocb->scm);
1583 siocb->scm = NULL;
1584
1585 return sent;
1586
1587pipe_err_free:
1588 unix_state_unlock(other);
1589 kfree_skb(skb);
1590pipe_err:
1591 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1592 send_sig(SIGPIPE,current,0);
1593 err = -EPIPE;
1594out_err:
1595 scm_destroy(siocb->scm);
1596 siocb->scm = NULL;
1597 return sent ? : err;
1598}
1599
1600static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1601 struct msghdr *msg, size_t len)
1602{
1603 int err;
1604 struct sock *sk = sock->sk;
1605
1606 err = sock_error(sk);
1607 if (err)
1608 return err;
1609
1610 if (sk->sk_state != TCP_ESTABLISHED)
1611 return -ENOTCONN;
1612
1613 if (msg->msg_namelen)
1614 msg->msg_namelen = 0;
1615
1616 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1617}
1618
1619static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1620{
1621 struct unix_sock *u = unix_sk(sk);
1622
1623 msg->msg_namelen = 0;
1624 if (u->addr) {
1625 msg->msg_namelen = u->addr->len;
1626 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1627 }
1628}
1629
1630static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1631 struct msghdr *msg, size_t size,
1632 int flags)
1633{
1634 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1635 struct scm_cookie tmp_scm;
1636 struct sock *sk = sock->sk;
1637 struct unix_sock *u = unix_sk(sk);
1638 int noblock = flags & MSG_DONTWAIT;
1639 struct sk_buff *skb;
1640 int err;
1641
1642 err = -EOPNOTSUPP;
1643 if (flags&MSG_OOB)
1644 goto out;
1645
1646 msg->msg_namelen = 0;
1647
1648 mutex_lock(&u->readlock);
1649
1650 skb = skb_recv_datagram(sk, flags, noblock, &err);
1651 if (!skb) {
1652 unix_state_lock(sk);
1653 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1654 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1655 (sk->sk_shutdown & RCV_SHUTDOWN))
1656 err = 0;
1657 unix_state_unlock(sk);
1658 goto out_unlock;
1659 }
1660
1661 wake_up_interruptible_sync(&u->peer_wait);
1662
1663 if (msg->msg_name)
1664 unix_copy_addr(msg, skb->sk);
1665
1666 if (size > skb->len)
1667 size = skb->len;
1668 else if (size < skb->len)
1669 msg->msg_flags |= MSG_TRUNC;
1670
1671 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1672 if (err)
1673 goto out_free;
1674
1675 if (!siocb->scm) {
1676 siocb->scm = &tmp_scm;
1677 memset(&tmp_scm, 0, sizeof(tmp_scm));
1678 }
1679 siocb->scm->creds = *UNIXCREDS(skb);
1680 unix_set_secdata(siocb->scm, skb);
1681
1682 if (!(flags & MSG_PEEK))
1683 {
1684 if (UNIXCB(skb).fp)
1685 unix_detach_fds(siocb->scm, skb);
1686 }
1687 else
1688 {
1689 /* It is questionable: on PEEK we could:
1690 - do not return fds - good, but too simple 8)
1691 - return fds, and do not return them on read (old strategy,
1692 apparently wrong)
1693 - clone fds (I chose it for now, it is the most universal
1694 solution)
1695
1696 POSIX 1003.1g does not actually define this clearly
1697 at all. POSIX 1003.1g doesn't define a lot of things
1698 clearly however!
1699
1da177e4
LT
1700 */
1701 if (UNIXCB(skb).fp)
1702 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1703 }
1704 err = size;
1705
1706 scm_recv(sock, msg, siocb->scm, flags);
1707
1708out_free:
1709 skb_free_datagram(sk,skb);
1710out_unlock:
1711 mutex_unlock(&u->readlock);
1712out:
1713 return err;
1714}
1715
1716/*
1717 * Sleep until data has arrived. But check for races.
1718 */
1719
1720static long unix_stream_data_wait(struct sock * sk, long timeo)
1721{
1722 DEFINE_WAIT(wait);
1723
1724 unix_state_lock(sk);
1725
1726 for (;;) {
1727 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1728
1729 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1730 sk->sk_err ||
1731 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1732 signal_pending(current) ||
1733 !timeo)
1734 break;
1735
1736 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1737 unix_state_unlock(sk);
1738 timeo = schedule_timeout(timeo);
1739 unix_state_lock(sk);
1740 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1741 }
1742
1743 finish_wait(sk->sk_sleep, &wait);
1744 unix_state_unlock(sk);
1745 return timeo;
1746}
1747
1748
1749
1750static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1751 struct msghdr *msg, size_t size,
1752 int flags)
1753{
1754 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1755 struct scm_cookie tmp_scm;
1756 struct sock *sk = sock->sk;
1757 struct unix_sock *u = unix_sk(sk);
1758 struct sockaddr_un *sunaddr=msg->msg_name;
1759 int copied = 0;
1760 int check_creds = 0;
1761 int target;
1762 int err = 0;
1763 long timeo;
1764
1765 err = -EINVAL;
1766 if (sk->sk_state != TCP_ESTABLISHED)
1767 goto out;
1768
1769 err = -EOPNOTSUPP;
1770 if (flags&MSG_OOB)
1771 goto out;
1772
1773 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1774 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1775
1776 msg->msg_namelen = 0;
1777
1778 /* Lock the socket to prevent queue disordering
1779 * while sleeps in memcpy_tomsg
1780 */
1781
1782 if (!siocb->scm) {
1783 siocb->scm = &tmp_scm;
1784 memset(&tmp_scm, 0, sizeof(tmp_scm));
1785 }
1786
1787 mutex_lock(&u->readlock);
1788
1789 do
1790 {
1791 int chunk;
1792 struct sk_buff *skb;
1793
1794 unix_state_lock(sk);
1795 skb = skb_dequeue(&sk->sk_receive_queue);
1796 if (skb==NULL)
1797 {
1798 if (copied >= target)
1799 goto unlock;
1800
1801 /*
1802 * POSIX 1003.1g mandates this order.
1803 */
ac7bfa62 1804
1805 if ((err = sock_error(sk)) != 0)
1806 goto unlock;
1807 if (sk->sk_shutdown & RCV_SHUTDOWN)
1808 goto unlock;
1809
1810 unix_state_unlock(sk);
1811 err = -EAGAIN;
1812 if (!timeo)
1813 break;
1814 mutex_unlock(&u->readlock);
1815
1816 timeo = unix_stream_data_wait(sk, timeo);
1817
1818 if (signal_pending(current)) {
1819 err = sock_intr_errno(timeo);
1820 goto out;
1821 }
1822 mutex_lock(&u->readlock);
1823 continue;
1824 unlock:
1825 unix_state_unlock(sk);
1826 break;
1827 }
1828 unix_state_unlock(sk);
1829
1830 if (check_creds) {
1831 /* Never glue messages from different writers */
1832 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1833 skb_queue_head(&sk->sk_receive_queue, skb);
1834 break;
1835 }
1836 } else {
1837 /* Copy credentials */
1838 siocb->scm->creds = *UNIXCREDS(skb);
1839 check_creds = 1;
1840 }
1841
1842 /* Copy address just once */
1843 if (sunaddr)
1844 {
1845 unix_copy_addr(msg, skb->sk);
1846 sunaddr = NULL;
1847 }
1848
1849 chunk = min_t(unsigned int, skb->len, size);
1850 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1851 skb_queue_head(&sk->sk_receive_queue, skb);
1852 if (copied == 0)
1853 copied = -EFAULT;
1854 break;
1855 }
1856 copied += chunk;
1857 size -= chunk;
1858
1859 /* Mark read part of skb as used */
1860 if (!(flags & MSG_PEEK))
1861 {
1862 skb_pull(skb, chunk);
1863
1864 if (UNIXCB(skb).fp)
1865 unix_detach_fds(siocb->scm, skb);
1866
1867 /* put the skb back if we didn't use it up.. */
1868 if (skb->len)
1869 {
1870 skb_queue_head(&sk->sk_receive_queue, skb);
1871 break;
1872 }
1873
1874 kfree_skb(skb);
1875
1876 if (siocb->scm->fp)
1877 break;
1878 }
1879 else
1880 {
1881 /* It is questionable, see note in unix_dgram_recvmsg.
1882 */
1883 if (UNIXCB(skb).fp)
1884 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1885
1886 /* put message back and return */
1887 skb_queue_head(&sk->sk_receive_queue, skb);
1888 break;
1889 }
1890 } while (size);
1891
1892 mutex_unlock(&u->readlock);
1893 scm_recv(sock, msg, siocb->scm, flags);
1894out:
1895 return copied ? : err;
1896}
1897
1898static int unix_shutdown(struct socket *sock, int mode)
1899{
1900 struct sock *sk = sock->sk;
1901 struct sock *other;
1902
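/* Map SHUT_RD(0)/SHUT_WR(1)/SHUT_RDWR(2) onto the RCV_SHUTDOWN/SEND_SHUTDOWN bits. */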
1903 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1904
1905 if (mode) {
1906 unix_state_lock(sk);
1907 sk->sk_shutdown |= mode;
1908 other=unix_peer(sk);
1909 if (other)
1910 sock_hold(other);
1911 unix_state_unlock(sk);
1912 sk->sk_state_change(sk);
1913
1914 if (other &&
1915 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1916
1917 int peer_mode = 0;
1918
1919 if (mode&RCV_SHUTDOWN)
1920 peer_mode |= SEND_SHUTDOWN;
1921 if (mode&SEND_SHUTDOWN)
1922 peer_mode |= RCV_SHUTDOWN;
1923 unix_state_lock(other);
1924 other->sk_shutdown |= peer_mode;
1925 unix_state_unlock(other);
1926 other->sk_state_change(other);
1927 read_lock(&other->sk_callback_lock);
1928 if (peer_mode == SHUTDOWN_MASK)
1929 sk_wake_async(other,1,POLL_HUP);
1930 else if (peer_mode & RCV_SHUTDOWN)
1931 sk_wake_async(other,1,POLL_IN);
1932 read_unlock(&other->sk_callback_lock);
1933 }
1934 if (other)
1935 sock_put(other);
1936 }
1937 return 0;
1938}
1939
1940static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1941{
1942 struct sock *sk = sock->sk;
1943 long amount=0;
1944 int err;
1945
1946 switch(cmd)
1947 {
1948 case SIOCOUTQ:
1949 amount = atomic_read(&sk->sk_wmem_alloc);
1950 err = put_user(amount, (int __user *)arg);
1951 break;
1952 case SIOCINQ:
1953 {
1954 struct sk_buff *skb;
1955
1956 if (sk->sk_state == TCP_LISTEN) {
1957 err = -EINVAL;
1958 break;
1959 }
1960
1961 spin_lock(&sk->sk_receive_queue.lock);
1962 if (sk->sk_type == SOCK_STREAM ||
1963 sk->sk_type == SOCK_SEQPACKET) {
1964 skb_queue_walk(&sk->sk_receive_queue, skb)
1965 amount += skb->len;
1966 } else {
1967 skb = skb_peek(&sk->sk_receive_queue);
1968 if (skb)
1969 amount=skb->len;
1970 }
1971 spin_unlock(&sk->sk_receive_queue.lock);
1972 err = put_user(amount, (int __user *)arg);
1973 break;
1974 }
1975
1976 default:
1977 err = -ENOIOCTLCMD;
1978 break;
1979 }
1980 return err;
1981}
1982
1983static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1984{
1985 struct sock *sk = sock->sk;
1986 unsigned int mask;
1987
1988 poll_wait(file, sk->sk_sleep, wait);
1989 mask = 0;
1990
1991 /* exceptional events? */
1992 if (sk->sk_err)
1993 mask |= POLLERR;
1994 if (sk->sk_shutdown == SHUTDOWN_MASK)
1995 mask |= POLLHUP;
1996 if (sk->sk_shutdown & RCV_SHUTDOWN)
1997 mask |= POLLRDHUP;
1998
1999 /* readable? */
2000 if (!skb_queue_empty(&sk->sk_receive_queue) ||
2001 (sk->sk_shutdown & RCV_SHUTDOWN))
2002 mask |= POLLIN | POLLRDNORM;
2003
2004 /* Connection-based need to check for termination and startup */
2005 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
2006 mask |= POLLHUP;
2007
2008 /*
2009 * we set writable also when the other side has shut down the
2010 * connection. This prevents stuck sockets.
2011 */
2012 if (unix_writable(sk))
2013 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2014
2015 return mask;
2016}
2017
2018
2019#ifdef CONFIG_PROC_FS
2020struct unix_iter_state {
2021 struct seq_net_private p;
2022 int i;
2023};
2024static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
2025{
2026 loff_t off = 0;
2027 struct sock *s;
2028
2029 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2030 if (s->sk_net != iter->p.net)
2031 continue;
2032 if (off == pos)
2033 return s;
2034 ++off;
2035 }
2036 return NULL;
2037}
2038
2039
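/* The magic (void *)1 plays the role of SEQ_START_TOKEN: it makes
 * unix_seq_show() emit the header line before any socket entries. */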
2040static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2041{
2042 struct unix_iter_state *iter = seq->private;
2043 spin_lock(&unix_table_lock);
2044 return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
2045}
2046
2047static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2048{
2049 struct unix_iter_state *iter = seq->private;
2050 struct sock *sk = v;
2051 ++*pos;
2052
2053 if (v == (void *)1)
2054 sk = first_unix_socket(&iter->i);
2055 else
2056 sk = next_unix_socket(&iter->i, sk);
2057 while (sk && (sk->sk_net != iter->p.net))
2058 sk = next_unix_socket(&iter->i, sk);
2059 return sk;
2060}
2061
2062static void unix_seq_stop(struct seq_file *seq, void *v)
2063{
2064 spin_unlock(&unix_table_lock);
2065}
2066
2067static int unix_seq_show(struct seq_file *seq, void *v)
2068{
2069
2070 if (v == (void *)1)
2071 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2072 "Inode Path\n");
2073 else {
2074 struct sock *s = v;
2075 struct unix_sock *u = unix_sk(s);
2076 unix_state_lock(s);
2077
2078 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2079 s,
2080 atomic_read(&s->sk_refcnt),
2081 0,
2082 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2083 s->sk_type,
2084 s->sk_socket ?
2085 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2086 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2087 sock_i_ino(s));
2088
2089 if (u->addr) {
2090 int i, len;
2091 seq_putc(seq, ' ');
2092
2093 i = 0;
2094 len = u->addr->len - sizeof(short);
2095 if (!UNIX_ABSTRACT(s))
2096 len--;
2097 else {
2098 seq_putc(seq, '@');
2099 i++;
2100 }
2101 for ( ; i < len; i++)
2102 seq_putc(seq, u->addr->name->sun_path[i]);
2103 }
2104 unix_state_unlock(s);
2105 seq_putc(seq, '\n');
2106 }
2107
2108 return 0;
2109}
2110
2111static const struct seq_operations unix_seq_ops = {
2112 .start = unix_seq_start,
2113 .next = unix_seq_next,
2114 .stop = unix_seq_stop,
2115 .show = unix_seq_show,
2116};
2117
2118
2119static int unix_seq_open(struct inode *inode, struct file *file)
2120{
2121 return seq_open_net(inode, file, &unix_seq_ops,
2122 sizeof(struct unix_iter_state));
2123}
2124
2125static const struct file_operations unix_seq_fops = {
2126 .owner = THIS_MODULE,
2127 .open = unix_seq_open,
2128 .read = seq_read,
2129 .llseek = seq_lseek,
2130 .release = seq_release_net,
2131};
2132
2133#endif
2134
2135static struct net_proto_family unix_family_ops = {
2136 .family = PF_UNIX,
2137 .create = unix_create,
2138 .owner = THIS_MODULE,
2139};
2140
2141
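/* Per-namespace setup: each struct net gets its own /proc/net/unix entry,
 * created when the namespace is brought up and removed when it goes away. */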
2142static int unix_net_init(struct net *net)
2143{
2144 int error = -ENOMEM;
2145
2146#ifdef CONFIG_PROC_FS
2147 if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
2148 goto out;
2149#endif
2150 error = 0;
2151out:
2152 return error;
2153}
2154
2155static void unix_net_exit(struct net *net)
2156{
2157 proc_net_remove(net, "unix");
2158}
2159
2160static struct pernet_operations unix_net_ops = {
2161 .init = unix_net_init,
2162 .exit = unix_net_exit,
2163};
2164
2165static int __init af_unix_init(void)
2166{
2167 int rc = -1;
2168 struct sk_buff *dummy_skb;
2169
2170 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2171
2172 rc = proto_register(&unix_proto, 1);
2173 if (rc != 0) {
2174 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2175 __FUNCTION__);
2176 goto out;
2177 }
2178
2179 sock_register(&unix_family_ops);
2180 register_pernet_subsys(&unix_net_ops);
2181 unix_sysctl_register();
2182out:
2183 return rc;
2184}
2185
2186static void __exit af_unix_exit(void)
2187{
2188 sock_unregister(PF_UNIX);
2189 unix_sysctl_unregister();
2190 proto_unregister(&unix_proto);
2191 unregister_pernet_subsys(&unix_net_ops);
2192}
2193
2194module_init(af_unix_init);
2195module_exit(af_unix_exit);
2196
2197MODULE_LICENSE("GPL");
2198MODULE_ALIAS_NETPROTO(PF_UNIX);