net: af_unix: implement stream sendpage support
[linux-2.6-block.git] / net / unix / af_unix.c
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by the above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listening socket
36 * has been reached. This won't break
37 * old apps and it avoids a huge number
38 * of hashed socks (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and gives the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * starting with a zero byte, so that this name space does not
80 * intersect with BSD names.
81 */
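As a quick illustration of the abstract namespace described above, here is a minimal user-space sketch (not part of af_unix.c) that binds a socket to an abstract name; "example-abstract" is only a placeholder name.

#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

int bind_abstract(void)
{
	struct sockaddr_un addr;
	const char name[] = "example-abstract";	/* placeholder name */
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/* sun_path[0] stays 0: the remaining bytes are the abstract name */
	memcpy(addr.sun_path + 1, name, sizeof(name) - 1);
	/* length covers sun_family, the leading zero byte and the name,
	 * with no trailing NUL */
	if (bind(fd, (struct sockaddr *)&addr,
		 offsetof(struct sockaddr_un, sun_path) + 1 + sizeof(name) - 1) < 0)
		return -1;
	return fd;
}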
82
5cc208be 83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
1da177e4 85#include <linux/module.h>
1da177e4 86#include <linux/kernel.h>
1da177e4
LT
87#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
457c4cbc 106#include <net/net_namespace.h>
1da177e4 107#include <net/sock.h>
c752f073 108#include <net/tcp_states.h>
1da177e4
LT
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
1da177e4
LT
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
2b15af6f 119#include <linux/freezer.h>
1da177e4 120
7123aaa3 121struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
122EXPORT_SYMBOL_GPL(unix_socket_table);
123DEFINE_SPINLOCK(unix_table_lock);
124EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 125static atomic_long_t unix_nr_socks;
1da177e4 126
1da177e4 127
7123aaa3
ED
128static struct hlist_head *unix_sockets_unbound(void *addr)
129{
130 unsigned long hash = (unsigned long)addr;
131
132 hash ^= hash >> 16;
133 hash ^= hash >> 8;
134 hash %= UNIX_HASH_SIZE;
135 return &unix_socket_table[UNIX_HASH_SIZE + hash];
136}
137
138#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 139
877ce7c1 140#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 141static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 142{
dc49c1f9 143 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
877ce7c1
CZ
144}
145
146static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147{
dc49c1f9 148 scm->secid = *UNIXSID(skb);
877ce7c1
CZ
149}
150#else
dc49c1f9 151static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
152{ }
153
154static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155{ }
156#endif /* CONFIG_SECURITY_NETWORK */
157
1da177e4
LT
158/*
159 * SMP locking strategy:
fbe9cc4a 160 * hash table is protected with spinlock unix_table_lock
663717f6 161 * each socket state is protected by separate spin lock.
1da177e4
LT
162 */
163
95c96174 164static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 165{
0a13404d 166 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 167
1da177e4
LT
168 hash ^= hash>>8;
169 return hash&(UNIX_HASH_SIZE-1);
170}
171
172#define unix_peer(sk) (unix_sk(sk)->peer)
173
174static inline int unix_our_peer(struct sock *sk, struct sock *osk)
175{
176 return unix_peer(osk) == sk;
177}
178
179static inline int unix_may_send(struct sock *sk, struct sock *osk)
180{
6eba6a37 181 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
182}
183
3c73419c
RW
184static inline int unix_recvq_full(struct sock const *sk)
185{
186 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
187}
188
fa7ff56f 189struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
190{
191 struct sock *peer;
192
1c92b4e5 193 unix_state_lock(s);
1da177e4
LT
194 peer = unix_peer(s);
195 if (peer)
196 sock_hold(peer);
1c92b4e5 197 unix_state_unlock(s);
1da177e4
LT
198 return peer;
199}
fa7ff56f 200EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
201
202static inline void unix_release_addr(struct unix_address *addr)
203{
204 if (atomic_dec_and_test(&addr->refcnt))
205 kfree(addr);
206}
207
208/*
209 * Check unix socket name:
210 * - should not be zero length.
211 * - if it does not start with a zero byte, it should be NUL terminated (FS object)
212 * - if it starts with a zero byte, it is an abstract name.
213 */
ac7bfa62 214
95c96174 215static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4
LT
216{
217 if (len <= sizeof(short) || len > sizeof(*sunaddr))
218 return -EINVAL;
219 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
220 return -EINVAL;
221 if (sunaddr->sun_path[0]) {
222 /*
223 * This may look like an off by one error but it is a bit more
224 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 225 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
226 * we are guaranteed that it is a valid memory location in our
227 * kernel address buffer.
228 */
e27dfcea 229 ((char *)sunaddr)[len] = 0;
1da177e4
LT
230 len = strlen(sunaddr->sun_path)+1+sizeof(short);
231 return len;
232 }
233
07f0757a 234 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
235 return len;
236}
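For comparison with the abstract case, a hedged user-space sketch of the filesystem-path form that unix_mkname() accepts: the path is NUL terminated and the address length counts sun_family plus the path. "/tmp/example.sock" below is only an illustrative path.

#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

int bind_pathname(const char *path)	/* e.g. "/tmp/example.sock" */
{
	struct sockaddr_un addr;
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0 || strlen(path) >= sizeof(addr.sun_path))
		return -1;
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strcpy(addr.sun_path, path);	/* NUL-terminated filesystem name */
	/* family + path + terminating NUL, mirroring what unix_mkname() expects */
	if (bind(fd, (struct sockaddr *)&addr,
		 offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1) < 0)
		return -1;
	return fd;
}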
237
238static void __unix_remove_socket(struct sock *sk)
239{
240 sk_del_node_init(sk);
241}
242
243static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
244{
547b792c 245 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
246 sk_add_node(sk, list);
247}
248
249static inline void unix_remove_socket(struct sock *sk)
250{
fbe9cc4a 251 spin_lock(&unix_table_lock);
1da177e4 252 __unix_remove_socket(sk);
fbe9cc4a 253 spin_unlock(&unix_table_lock);
1da177e4
LT
254}
255
256static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
257{
fbe9cc4a 258 spin_lock(&unix_table_lock);
1da177e4 259 __unix_insert_socket(list, sk);
fbe9cc4a 260 spin_unlock(&unix_table_lock);
1da177e4
LT
261}
262
097e66c5
DL
263static struct sock *__unix_find_socket_byname(struct net *net,
264 struct sockaddr_un *sunname,
95c96174 265 int len, int type, unsigned int hash)
1da177e4
LT
266{
267 struct sock *s;
1da177e4 268
b67bfe0d 269 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
270 struct unix_sock *u = unix_sk(s);
271
878628fb 272 if (!net_eq(sock_net(s), net))
097e66c5
DL
273 continue;
274
1da177e4
LT
275 if (u->addr->len == len &&
276 !memcmp(u->addr->name, sunname, len))
277 goto found;
278 }
279 s = NULL;
280found:
281 return s;
282}
283
097e66c5
DL
284static inline struct sock *unix_find_socket_byname(struct net *net,
285 struct sockaddr_un *sunname,
1da177e4 286 int len, int type,
95c96174 287 unsigned int hash)
1da177e4
LT
288{
289 struct sock *s;
290
fbe9cc4a 291 spin_lock(&unix_table_lock);
097e66c5 292 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
293 if (s)
294 sock_hold(s);
fbe9cc4a 295 spin_unlock(&unix_table_lock);
1da177e4
LT
296 return s;
297}
298
6616f788 299static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
300{
301 struct sock *s;
1da177e4 302
fbe9cc4a 303 spin_lock(&unix_table_lock);
b67bfe0d 304 sk_for_each(s,
1da177e4 305 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 306 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 307
a25b376b 308 if (dentry && d_backing_inode(dentry) == i) {
1da177e4
LT
309 sock_hold(s);
310 goto found;
311 }
312 }
313 s = NULL;
314found:
fbe9cc4a 315 spin_unlock(&unix_table_lock);
1da177e4
LT
316 return s;
317}
318
319static inline int unix_writable(struct sock *sk)
320{
321 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
322}
323
324static void unix_write_space(struct sock *sk)
325{
43815482
ED
326 struct socket_wq *wq;
327
328 rcu_read_lock();
1da177e4 329 if (unix_writable(sk)) {
43815482
ED
330 wq = rcu_dereference(sk->sk_wq);
331 if (wq_has_sleeper(wq))
67426b75
ED
332 wake_up_interruptible_sync_poll(&wq->wait,
333 POLLOUT | POLLWRNORM | POLLWRBAND);
8d8ad9d7 334 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 335 }
43815482 336 rcu_read_unlock();
1da177e4
LT
337}
338
339/* When a dgram socket disconnects (or changes its peer), we clear its receive
340 * queue of packets that arrived from the previous peer. First, this allows
341 * flow control based only on wmem_alloc; second, an sk connected to a peer
342 * may receive messages only from that peer. */
343static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
344{
b03efcfb 345 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
346 skb_queue_purge(&sk->sk_receive_queue);
347 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
348
349 /* If one link of a bidirectional dgram pipe is disconnected,
350 * we signal an error. Messages are lost. Do not do this
351 * when the peer was not connected to us.
352 */
353 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
354 other->sk_err = ECONNRESET;
355 other->sk_error_report(other);
356 }
357 }
358}
359
360static void unix_sock_destructor(struct sock *sk)
361{
362 struct unix_sock *u = unix_sk(sk);
363
364 skb_queue_purge(&sk->sk_receive_queue);
365
547b792c
IJ
366 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
367 WARN_ON(!sk_unhashed(sk));
368 WARN_ON(sk->sk_socket);
1da177e4 369 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 370 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
371 return;
372 }
373
374 if (u->addr)
375 unix_release_addr(u->addr);
376
518de9b3 377 atomic_long_dec(&unix_nr_socks);
6f756a8c 378 local_bh_disable();
a8076d8d 379 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 380 local_bh_enable();
1da177e4 381#ifdef UNIX_REFCNT_DEBUG
5cc208be 382 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 383 atomic_long_read(&unix_nr_socks));
1da177e4
LT
384#endif
385}
386
ded34e0f 387static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
388{
389 struct unix_sock *u = unix_sk(sk);
40ffe67d 390 struct path path;
1da177e4
LT
391 struct sock *skpair;
392 struct sk_buff *skb;
393 int state;
394
395 unix_remove_socket(sk);
396
397 /* Clear state */
1c92b4e5 398 unix_state_lock(sk);
1da177e4
LT
399 sock_orphan(sk);
400 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
401 path = u->path;
402 u->path.dentry = NULL;
403 u->path.mnt = NULL;
1da177e4
LT
404 state = sk->sk_state;
405 sk->sk_state = TCP_CLOSE;
1c92b4e5 406 unix_state_unlock(sk);
1da177e4
LT
407
408 wake_up_interruptible_all(&u->peer_wait);
409
e27dfcea 410 skpair = unix_peer(sk);
1da177e4 411
e27dfcea 412 if (skpair != NULL) {
1da177e4 413 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 414 unix_state_lock(skpair);
1da177e4
LT
415 /* No more writes */
416 skpair->sk_shutdown = SHUTDOWN_MASK;
417 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
418 skpair->sk_err = ECONNRESET;
1c92b4e5 419 unix_state_unlock(skpair);
1da177e4 420 skpair->sk_state_change(skpair);
8d8ad9d7 421 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4
LT
422 }
423 sock_put(skpair); /* It may now die */
424 unix_peer(sk) = NULL;
425 }
426
427 /* Try to flush out this socket. Throw out buffers at least */
428
429 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 430 if (state == TCP_LISTEN)
1da177e4
LT
431 unix_release_sock(skb->sk, 1);
432 /* passed fds are erased in the kfree_skb hook */
433 kfree_skb(skb);
434 }
435
40ffe67d
AV
436 if (path.dentry)
437 path_put(&path);
1da177e4
LT
438
439 sock_put(sk);
440
441 /* ---- Socket is dead now and most probably destroyed ---- */
442
443 /*
e04dae84 444 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
445 * ECONNRESET and we die on the spot. In Linux we behave
446 * like files and pipes do and wait for the last
447 * dereference.
448 *
449 * Can't we simply set sock->err?
450 *
451 * What does the above comment talk about? --ANK(980817)
452 */
453
9305cfa4 454 if (unix_tot_inflight)
ac7bfa62 455 unix_gc(); /* Garbage collect fds */
1da177e4
LT
456}
457
109f6e39
EB
458static void init_peercred(struct sock *sk)
459{
460 put_pid(sk->sk_peer_pid);
461 if (sk->sk_peer_cred)
462 put_cred(sk->sk_peer_cred);
463 sk->sk_peer_pid = get_pid(task_tgid(current));
464 sk->sk_peer_cred = get_current_cred();
465}
466
467static void copy_peercred(struct sock *sk, struct sock *peersk)
468{
469 put_pid(sk->sk_peer_pid);
470 if (sk->sk_peer_cred)
471 put_cred(sk->sk_peer_cred);
472 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
473 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
474}
475
1da177e4
LT
476static int unix_listen(struct socket *sock, int backlog)
477{
478 int err;
479 struct sock *sk = sock->sk;
480 struct unix_sock *u = unix_sk(sk);
109f6e39 481 struct pid *old_pid = NULL;
1da177e4
LT
482
483 err = -EOPNOTSUPP;
6eba6a37
ED
484 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
485 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
486 err = -EINVAL;
487 if (!u->addr)
6eba6a37 488 goto out; /* No listens on an unbound socket */
1c92b4e5 489 unix_state_lock(sk);
1da177e4
LT
490 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
491 goto out_unlock;
492 if (backlog > sk->sk_max_ack_backlog)
493 wake_up_interruptible_all(&u->peer_wait);
494 sk->sk_max_ack_backlog = backlog;
495 sk->sk_state = TCP_LISTEN;
496 /* set credentials so connect can copy them */
109f6e39 497 init_peercred(sk);
1da177e4
LT
498 err = 0;
499
500out_unlock:
1c92b4e5 501 unix_state_unlock(sk);
109f6e39 502 put_pid(old_pid);
1da177e4
LT
503out:
504 return err;
505}
506
507static int unix_release(struct socket *);
508static int unix_bind(struct socket *, struct sockaddr *, int);
509static int unix_stream_connect(struct socket *, struct sockaddr *,
510 int addr_len, int flags);
511static int unix_socketpair(struct socket *, struct socket *);
512static int unix_accept(struct socket *, struct socket *, int);
513static int unix_getname(struct socket *, struct sockaddr *, int *, int);
514static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
515static unsigned int unix_dgram_poll(struct file *, struct socket *,
516 poll_table *);
1da177e4
LT
517static int unix_ioctl(struct socket *, unsigned int, unsigned long);
518static int unix_shutdown(struct socket *, int);
1b784140
YX
519static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
520static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
521static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
522 size_t size, int flags);
1b784140
YX
523static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
524static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
1da177e4
LT
525static int unix_dgram_connect(struct socket *, struct sockaddr *,
526 int, int);
1b784140
YX
527static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
528static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
529 int);
1da177e4 530
12663bfc 531static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
532{
533 struct unix_sock *u = unix_sk(sk);
534
12663bfc
SL
535 if (mutex_lock_interruptible(&u->readlock))
536 return -EINTR;
537
f55bb7f9
PE
538 sk->sk_peek_off = val;
539 mutex_unlock(&u->readlock);
12663bfc
SL
540
541 return 0;
f55bb7f9
PE
542}
543
544
90ddc4f0 545static const struct proto_ops unix_stream_ops = {
1da177e4
LT
546 .family = PF_UNIX,
547 .owner = THIS_MODULE,
548 .release = unix_release,
549 .bind = unix_bind,
550 .connect = unix_stream_connect,
551 .socketpair = unix_socketpair,
552 .accept = unix_accept,
553 .getname = unix_getname,
554 .poll = unix_poll,
555 .ioctl = unix_ioctl,
556 .listen = unix_listen,
557 .shutdown = unix_shutdown,
558 .setsockopt = sock_no_setsockopt,
559 .getsockopt = sock_no_getsockopt,
560 .sendmsg = unix_stream_sendmsg,
561 .recvmsg = unix_stream_recvmsg,
562 .mmap = sock_no_mmap,
869e7c62 563 .sendpage = unix_stream_sendpage,
fc0d7536 564 .set_peek_off = unix_set_peek_off,
1da177e4
LT
565};
566
90ddc4f0 567static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
568 .family = PF_UNIX,
569 .owner = THIS_MODULE,
570 .release = unix_release,
571 .bind = unix_bind,
572 .connect = unix_dgram_connect,
573 .socketpair = unix_socketpair,
574 .accept = sock_no_accept,
575 .getname = unix_getname,
ec0d215f 576 .poll = unix_dgram_poll,
1da177e4
LT
577 .ioctl = unix_ioctl,
578 .listen = sock_no_listen,
579 .shutdown = unix_shutdown,
580 .setsockopt = sock_no_setsockopt,
581 .getsockopt = sock_no_getsockopt,
582 .sendmsg = unix_dgram_sendmsg,
583 .recvmsg = unix_dgram_recvmsg,
584 .mmap = sock_no_mmap,
585 .sendpage = sock_no_sendpage,
f55bb7f9 586 .set_peek_off = unix_set_peek_off,
1da177e4
LT
587};
588
90ddc4f0 589static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
590 .family = PF_UNIX,
591 .owner = THIS_MODULE,
592 .release = unix_release,
593 .bind = unix_bind,
594 .connect = unix_stream_connect,
595 .socketpair = unix_socketpair,
596 .accept = unix_accept,
597 .getname = unix_getname,
ec0d215f 598 .poll = unix_dgram_poll,
1da177e4
LT
599 .ioctl = unix_ioctl,
600 .listen = unix_listen,
601 .shutdown = unix_shutdown,
602 .setsockopt = sock_no_setsockopt,
603 .getsockopt = sock_no_getsockopt,
604 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 605 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
606 .mmap = sock_no_mmap,
607 .sendpage = sock_no_sendpage,
f55bb7f9 608 .set_peek_off = unix_set_peek_off,
1da177e4
LT
609};
610
611static struct proto unix_proto = {
248969ae
ED
612 .name = "UNIX",
613 .owner = THIS_MODULE,
248969ae 614 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
615};
616
a09785a2
IM
617/*
618 * AF_UNIX sockets do not interact with hardware, hence they
619 * don't trigger interrupts - so it's safe for them to have
620 * bh-unsafe locking for their sk_receive_queue.lock. Split off
621 * this special lock-class by reinitializing the spinlock key:
622 */
623static struct lock_class_key af_unix_sk_receive_queue_lock_key;
624
11aa9c28 625static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
1da177e4
LT
626{
627 struct sock *sk = NULL;
628 struct unix_sock *u;
629
518de9b3
ED
630 atomic_long_inc(&unix_nr_socks);
631 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
632 goto out;
633
11aa9c28 634 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
1da177e4
LT
635 if (!sk)
636 goto out;
637
6eba6a37 638 sock_init_data(sock, sk);
a09785a2
IM
639 lockdep_set_class(&sk->sk_receive_queue.lock,
640 &af_unix_sk_receive_queue_lock_key);
1da177e4
LT
641
642 sk->sk_write_space = unix_write_space;
a0a53c8b 643 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
644 sk->sk_destruct = unix_sock_destructor;
645 u = unix_sk(sk);
40ffe67d
AV
646 u->path.dentry = NULL;
647 u->path.mnt = NULL;
fd19f329 648 spin_lock_init(&u->lock);
516e0cc5 649 atomic_long_set(&u->inflight, 0);
1fd05ba5 650 INIT_LIST_HEAD(&u->link);
57b47a53 651 mutex_init(&u->readlock); /* single task reading lock */
1da177e4 652 init_waitqueue_head(&u->peer_wait);
7123aaa3 653 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 654out:
284b327b 655 if (sk == NULL)
518de9b3 656 atomic_long_dec(&unix_nr_socks);
920de804
ED
657 else {
658 local_bh_disable();
a8076d8d 659 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
660 local_bh_enable();
661 }
1da177e4
LT
662 return sk;
663}
664
3f378b68
EP
665static int unix_create(struct net *net, struct socket *sock, int protocol,
666 int kern)
1da177e4
LT
667{
668 if (protocol && protocol != PF_UNIX)
669 return -EPROTONOSUPPORT;
670
671 sock->state = SS_UNCONNECTED;
672
673 switch (sock->type) {
674 case SOCK_STREAM:
675 sock->ops = &unix_stream_ops;
676 break;
677 /*
678 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
679 * nothing uses it.
680 */
681 case SOCK_RAW:
e27dfcea 682 sock->type = SOCK_DGRAM;
1da177e4
LT
683 case SOCK_DGRAM:
684 sock->ops = &unix_dgram_ops;
685 break;
686 case SOCK_SEQPACKET:
687 sock->ops = &unix_seqpacket_ops;
688 break;
689 default:
690 return -ESOCKTNOSUPPORT;
691 }
692
11aa9c28 693 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
1da177e4
LT
694}
695
696static int unix_release(struct socket *sock)
697{
698 struct sock *sk = sock->sk;
699
700 if (!sk)
701 return 0;
702
ded34e0f 703 unix_release_sock(sk, 0);
1da177e4
LT
704 sock->sk = NULL;
705
ded34e0f 706 return 0;
1da177e4
LT
707}
708
709static int unix_autobind(struct socket *sock)
710{
711 struct sock *sk = sock->sk;
3b1e0a65 712 struct net *net = sock_net(sk);
1da177e4
LT
713 struct unix_sock *u = unix_sk(sk);
714 static u32 ordernum = 1;
6eba6a37 715 struct unix_address *addr;
1da177e4 716 int err;
8df73ff9 717 unsigned int retries = 0;
1da177e4 718
37ab4fa7
SL
719 err = mutex_lock_interruptible(&u->readlock);
720 if (err)
721 return err;
1da177e4
LT
722
723 err = 0;
724 if (u->addr)
725 goto out;
726
727 err = -ENOMEM;
0da974f4 728 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
729 if (!addr)
730 goto out;
731
1da177e4
LT
732 addr->name->sun_family = AF_UNIX;
733 atomic_set(&addr->refcnt, 1);
734
735retry:
736 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 737 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 738
fbe9cc4a 739 spin_lock(&unix_table_lock);
1da177e4
LT
740 ordernum = (ordernum+1)&0xFFFFF;
741
097e66c5 742 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 743 addr->hash)) {
fbe9cc4a 744 spin_unlock(&unix_table_lock);
8df73ff9
TH
745 /*
746 * __unix_find_socket_byname() may take a long time if many names
747 * are already in use.
748 */
749 cond_resched();
750 /* Give up if all names seem to be in use. */
751 if (retries++ == 0xFFFFF) {
752 err = -ENOSPC;
753 kfree(addr);
754 goto out;
755 }
1da177e4
LT
756 goto retry;
757 }
758 addr->hash ^= sk->sk_type;
759
760 __unix_remove_socket(sk);
761 u->addr = addr;
762 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 763 spin_unlock(&unix_table_lock);
1da177e4
LT
764 err = 0;
765
57b47a53 766out: mutex_unlock(&u->readlock);
1da177e4
LT
767 return err;
768}
769
097e66c5
DL
770static struct sock *unix_find_other(struct net *net,
771 struct sockaddr_un *sunname, int len,
95c96174 772 int type, unsigned int hash, int *error)
1da177e4
LT
773{
774 struct sock *u;
421748ec 775 struct path path;
1da177e4 776 int err = 0;
ac7bfa62 777
1da177e4 778 if (sunname->sun_path[0]) {
421748ec
AV
779 struct inode *inode;
780 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1da177e4
LT
781 if (err)
782 goto fail;
a25b376b 783 inode = d_backing_inode(path.dentry);
421748ec 784 err = inode_permission(inode, MAY_WRITE);
1da177e4
LT
785 if (err)
786 goto put_fail;
787
788 err = -ECONNREFUSED;
421748ec 789 if (!S_ISSOCK(inode->i_mode))
1da177e4 790 goto put_fail;
6616f788 791 u = unix_find_socket_byinode(inode);
1da177e4
LT
792 if (!u)
793 goto put_fail;
794
795 if (u->sk_type == type)
68ac1234 796 touch_atime(&path);
1da177e4 797
421748ec 798 path_put(&path);
1da177e4 799
e27dfcea 800 err = -EPROTOTYPE;
1da177e4
LT
801 if (u->sk_type != type) {
802 sock_put(u);
803 goto fail;
804 }
805 } else {
806 err = -ECONNREFUSED;
e27dfcea 807 u = unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
808 if (u) {
809 struct dentry *dentry;
40ffe67d 810 dentry = unix_sk(u)->path.dentry;
1da177e4 811 if (dentry)
68ac1234 812 touch_atime(&unix_sk(u)->path);
1da177e4
LT
813 } else
814 goto fail;
815 }
816 return u;
817
818put_fail:
421748ec 819 path_put(&path);
1da177e4 820fail:
e27dfcea 821 *error = err;
1da177e4
LT
822 return NULL;
823}
824
faf02010
AV
825static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
826{
827 struct dentry *dentry;
828 struct path path;
829 int err = 0;
830 /*
831 * Get the parent directory, calculate the hash for last
832 * component.
833 */
834 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
835 err = PTR_ERR(dentry);
836 if (IS_ERR(dentry))
837 return err;
838
839 /*
840 * All right, let's create it.
841 */
842 err = security_path_mknod(&path, dentry, mode, 0);
843 if (!err) {
ee8ac4d6 844 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
faf02010
AV
845 if (!err) {
846 res->mnt = mntget(path.mnt);
847 res->dentry = dget(dentry);
848 }
849 }
850 done_path_create(&path, dentry);
851 return err;
852}
1da177e4
LT
853
854static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
855{
856 struct sock *sk = sock->sk;
3b1e0a65 857 struct net *net = sock_net(sk);
1da177e4 858 struct unix_sock *u = unix_sk(sk);
e27dfcea 859 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 860 char *sun_path = sunaddr->sun_path;
1da177e4 861 int err;
95c96174 862 unsigned int hash;
1da177e4
LT
863 struct unix_address *addr;
864 struct hlist_head *list;
865
866 err = -EINVAL;
867 if (sunaddr->sun_family != AF_UNIX)
868 goto out;
869
e27dfcea 870 if (addr_len == sizeof(short)) {
1da177e4
LT
871 err = unix_autobind(sock);
872 goto out;
873 }
874
875 err = unix_mkname(sunaddr, addr_len, &hash);
876 if (err < 0)
877 goto out;
878 addr_len = err;
879
37ab4fa7
SL
880 err = mutex_lock_interruptible(&u->readlock);
881 if (err)
882 goto out;
1da177e4
LT
883
884 err = -EINVAL;
885 if (u->addr)
886 goto out_up;
887
888 err = -ENOMEM;
889 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
890 if (!addr)
891 goto out_up;
892
893 memcpy(addr->name, sunaddr, addr_len);
894 addr->len = addr_len;
895 addr->hash = hash ^ sk->sk_type;
896 atomic_set(&addr->refcnt, 1);
897
dae6ad8f 898 if (sun_path[0]) {
faf02010
AV
899 struct path path;
900 umode_t mode = S_IFSOCK |
ce3b0f8d 901 (SOCK_INODE(sock)->i_mode & ~current_umask());
faf02010
AV
902 err = unix_mknod(sun_path, mode, &path);
903 if (err) {
904 if (err == -EEXIST)
905 err = -EADDRINUSE;
906 unix_release_addr(addr);
907 goto out_up;
908 }
1da177e4 909 addr->hash = UNIX_HASH_SIZE;
a25b376b 910 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
faf02010
AV
911 spin_lock(&unix_table_lock);
912 u->path = path;
913 list = &unix_socket_table[hash];
914 } else {
915 spin_lock(&unix_table_lock);
1da177e4 916 err = -EADDRINUSE;
097e66c5 917 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
918 sk->sk_type, hash)) {
919 unix_release_addr(addr);
920 goto out_unlock;
921 }
922
923 list = &unix_socket_table[addr->hash];
1da177e4
LT
924 }
925
926 err = 0;
927 __unix_remove_socket(sk);
928 u->addr = addr;
929 __unix_insert_socket(list, sk);
930
931out_unlock:
fbe9cc4a 932 spin_unlock(&unix_table_lock);
1da177e4 933out_up:
57b47a53 934 mutex_unlock(&u->readlock);
1da177e4
LT
935out:
936 return err;
1da177e4
LT
937}
938
278a3de5
DM
939static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
940{
941 if (unlikely(sk1 == sk2) || !sk2) {
942 unix_state_lock(sk1);
943 return;
944 }
945 if (sk1 < sk2) {
946 unix_state_lock(sk1);
947 unix_state_lock_nested(sk2);
948 } else {
949 unix_state_lock(sk2);
950 unix_state_lock_nested(sk1);
951 }
952}
953
954static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
955{
956 if (unlikely(sk1 == sk2) || !sk2) {
957 unix_state_unlock(sk1);
958 return;
959 }
960 unix_state_unlock(sk1);
961 unix_state_unlock(sk2);
962}
963
1da177e4
LT
964static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
965 int alen, int flags)
966{
967 struct sock *sk = sock->sk;
3b1e0a65 968 struct net *net = sock_net(sk);
e27dfcea 969 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4 970 struct sock *other;
95c96174 971 unsigned int hash;
1da177e4
LT
972 int err;
973
974 if (addr->sa_family != AF_UNSPEC) {
975 err = unix_mkname(sunaddr, alen, &hash);
976 if (err < 0)
977 goto out;
978 alen = err;
979
980 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
981 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
982 goto out;
983
278a3de5 984restart:
e27dfcea 985 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
986 if (!other)
987 goto out;
988
278a3de5
DM
989 unix_state_double_lock(sk, other);
990
991 /* Apparently VFS overslept socket death. Retry. */
992 if (sock_flag(other, SOCK_DEAD)) {
993 unix_state_double_unlock(sk, other);
994 sock_put(other);
995 goto restart;
996 }
1da177e4
LT
997
998 err = -EPERM;
999 if (!unix_may_send(sk, other))
1000 goto out_unlock;
1001
1002 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1003 if (err)
1004 goto out_unlock;
1005
1006 } else {
1007 /*
1008 * 1003.1g breaking connected state with AF_UNSPEC
1009 */
1010 other = NULL;
278a3de5 1011 unix_state_double_lock(sk, other);
1da177e4
LT
1012 }
1013
1014 /*
1015 * If it was connected, reconnect.
1016 */
1017 if (unix_peer(sk)) {
1018 struct sock *old_peer = unix_peer(sk);
e27dfcea 1019 unix_peer(sk) = other;
278a3de5 1020 unix_state_double_unlock(sk, other);
1da177e4
LT
1021
1022 if (other != old_peer)
1023 unix_dgram_disconnected(sk, old_peer);
1024 sock_put(old_peer);
1025 } else {
e27dfcea 1026 unix_peer(sk) = other;
278a3de5 1027 unix_state_double_unlock(sk, other);
1da177e4 1028 }
ac7bfa62 1029 return 0;
1da177e4
LT
1030
1031out_unlock:
278a3de5 1032 unix_state_double_unlock(sk, other);
1da177e4
LT
1033 sock_put(other);
1034out:
1035 return err;
1036}
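A small user-space sketch of the AF_UNSPEC branch handled above: connecting a datagram socket to an address with sa_family == AF_UNSPEC drops the current peer, and unix_dgram_disconnected() purges anything still queued from it. The helper name is illustrative only.

#include <string.h>
#include <sys/socket.h>

/* Disconnect a connected AF_UNIX datagram socket (see the AF_UNSPEC
 * branch in unix_dgram_connect() above). */
int dgram_disconnect(int fd)
{
	struct sockaddr addr;

	memset(&addr, 0, sizeof(addr));
	addr.sa_family = AF_UNSPEC;
	return connect(fd, &addr, sizeof(addr));
}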
1037
1038static long unix_wait_for_peer(struct sock *other, long timeo)
1039{
1040 struct unix_sock *u = unix_sk(other);
1041 int sched;
1042 DEFINE_WAIT(wait);
1043
1044 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1045
1046 sched = !sock_flag(other, SOCK_DEAD) &&
1047 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1048 unix_recvq_full(other);
1da177e4 1049
1c92b4e5 1050 unix_state_unlock(other);
1da177e4
LT
1051
1052 if (sched)
1053 timeo = schedule_timeout(timeo);
1054
1055 finish_wait(&u->peer_wait, &wait);
1056 return timeo;
1057}
1058
1059static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1060 int addr_len, int flags)
1061{
e27dfcea 1062 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4 1063 struct sock *sk = sock->sk;
3b1e0a65 1064 struct net *net = sock_net(sk);
1da177e4
LT
1065 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1066 struct sock *newsk = NULL;
1067 struct sock *other = NULL;
1068 struct sk_buff *skb = NULL;
95c96174 1069 unsigned int hash;
1da177e4
LT
1070 int st;
1071 int err;
1072 long timeo;
1073
1074 err = unix_mkname(sunaddr, addr_len, &hash);
1075 if (err < 0)
1076 goto out;
1077 addr_len = err;
1078
f64f9e71
JP
1079 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1080 (err = unix_autobind(sock)) != 0)
1da177e4
LT
1081 goto out;
1082
1083 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1084
1085 /* First of all allocate resources.
1086 If we will make it after state is locked,
1087 we will have to recheck all again in any case.
1088 */
1089
1090 err = -ENOMEM;
1091
1092 /* create new sock for complete connection */
11aa9c28 1093 newsk = unix_create1(sock_net(sk), NULL, 0);
1da177e4
LT
1094 if (newsk == NULL)
1095 goto out;
1096
1097 /* Allocate skb for sending to listening sock */
1098 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1099 if (skb == NULL)
1100 goto out;
1101
1102restart:
1103 /* Find listening sock. */
097e66c5 1104 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1da177e4
LT
1105 if (!other)
1106 goto out;
1107
1108 /* Latch state of peer */
1c92b4e5 1109 unix_state_lock(other);
1da177e4
LT
1110
1111 /* Apparently VFS overslept socket death. Retry. */
1112 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1113 unix_state_unlock(other);
1da177e4
LT
1114 sock_put(other);
1115 goto restart;
1116 }
1117
1118 err = -ECONNREFUSED;
1119 if (other->sk_state != TCP_LISTEN)
1120 goto out_unlock;
77238f2b
TS
1121 if (other->sk_shutdown & RCV_SHUTDOWN)
1122 goto out_unlock;
1da177e4 1123
3c73419c 1124 if (unix_recvq_full(other)) {
1da177e4
LT
1125 err = -EAGAIN;
1126 if (!timeo)
1127 goto out_unlock;
1128
1129 timeo = unix_wait_for_peer(other, timeo);
1130
1131 err = sock_intr_errno(timeo);
1132 if (signal_pending(current))
1133 goto out;
1134 sock_put(other);
1135 goto restart;
ac7bfa62 1136 }
1da177e4
LT
1137
1138 /* Latch our state.
1139
e5537bfc 1140 This is a tricky place. We need to grab our state lock and cannot
1da177e4
LT
1141 drop the lock on the peer. It is dangerous because deadlock is
1142 possible. The connect-to-self case and simultaneous
1143 attempts to connect are eliminated by checking the socket
1144 state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1145 check this before attempting to grab the lock.
1146
1147 Well, and we have to recheck the state after socket locked.
1148 */
1149 st = sk->sk_state;
1150
1151 switch (st) {
1152 case TCP_CLOSE:
1153 /* This is ok... continue with connect */
1154 break;
1155 case TCP_ESTABLISHED:
1156 /* Socket is already connected */
1157 err = -EISCONN;
1158 goto out_unlock;
1159 default:
1160 err = -EINVAL;
1161 goto out_unlock;
1162 }
1163
1c92b4e5 1164 unix_state_lock_nested(sk);
1da177e4
LT
1165
1166 if (sk->sk_state != st) {
1c92b4e5
DM
1167 unix_state_unlock(sk);
1168 unix_state_unlock(other);
1da177e4
LT
1169 sock_put(other);
1170 goto restart;
1171 }
1172
3610cda5 1173 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1174 if (err) {
1c92b4e5 1175 unix_state_unlock(sk);
1da177e4
LT
1176 goto out_unlock;
1177 }
1178
1179 /* The way is open! Quickly set all the necessary fields... */
1180
1181 sock_hold(sk);
1182 unix_peer(newsk) = sk;
1183 newsk->sk_state = TCP_ESTABLISHED;
1184 newsk->sk_type = sk->sk_type;
109f6e39 1185 init_peercred(newsk);
1da177e4 1186 newu = unix_sk(newsk);
eaefd110 1187 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1188 otheru = unix_sk(other);
1189
1190 /* copy address information from listening to new sock*/
1191 if (otheru->addr) {
1192 atomic_inc(&otheru->addr->refcnt);
1193 newu->addr = otheru->addr;
1194 }
40ffe67d
AV
1195 if (otheru->path.dentry) {
1196 path_get(&otheru->path);
1197 newu->path = otheru->path;
1da177e4
LT
1198 }
1199
1200 /* Set credentials */
109f6e39 1201 copy_peercred(sk, other);
1da177e4 1202
1da177e4
LT
1203 sock->state = SS_CONNECTED;
1204 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1205 sock_hold(newsk);
1206
4e857c58 1207 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
830a1e5c 1208 unix_peer(sk) = newsk;
1da177e4 1209
1c92b4e5 1210 unix_state_unlock(sk);
1da177e4
LT
1211
1212 /* take ten and send info to listening sock */
1213 spin_lock(&other->sk_receive_queue.lock);
1214 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1215 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1216 unix_state_unlock(other);
676d2369 1217 other->sk_data_ready(other);
1da177e4
LT
1218 sock_put(other);
1219 return 0;
1220
1221out_unlock:
1222 if (other)
1c92b4e5 1223 unix_state_unlock(other);
1da177e4
LT
1224
1225out:
40d44446 1226 kfree_skb(skb);
1da177e4
LT
1227 if (newsk)
1228 unix_release_sock(newsk, 0);
1229 if (other)
1230 sock_put(other);
1231 return err;
1232}
1233
1234static int unix_socketpair(struct socket *socka, struct socket *sockb)
1235{
e27dfcea 1236 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1237
1238 /* Join our sockets back to back */
1239 sock_hold(ska);
1240 sock_hold(skb);
e27dfcea
JK
1241 unix_peer(ska) = skb;
1242 unix_peer(skb) = ska;
109f6e39
EB
1243 init_peercred(ska);
1244 init_peercred(skb);
1da177e4
LT
1245
1246 if (ska->sk_type != SOCK_DGRAM) {
1247 ska->sk_state = TCP_ESTABLISHED;
1248 skb->sk_state = TCP_ESTABLISHED;
1249 socka->state = SS_CONNECTED;
1250 sockb->state = SS_CONNECTED;
1251 }
1252 return 0;
1253}
1254
90c6bd34
DB
1255static void unix_sock_inherit_flags(const struct socket *old,
1256 struct socket *new)
1257{
1258 if (test_bit(SOCK_PASSCRED, &old->flags))
1259 set_bit(SOCK_PASSCRED, &new->flags);
1260 if (test_bit(SOCK_PASSSEC, &old->flags))
1261 set_bit(SOCK_PASSSEC, &new->flags);
1262}
1263
1da177e4
LT
1264static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1265{
1266 struct sock *sk = sock->sk;
1267 struct sock *tsk;
1268 struct sk_buff *skb;
1269 int err;
1270
1271 err = -EOPNOTSUPP;
6eba6a37 1272 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1273 goto out;
1274
1275 err = -EINVAL;
1276 if (sk->sk_state != TCP_LISTEN)
1277 goto out;
1278
1279 /* If socket state is TCP_LISTEN it cannot change (for now...),
1280 * so that no locks are necessary.
1281 */
1282
1283 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1284 if (!skb) {
1285 /* This means receive shutdown. */
1286 if (err == 0)
1287 err = -EINVAL;
1288 goto out;
1289 }
1290
1291 tsk = skb->sk;
1292 skb_free_datagram(sk, skb);
1293 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1294
1295 /* attach accepted sock to socket */
1c92b4e5 1296 unix_state_lock(tsk);
1da177e4 1297 newsock->state = SS_CONNECTED;
90c6bd34 1298 unix_sock_inherit_flags(sock, newsock);
1da177e4 1299 sock_graft(tsk, newsock);
1c92b4e5 1300 unix_state_unlock(tsk);
1da177e4
LT
1301 return 0;
1302
1303out:
1304 return err;
1305}
1306
1307
1308static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1309{
1310 struct sock *sk = sock->sk;
1311 struct unix_sock *u;
13cfa97b 1312 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1313 int err = 0;
1314
1315 if (peer) {
1316 sk = unix_peer_get(sk);
1317
1318 err = -ENOTCONN;
1319 if (!sk)
1320 goto out;
1321 err = 0;
1322 } else {
1323 sock_hold(sk);
1324 }
1325
1326 u = unix_sk(sk);
1c92b4e5 1327 unix_state_lock(sk);
1da177e4
LT
1328 if (!u->addr) {
1329 sunaddr->sun_family = AF_UNIX;
1330 sunaddr->sun_path[0] = 0;
1331 *uaddr_len = sizeof(short);
1332 } else {
1333 struct unix_address *addr = u->addr;
1334
1335 *uaddr_len = addr->len;
1336 memcpy(sunaddr, addr->name, *uaddr_len);
1337 }
1c92b4e5 1338 unix_state_unlock(sk);
1da177e4
LT
1339 sock_put(sk);
1340out:
1341 return err;
1342}
1343
1344static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1345{
1346 int i;
1347
1348 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1349 UNIXCB(skb).fp = NULL;
1350
6eba6a37 1351 for (i = scm->fp->count-1; i >= 0; i--)
1da177e4
LT
1352 unix_notinflight(scm->fp->fp[i]);
1353}
1354
7361c36c 1355static void unix_destruct_scm(struct sk_buff *skb)
1da177e4
LT
1356{
1357 struct scm_cookie scm;
1358 memset(&scm, 0, sizeof(scm));
7361c36c 1359 scm.pid = UNIXCB(skb).pid;
7361c36c
EB
1360 if (UNIXCB(skb).fp)
1361 unix_detach_fds(&scm, skb);
1da177e4
LT
1362
1363 /* Alas, it calls VFS */
1364 /* So fscking what? fput() had been SMP-safe since the last Summer */
1365 scm_destroy(&scm);
1366 sock_wfree(skb);
1367}
1368
25888e30
ED
1369#define MAX_RECURSION_LEVEL 4
1370
6209344f 1371static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1da177e4
LT
1372{
1373 int i;
25888e30
ED
1374 unsigned char max_level = 0;
1375 int unix_sock_count = 0;
1376
1377 for (i = scm->fp->count - 1; i >= 0; i--) {
1378 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1379
1380 if (sk) {
1381 unix_sock_count++;
1382 max_level = max(max_level,
1383 unix_sk(sk)->recursion_level);
1384 }
1385 }
1386 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1387 return -ETOOMANYREFS;
6209344f
MS
1388
1389 /*
1390 * Need to duplicate file references for the sake of garbage
1391 * collection. Otherwise a socket in the fps might become a
1392 * candidate for GC while the skb is not yet queued.
1393 */
1394 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1395 if (!UNIXCB(skb).fp)
1396 return -ENOMEM;
1397
25888e30
ED
1398 if (unix_sock_count) {
1399 for (i = scm->fp->count - 1; i >= 0; i--)
1400 unix_inflight(scm->fp->fp[i]);
1401 }
1402 return max_level;
1da177e4
LT
1403}
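The fd attachment done above is what backs SCM_RIGHTS passing. A minimal user-space sender, for illustration only (send_fd() and its arguments are not part of this file):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Pass one file descriptor over a connected AF_UNIX socket. */
ssize_t send_fd(int sock, int fd_to_pass)
{
	char data = 'x';			/* at least one byte of real data */
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return sendmsg(sock, &msg, 0);
}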
1404
f78a5fda 1405static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1406{
1407 int err = 0;
16e57262 1408
f78a5fda 1409 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1410 UNIXCB(skb).uid = scm->creds.uid;
1411 UNIXCB(skb).gid = scm->creds.gid;
7361c36c
EB
1412 UNIXCB(skb).fp = NULL;
1413 if (scm->fp && send_fds)
1414 err = unix_attach_fds(scm, skb);
1415
1416 skb->destructor = unix_destruct_scm;
1417 return err;
1418}
1419
16e57262
ED
1420/*
1421 * Some apps rely on write() giving SCM_CREDENTIALS.
1422 * We include credentials if the source or destination socket
1423 * asserted SOCK_PASSCRED.
1424 */
1425static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1426 const struct sock *other)
1427{
6b0ee8c0 1428 if (UNIXCB(skb).pid)
16e57262
ED
1429 return;
1430 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
25da0e3e
EB
1431 !other->sk_socket ||
1432 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
16e57262 1433 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1434 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1435 }
1436}
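To see the credentials that maybe_add_creds() attaches, user space enables SO_PASSCRED and reads the SCM_CREDENTIALS control message. A hedged sketch (recv_creds() is illustrative, not kernel code):

#define _GNU_SOURCE			/* for struct ucred / SCM_CREDENTIALS */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

int recv_creds(int sock)
{
	int on = 1;
	char data[256];
	union {
		char buf[CMSG_SPACE(sizeof(struct ucred))];
		struct cmsghdr align;
	} u;
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0)
		return -1;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);
	if (recvmsg(sock, &msg, 0) < 0)
		return -1;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			struct ucred uc;

			memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
			printf("pid=%d uid=%u gid=%u\n", uc.pid, uc.uid, uc.gid);
		}
	return 0;
}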
1437
1da177e4
LT
1438/*
1439 * Send AF_UNIX data.
1440 */
1441
1b784140
YX
1442static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1443 size_t len)
1da177e4 1444{
1da177e4 1445 struct sock *sk = sock->sk;
3b1e0a65 1446 struct net *net = sock_net(sk);
1da177e4 1447 struct unix_sock *u = unix_sk(sk);
342dfc30 1448 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1da177e4
LT
1449 struct sock *other = NULL;
1450 int namelen = 0; /* fake GCC */
1451 int err;
95c96174 1452 unsigned int hash;
f78a5fda 1453 struct sk_buff *skb;
1da177e4 1454 long timeo;
7cc05662 1455 struct scm_cookie scm;
25888e30 1456 int max_level;
eb6a2481 1457 int data_len = 0;
1da177e4 1458
5f23b734 1459 wait_for_unix_gc();
7cc05662 1460 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1461 if (err < 0)
1462 return err;
1463
1464 err = -EOPNOTSUPP;
1465 if (msg->msg_flags&MSG_OOB)
1466 goto out;
1467
1468 if (msg->msg_namelen) {
1469 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1470 if (err < 0)
1471 goto out;
1472 namelen = err;
1473 } else {
1474 sunaddr = NULL;
1475 err = -ENOTCONN;
1476 other = unix_peer_get(sk);
1477 if (!other)
1478 goto out;
1479 }
1480
f64f9e71
JP
1481 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1482 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1483 goto out;
1484
1485 err = -EMSGSIZE;
1486 if (len > sk->sk_sndbuf - 32)
1487 goto out;
1488
31ff6aa5 1489 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1490 data_len = min_t(size_t,
1491 len - SKB_MAX_ALLOC,
1492 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1493 data_len = PAGE_ALIGN(data_len);
1494
1495 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1496 }
eb6a2481
ED
1497
1498 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1499 msg->msg_flags & MSG_DONTWAIT, &err,
1500 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1501 if (skb == NULL)
1da177e4
LT
1502 goto out;
1503
7cc05662 1504 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1505 if (err < 0)
7361c36c 1506 goto out_free;
25888e30 1507 max_level = err + 1;
7cc05662 1508 unix_get_secdata(&scm, skb);
877ce7c1 1509
eb6a2481
ED
1510 skb_put(skb, len - data_len);
1511 skb->data_len = data_len;
1512 skb->len = len;
c0371da6 1513 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1514 if (err)
1515 goto out_free;
1516
1517 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1518
1519restart:
1520 if (!other) {
1521 err = -ECONNRESET;
1522 if (sunaddr == NULL)
1523 goto out_free;
1524
097e66c5 1525 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1526 hash, &err);
e27dfcea 1527 if (other == NULL)
1da177e4
LT
1528 goto out_free;
1529 }
1530
d6ae3bae
AC
1531 if (sk_filter(other, skb) < 0) {
1532 /* Toss the packet but do not return any error to the sender */
1533 err = len;
1534 goto out_free;
1535 }
1536
1c92b4e5 1537 unix_state_lock(other);
1da177e4
LT
1538 err = -EPERM;
1539 if (!unix_may_send(sk, other))
1540 goto out_unlock;
1541
1542 if (sock_flag(other, SOCK_DEAD)) {
1543 /*
1544 * Check with 1003.1g - what should
1545 * datagram error
1546 */
1c92b4e5 1547 unix_state_unlock(other);
1da177e4
LT
1548 sock_put(other);
1549
1550 err = 0;
1c92b4e5 1551 unix_state_lock(sk);
1da177e4 1552 if (unix_peer(sk) == other) {
e27dfcea 1553 unix_peer(sk) = NULL;
1c92b4e5 1554 unix_state_unlock(sk);
1da177e4
LT
1555
1556 unix_dgram_disconnected(sk, other);
1557 sock_put(other);
1558 err = -ECONNREFUSED;
1559 } else {
1c92b4e5 1560 unix_state_unlock(sk);
1da177e4
LT
1561 }
1562
1563 other = NULL;
1564 if (err)
1565 goto out_free;
1566 goto restart;
1567 }
1568
1569 err = -EPIPE;
1570 if (other->sk_shutdown & RCV_SHUTDOWN)
1571 goto out_unlock;
1572
1573 if (sk->sk_type != SOCK_SEQPACKET) {
1574 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1575 if (err)
1576 goto out_unlock;
1577 }
1578
3c73419c 1579 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1da177e4
LT
1580 if (!timeo) {
1581 err = -EAGAIN;
1582 goto out_unlock;
1583 }
1584
1585 timeo = unix_wait_for_peer(other, timeo);
1586
1587 err = sock_intr_errno(timeo);
1588 if (signal_pending(current))
1589 goto out_free;
1590
1591 goto restart;
1592 }
1593
3f66116e
AC
1594 if (sock_flag(other, SOCK_RCVTSTAMP))
1595 __net_timestamp(skb);
16e57262 1596 maybe_add_creds(skb, sock, other);
1da177e4 1597 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1598 if (max_level > unix_sk(other)->recursion_level)
1599 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1600 unix_state_unlock(other);
676d2369 1601 other->sk_data_ready(other);
1da177e4 1602 sock_put(other);
7cc05662 1603 scm_destroy(&scm);
1da177e4
LT
1604 return len;
1605
1606out_unlock:
1c92b4e5 1607 unix_state_unlock(other);
1da177e4
LT
1608out_free:
1609 kfree_skb(skb);
1610out:
1611 if (other)
1612 sock_put(other);
7cc05662 1613 scm_destroy(&scm);
1da177e4
LT
1614 return err;
1615}
1616
e370a723
ED
1617/* We use paged skbs for stream sockets, and limit occupancy to 32768
1618 * bytes, and a minimum of a full page.
1619 */
1620#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
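A quick sanity check on the define above, assuming the common PAGE_SIZE of 4096: get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ = 4096 << 3 = 32768 bytes, i.e. eight pages. With 64 KiB pages the order is 0 and the limit rounds up to a single 65536-byte page, matching the "minimum of a full page" noted in the comment.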
ac7bfa62 1621
1b784140
YX
1622static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1623 size_t len)
1da177e4 1624{
1da177e4
LT
1625 struct sock *sk = sock->sk;
1626 struct sock *other = NULL;
6eba6a37 1627 int err, size;
f78a5fda 1628 struct sk_buff *skb;
e27dfcea 1629 int sent = 0;
7cc05662 1630 struct scm_cookie scm;
8ba69ba6 1631 bool fds_sent = false;
25888e30 1632 int max_level;
e370a723 1633 int data_len;
1da177e4 1634
5f23b734 1635 wait_for_unix_gc();
7cc05662 1636 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1637 if (err < 0)
1638 return err;
1639
1640 err = -EOPNOTSUPP;
1641 if (msg->msg_flags&MSG_OOB)
1642 goto out_err;
1643
1644 if (msg->msg_namelen) {
1645 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1646 goto out_err;
1647 } else {
1da177e4 1648 err = -ENOTCONN;
830a1e5c 1649 other = unix_peer(sk);
1da177e4
LT
1650 if (!other)
1651 goto out_err;
1652 }
1653
1654 if (sk->sk_shutdown & SEND_SHUTDOWN)
1655 goto pipe_err;
1656
6eba6a37 1657 while (sent < len) {
e370a723 1658 size = len - sent;
1da177e4
LT
1659
1660 /* Keep two messages in the pipe so it schedules better */
e370a723 1661 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 1662
e370a723
ED
1663 /* allow fallback to order-0 allocations */
1664 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 1665
e370a723 1666 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 1667
31ff6aa5
KT
1668 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1669
e370a723 1670 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
1671 msg->msg_flags & MSG_DONTWAIT, &err,
1672 get_order(UNIX_SKB_FRAGS_SZ));
e370a723 1673 if (!skb)
1da177e4
LT
1674 goto out_err;
1675
f78a5fda 1676 /* Only send the fds in the first buffer */
7cc05662 1677 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 1678 if (err < 0) {
7361c36c 1679 kfree_skb(skb);
f78a5fda 1680 goto out_err;
6209344f 1681 }
25888e30 1682 max_level = err + 1;
7361c36c 1683 fds_sent = true;
1da177e4 1684
e370a723
ED
1685 skb_put(skb, size - data_len);
1686 skb->data_len = data_len;
1687 skb->len = size;
c0371da6 1688 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 1689 if (err) {
1da177e4 1690 kfree_skb(skb);
f78a5fda 1691 goto out_err;
1da177e4
LT
1692 }
1693
1c92b4e5 1694 unix_state_lock(other);
1da177e4
LT
1695
1696 if (sock_flag(other, SOCK_DEAD) ||
1697 (other->sk_shutdown & RCV_SHUTDOWN))
1698 goto pipe_err_free;
1699
16e57262 1700 maybe_add_creds(skb, sock, other);
1da177e4 1701 skb_queue_tail(&other->sk_receive_queue, skb);
25888e30
ED
1702 if (max_level > unix_sk(other)->recursion_level)
1703 unix_sk(other)->recursion_level = max_level;
1c92b4e5 1704 unix_state_unlock(other);
676d2369 1705 other->sk_data_ready(other);
e27dfcea 1706 sent += size;
1da177e4 1707 }
1da177e4 1708
7cc05662 1709 scm_destroy(&scm);
1da177e4
LT
1710
1711 return sent;
1712
1713pipe_err_free:
1c92b4e5 1714 unix_state_unlock(other);
1da177e4
LT
1715 kfree_skb(skb);
1716pipe_err:
6eba6a37
ED
1717 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1718 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1719 err = -EPIPE;
1720out_err:
7cc05662 1721 scm_destroy(&scm);
1da177e4
LT
1722 return sent ? : err;
1723}
1724
869e7c62
HFS
1725static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1726 int offset, size_t size, int flags)
1727{
1728 int err = 0;
1729 bool send_sigpipe = true;
1730 struct sock *other, *sk = socket->sk;
1731 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1732
1733 if (flags & MSG_OOB)
1734 return -EOPNOTSUPP;
1735
1736 other = unix_peer(sk);
1737 if (!other || sk->sk_state != TCP_ESTABLISHED)
1738 return -ENOTCONN;
1739
1740 if (false) {
1741alloc_skb:
1742 unix_state_unlock(other);
1743 mutex_unlock(&unix_sk(other)->readlock);
1744 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1745 &err, 0);
1746 if (!newskb)
1747 return err;
1748 }
1749
1750 /* we must acquire readlock as we modify already present
1751 * skbs in the sk_receive_queue and mess with skb->len
1752 */
1753 err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1754 if (err) {
1755 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1756 send_sigpipe = false;
1757 goto err;
1758 }
1759
1760 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1761 err = -EPIPE;
1762 goto err_unlock;
1763 }
1764
1765 unix_state_lock(other);
1766
1767 if (sock_flag(other, SOCK_DEAD) ||
1768 other->sk_shutdown & RCV_SHUTDOWN) {
1769 err = -EPIPE;
1770 goto err_state_unlock;
1771 }
1772
1773 skb = skb_peek_tail(&other->sk_receive_queue);
1774 if (tail && tail == skb) {
1775 skb = newskb;
1776 } else if (!skb) {
1777 if (newskb)
1778 skb = newskb;
1779 else
1780 goto alloc_skb;
1781 } else if (newskb) {
1782 /* this is the fast path; we don't necessarily need to
1783 * call kfree_skb even though with newskb == NULL
1784 * this does no harm
1785 */
1786 consume_skb(newskb);
1787 }
1788
1789 if (skb_append_pagefrags(skb, page, offset, size)) {
1790 tail = skb;
1791 goto alloc_skb;
1792 }
1793
1794 skb->len += size;
1795 skb->data_len += size;
1796 skb->truesize += size;
1797 atomic_add(size, &sk->sk_wmem_alloc);
1798
1799 if (newskb)
1800 __skb_queue_tail(&other->sk_receive_queue, newskb);
1801
1802 unix_state_unlock(other);
1803 mutex_unlock(&unix_sk(other)->readlock);
1804
1805 other->sk_data_ready(other);
1806
1807 return size;
1808
1809err_state_unlock:
1810 unix_state_unlock(other);
1811err_unlock:
1812 mutex_unlock(&unix_sk(other)->readlock);
1813err:
1814 kfree_skb(newskb);
1815 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1816 send_sig(SIGPIPE, current, 0);
1817 return err;
1818}
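unix_stream_sendpage() above is what the new ->sendpage hook plugs into; from user space it is exercised indirectly, for example by splice()-ing from a pipe into a connected AF_UNIX stream socket. A hedged sketch, with pipe/socket setup omitted and fd names as placeholders:

#define _GNU_SOURCE			/* for splice() */
#include <fcntl.h>
#include <unistd.h>

/* Move up to len bytes from a pipe read end into a connected
 * AF_UNIX SOCK_STREAM socket; the page-backed data should end up
 * going through the socket's ->sendpage (unix_stream_sendpage). */
ssize_t pipe_to_unix_stream(int pipe_rd, int unix_fd, size_t len)
{
	return splice(pipe_rd, NULL, unix_fd, NULL, len, SPLICE_F_MOVE);
}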
1819
1b784140
YX
1820static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1821 size_t len)
1da177e4
LT
1822{
1823 int err;
1824 struct sock *sk = sock->sk;
ac7bfa62 1825
1da177e4
LT
1826 err = sock_error(sk);
1827 if (err)
1828 return err;
1829
1830 if (sk->sk_state != TCP_ESTABLISHED)
1831 return -ENOTCONN;
1832
1833 if (msg->msg_namelen)
1834 msg->msg_namelen = 0;
1835
1b784140 1836 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 1837}
ac7bfa62 1838
1b784140
YX
1839static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
1840 size_t size, int flags)
a05d2ad1
EB
1841{
1842 struct sock *sk = sock->sk;
1843
1844 if (sk->sk_state != TCP_ESTABLISHED)
1845 return -ENOTCONN;
1846
1b784140 1847 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
1848}
1849
1da177e4
LT
1850static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1851{
1852 struct unix_sock *u = unix_sk(sk);
1853
1da177e4
LT
1854 if (u->addr) {
1855 msg->msg_namelen = u->addr->len;
1856 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1857 }
1858}
1859
1b784140
YX
1860static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1861 size_t size, int flags)
1da177e4 1862{
7cc05662 1863 struct scm_cookie scm;
1da177e4
LT
1864 struct sock *sk = sock->sk;
1865 struct unix_sock *u = unix_sk(sk);
1866 int noblock = flags & MSG_DONTWAIT;
1867 struct sk_buff *skb;
1868 int err;
f55bb7f9 1869 int peeked, skip;
1da177e4
LT
1870
1871 err = -EOPNOTSUPP;
1872 if (flags&MSG_OOB)
1873 goto out;
1874
b3ca9b02 1875 err = mutex_lock_interruptible(&u->readlock);
de144391
ED
1876 if (unlikely(err)) {
1877 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1878 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1879 */
1880 err = noblock ? -EAGAIN : -ERESTARTSYS;
b3ca9b02
RW
1881 goto out;
1882 }
1da177e4 1883
f55bb7f9
PE
1884 skip = sk_peek_offset(sk, flags);
1885
1886 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
0a112258
FZ
1887 if (!skb) {
1888 unix_state_lock(sk);
1889 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1890 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1891 (sk->sk_shutdown & RCV_SHUTDOWN))
1892 err = 0;
1893 unix_state_unlock(sk);
1da177e4 1894 goto out_unlock;
0a112258 1895 }
1da177e4 1896
67426b75
ED
1897 wake_up_interruptible_sync_poll(&u->peer_wait,
1898 POLLOUT | POLLWRNORM | POLLWRBAND);
1da177e4
LT
1899
1900 if (msg->msg_name)
1901 unix_copy_addr(msg, skb->sk);
1902
f55bb7f9
PE
1903 if (size > skb->len - skip)
1904 size = skb->len - skip;
1905 else if (size < skb->len - skip)
1da177e4
LT
1906 msg->msg_flags |= MSG_TRUNC;
1907
51f3d02b 1908 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
1909 if (err)
1910 goto out_free;
1911
3f66116e
AC
1912 if (sock_flag(sk, SOCK_RCVTSTAMP))
1913 __sock_recv_timestamp(msg, sk, skb);
1914
7cc05662
CH
1915 memset(&scm, 0, sizeof(scm));
1916
1917 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1918 unix_set_secdata(&scm, skb);
1da177e4 1919
6eba6a37 1920 if (!(flags & MSG_PEEK)) {
1da177e4 1921 if (UNIXCB(skb).fp)
7cc05662 1922 unix_detach_fds(&scm, skb);
f55bb7f9
PE
1923
1924 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 1925 } else {
1da177e4
LT
1926 /* It is questionable what to do on MSG_PEEK; we could:
1927 - not return fds at all - simple, but loses information 8)
1928 - return fds, but not return them again on the subsequent
1929 read (the old strategy, apparently wrong)
1930 - clone the fds (chosen here, as it is the most universal
1931 solution)
ac7bfa62
YH
1932
1933 POSIX 1003.1g does not actually define this clearly
1934 at all. POSIX 1003.1g doesn't define a lot of things
1935 clearly however!
1936
1da177e4 1937 */
f55bb7f9
PE
1938
1939 sk_peek_offset_fwd(sk, size);
1940
1da177e4 1941 if (UNIXCB(skb).fp)
7cc05662 1942 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 1943 }
9f6f9af7 1944 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 1945
7cc05662 1946 scm_recv(sock, msg, &scm, flags);
1da177e4
LT
1947
1948out_free:
6eba6a37 1949 skb_free_datagram(sk, skb);
1da177e4 1950out_unlock:
57b47a53 1951 mutex_unlock(&u->readlock);
1da177e4
LT
1952out:
1953 return err;
1954}
1955
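/*
 * Illustrative userspace sketch (not part of the kernel build): receiving
 * a datagram that carries SCM_RIGHTS descriptors. As the MSG_PEEK branch
 * above shows, a peeking recvmsg() is handed duplicated descriptors of its
 * own, so the caller must close them just like ones obtained by a normal
 * read. The helper name is made up for the example.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	static int recv_one_fd(int sock, int flags)
 *	{
 *		char data[128];
 *		union {
 *			char buf[CMSG_SPACE(sizeof(int))];
 *			struct cmsghdr align;
 *		} u;
 *		struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *		};
 *		struct cmsghdr *cmsg;
 *		int fd = -1;
 *
 *		if (recvmsg(sock, &msg, flags) < 0)
 *			return -1;
 *		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *		     cmsg = CMSG_NXTHDR(&msg, cmsg))
 *			if (cmsg->cmsg_level == SOL_SOCKET &&
 *			    cmsg->cmsg_type == SCM_RIGHTS)
 *				memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
 *		return fd;
 *	}
 *
 * Calling recv_one_fd(sock, MSG_PEEK) and then recv_one_fd(sock, 0) on the
 * same datagram yields two independent descriptors for the same file.
 */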
1956/*
79f632c7 1957 * Sleep until more data has arrived. But check for races..
1da177e4 1958 */
79f632c7
BP
1959static long unix_stream_data_wait(struct sock *sk, long timeo,
1960 struct sk_buff *last)
1da177e4
LT
1961{
1962 DEFINE_WAIT(wait);
1963
1c92b4e5 1964 unix_state_lock(sk);
1da177e4
LT
1965
1966 for (;;) {
aa395145 1967 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 1968
79f632c7 1969 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
1da177e4
LT
1970 sk->sk_err ||
1971 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1972 signal_pending(current) ||
1973 !timeo)
1974 break;
1975
1976 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1c92b4e5 1977 unix_state_unlock(sk);
2b15af6f 1978 timeo = freezable_schedule_timeout(timeo);
1c92b4e5 1979 unix_state_lock(sk);
1da177e4
LT
1980 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1981 }
1982
aa395145 1983 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 1984 unix_state_unlock(sk);
1da177e4
LT
1985 return timeo;
1986}
1987
e370a723
ED
1988static unsigned int unix_skb_len(const struct sk_buff *skb)
1989{
1990 return skb->len - UNIXCB(skb).consumed;
1991}
1992
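/*
 * For stream sockets a partially read skb stays on the receive queue with
 * UNIXCB(skb).consumed recording how much has already been copied out; an
 * skb with skb->len == 100 and consumed == 60, for example, still has
 * unix_skb_len() == 40 bytes left for the next recvmsg().
 */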
1b784140
YX
1993static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
1994 size_t size, int flags)
1da177e4 1995{
7cc05662 1996 struct scm_cookie scm;
1da177e4
LT
1997 struct sock *sk = sock->sk;
1998 struct unix_sock *u = unix_sk(sk);
342dfc30 1999 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1da177e4 2000 int copied = 0;
de144391 2001 int noblock = flags & MSG_DONTWAIT;
1da177e4
LT
2002 int check_creds = 0;
2003 int target;
2004 int err = 0;
2005 long timeo;
fc0d7536 2006 int skip;
1da177e4
LT
2007
2008 err = -EINVAL;
2009 if (sk->sk_state != TCP_ESTABLISHED)
2010 goto out;
2011
2012 err = -EOPNOTSUPP;
2013 if (flags&MSG_OOB)
2014 goto out;
2015
2016 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
de144391 2017 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2018
1da177e4
LT
2019 /* Lock the socket to prevent the queue from being reordered
2020 * while we sleep in skb_copy_datagram_msg()
2021 */
2022
7cc05662 2023 memset(&scm, 0, sizeof(scm));
1da177e4 2024
b3ca9b02 2025 err = mutex_lock_interruptible(&u->readlock);
de144391
ED
2026 if (unlikely(err)) {
2027 /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2028 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
2029 */
2030 err = noblock ? -EAGAIN : -ERESTARTSYS;
b3ca9b02
RW
2031 goto out;
2032 }
1da177e4 2033
6eba6a37 2034 do {
1da177e4 2035 int chunk;
79f632c7 2036 struct sk_buff *skb, *last;
1da177e4 2037
3c0d2f37 2038 unix_state_lock(sk);
79f632c7 2039 last = skb = skb_peek(&sk->sk_receive_queue);
fc0d7536 2040again:
6eba6a37 2041 if (skb == NULL) {
25888e30 2042 unix_sk(sk)->recursion_level = 0;
1da177e4 2043 if (copied >= target)
3c0d2f37 2044 goto unlock;
1da177e4
LT
2045
2046 /*
2047 * POSIX 1003.1g mandates this order.
2048 */
ac7bfa62 2049
6eba6a37
ED
2050 err = sock_error(sk);
2051 if (err)
3c0d2f37 2052 goto unlock;
1da177e4 2053 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2054 goto unlock;
2055
2056 unix_state_unlock(sk);
1da177e4
LT
2057 err = -EAGAIN;
2058 if (!timeo)
2059 break;
57b47a53 2060 mutex_unlock(&u->readlock);
1da177e4 2061
79f632c7 2062 timeo = unix_stream_data_wait(sk, timeo, last);
1da177e4 2063
b3ca9b02
RW
2064 if (signal_pending(current)
2065 || mutex_lock_interruptible(&u->readlock)) {
1da177e4
LT
2066 err = sock_intr_errno(timeo);
2067 goto out;
2068 }
b3ca9b02 2069
1da177e4 2070 continue;
3c0d2f37
MS
2071 unlock:
2072 unix_state_unlock(sk);
2073 break;
1da177e4 2074 }
fc0d7536 2075
79f632c7 2076 skip = sk_peek_offset(sk, flags);
e370a723
ED
2077 while (skip >= unix_skb_len(skb)) {
2078 skip -= unix_skb_len(skb);
79f632c7 2079 last = skb;
fc0d7536 2080 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2081 if (!skb)
2082 goto again;
fc0d7536
PE
2083 }
2084
3c0d2f37 2085 unix_state_unlock(sk);
1da177e4
LT
2086
2087 if (check_creds) {
2088 /* Never glue messages from different writers */
7cc05662
CH
2089 if ((UNIXCB(skb).pid != scm.pid) ||
2090 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2091 !gid_eq(UNIXCB(skb).gid, scm.creds.gid))
1da177e4 2092 break;
0e82e7f6 2093 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2094 /* Copy credentials */
7cc05662 2095 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1da177e4
LT
2096 check_creds = 1;
2097 }
2098
2099 /* Copy address just once */
6eba6a37 2100 if (sunaddr) {
1da177e4
LT
2101 unix_copy_addr(msg, skb->sk);
2102 sunaddr = NULL;
2103 }
2104
e370a723 2105 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
51f3d02b
DM
2106 if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2107 msg, chunk)) {
1da177e4
LT
2108 if (copied == 0)
2109 copied = -EFAULT;
2110 break;
2111 }
2112 copied += chunk;
2113 size -= chunk;
2114
2115 /* Mark read part of skb as used */
6eba6a37 2116 if (!(flags & MSG_PEEK)) {
e370a723 2117 UNIXCB(skb).consumed += chunk;
1da177e4 2118
fc0d7536
PE
2119 sk_peek_offset_bwd(sk, chunk);
2120
1da177e4 2121 if (UNIXCB(skb).fp)
7cc05662 2122 unix_detach_fds(&scm, skb);
1da177e4 2123
e370a723 2124 if (unix_skb_len(skb))
1da177e4 2125 break;
1da177e4 2126
6f01fd6e 2127 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2128 consume_skb(skb);
1da177e4 2129
7cc05662 2130 if (scm.fp)
1da177e4 2131 break;
6eba6a37 2132 } else {
1da177e4
LT
2133 /* It is questionable, see note in unix_dgram_recvmsg.
2134 */
2135 if (UNIXCB(skb).fp)
7cc05662 2136 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 2137
fc0d7536
PE
2138 sk_peek_offset_fwd(sk, chunk);
2139
1da177e4
LT
2140 break;
2141 }
2142 } while (size);
2143
57b47a53 2144 mutex_unlock(&u->readlock);
7cc05662 2145 scm_recv(sock, msg, &scm, flags);
1da177e4
LT
2146out:
2147 return copied ? : err;
2148}
2149
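/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * sk_peek_offset() handling above is what backs SO_PEEK_OFF, which lets
 * successive MSG_PEEK reads walk forward through queued stream data
 * without consuming it. The helper name is made up for the example.
 *
 *	#include <sys/socket.h>
 *	#include <sys/types.h>
 *
 *	static ssize_t peek_from_start(int sock, char *buf, size_t len)
 *	{
 *		int off = 0;
 *
 *		if (setsockopt(sock, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off)))
 *			return -1;
 *		return recv(sock, buf, len, MSG_PEEK);
 *	}
 *
 * Each successful peek advances the offset by the amount returned, so a
 * second MSG_PEEK continues where the first stopped, while a normal recv()
 * consumes data and winds the offset back accordingly.
 */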
2150static int unix_shutdown(struct socket *sock, int mode)
2151{
2152 struct sock *sk = sock->sk;
2153 struct sock *other;
2154
fc61b928
XW
2155 if (mode < SHUT_RD || mode > SHUT_RDWR)
2156 return -EINVAL;
2157 /* This maps:
2158 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2159 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2160 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2161 */
2162 ++mode;
7180a031
AC
2163
2164 unix_state_lock(sk);
2165 sk->sk_shutdown |= mode;
2166 other = unix_peer(sk);
2167 if (other)
2168 sock_hold(other);
2169 unix_state_unlock(sk);
2170 sk->sk_state_change(sk);
2171
2172 if (other &&
2173 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2174
2175 int peer_mode = 0;
2176
2177 if (mode&RCV_SHUTDOWN)
2178 peer_mode |= SEND_SHUTDOWN;
2179 if (mode&SEND_SHUTDOWN)
2180 peer_mode |= RCV_SHUTDOWN;
2181 unix_state_lock(other);
2182 other->sk_shutdown |= peer_mode;
2183 unix_state_unlock(other);
2184 other->sk_state_change(other);
2185 if (peer_mode == SHUTDOWN_MASK)
2186 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2187 else if (peer_mode & RCV_SHUTDOWN)
2188 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2189 }
7180a031
AC
2190 if (other)
2191 sock_put(other);
2192
1da177e4
LT
2193 return 0;
2194}
2195
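/*
 * Illustrative userspace sketch (not part of the kernel build): because
 * SHUT_WR on one end is mirrored as RCV_SHUTDOWN on the peer above, the
 * peer's read side sees a clean end of stream once queued data is drained.
 * The helper name is made up for the example.
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static void finish_request(int sock)
 *	{
 *		char buf[256];
 *
 *		shutdown(sock, SHUT_WR);
 *		while (read(sock, buf, sizeof(buf)) > 0)
 *			;
 *		close(sock);
 *	}
 */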
885ee74d
PE
2196long unix_inq_len(struct sock *sk)
2197{
2198 struct sk_buff *skb;
2199 long amount = 0;
2200
2201 if (sk->sk_state == TCP_LISTEN)
2202 return -EINVAL;
2203
2204 spin_lock(&sk->sk_receive_queue.lock);
2205 if (sk->sk_type == SOCK_STREAM ||
2206 sk->sk_type == SOCK_SEQPACKET) {
2207 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2208 amount += unix_skb_len(skb);
885ee74d
PE
2209 } else {
2210 skb = skb_peek(&sk->sk_receive_queue);
2211 if (skb)
2212 amount = skb->len;
2213 }
2214 spin_unlock(&sk->sk_receive_queue.lock);
2215
2216 return amount;
2217}
2218EXPORT_SYMBOL_GPL(unix_inq_len);
2219
2220long unix_outq_len(struct sock *sk)
2221{
2222 return sk_wmem_alloc_get(sk);
2223}
2224EXPORT_SYMBOL_GPL(unix_outq_len);
2225
1da177e4
LT
2226static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2227{
2228 struct sock *sk = sock->sk;
e27dfcea 2229 long amount = 0;
1da177e4
LT
2230 int err;
2231
6eba6a37
ED
2232 switch (cmd) {
2233 case SIOCOUTQ:
885ee74d 2234 amount = unix_outq_len(sk);
6eba6a37
ED
2235 err = put_user(amount, (int __user *)arg);
2236 break;
2237 case SIOCINQ:
885ee74d
PE
2238 amount = unix_inq_len(sk);
2239 if (amount < 0)
2240 err = amount;
2241 else
1da177e4 2242 err = put_user(amount, (int __user *)arg);
885ee74d 2243 break;
6eba6a37
ED
2244 default:
2245 err = -ENOIOCTLCMD;
2246 break;
1da177e4
LT
2247 }
2248 return err;
2249}
2250
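/*
 * Illustrative userspace sketch (not part of the kernel build): SIOCINQ
 * and SIOCOUTQ map onto unix_inq_len() and unix_outq_len() above. SIOCINQ
 * reports unread receive-queue bytes (for SOCK_DGRAM, only the size of the
 * next datagram); SIOCOUTQ reports bytes still charged to the send buffer.
 * The helper name is made up for the example.
 *
 *	#include <linux/sockios.h>
 *	#include <sys/ioctl.h>
 *
 *	static int queued_bytes(int sock, int *in, int *out)
 *	{
 *		if (ioctl(sock, SIOCINQ, in) < 0)
 *			return -1;
 *		return ioctl(sock, SIOCOUTQ, out);
 *	}
 */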
6eba6a37 2251static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2252{
2253 struct sock *sk = sock->sk;
2254 unsigned int mask;
2255
aa395145 2256 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2257 mask = 0;
2258
2259 /* exceptional events? */
2260 if (sk->sk_err)
2261 mask |= POLLERR;
2262 if (sk->sk_shutdown == SHUTDOWN_MASK)
2263 mask |= POLLHUP;
f348d70a 2264 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2265 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2266
2267 /* readable? */
db40980f 2268 if (!skb_queue_empty(&sk->sk_receive_queue))
1da177e4
LT
2269 mask |= POLLIN | POLLRDNORM;
2270
2271 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2272 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2273 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2274 mask |= POLLHUP;
2275
2276 /*
2277 * We report the socket as writable even when the other side has
2278 * shut down the connection; this prevents sockets from getting stuck.
2279 */
2280 if (unix_writable(sk))
2281 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2282
2283 return mask;
2284}
2285
ec0d215f
RW
2286static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2287 poll_table *wait)
3c73419c 2288{
ec0d215f
RW
2289 struct sock *sk = sock->sk, *other;
2290 unsigned int mask, writable;
3c73419c 2291
aa395145 2292 sock_poll_wait(file, sk_sleep(sk), wait);
3c73419c
RW
2293 mask = 0;
2294
2295 /* exceptional events? */
2296 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
7d4c04fc 2297 mask |= POLLERR |
8facd5fb 2298 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
7d4c04fc 2299
3c73419c 2300 if (sk->sk_shutdown & RCV_SHUTDOWN)
5456f09a 2301 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
3c73419c
RW
2302 if (sk->sk_shutdown == SHUTDOWN_MASK)
2303 mask |= POLLHUP;
2304
2305 /* readable? */
5456f09a 2306 if (!skb_queue_empty(&sk->sk_receive_queue))
3c73419c
RW
2307 mask |= POLLIN | POLLRDNORM;
2308
2309 /* Connection-based need to check for termination and startup */
2310 if (sk->sk_type == SOCK_SEQPACKET) {
2311 if (sk->sk_state == TCP_CLOSE)
2312 mask |= POLLHUP;
2313 /* connection hasn't started yet? */
2314 if (sk->sk_state == TCP_SYN_SENT)
2315 return mask;
2316 }
2317
973a34aa 2318 /* No write status requested, avoid expensive OUT tests. */
626cf236 2319 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
973a34aa
ED
2320 return mask;
2321
ec0d215f 2322 writable = unix_writable(sk);
5456f09a
ED
2323 other = unix_peer_get(sk);
2324 if (other) {
2325 if (unix_peer(other) != sk) {
2326 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2327 if (unix_recvq_full(other))
2328 writable = 0;
ec0d215f 2329 }
5456f09a 2330 sock_put(other);
ec0d215f
RW
2331 }
2332
2333 if (writable)
3c73419c
RW
2334 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2335 else
2336 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2337
3c73419c
RW
2338 return mask;
2339}
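/*
 * Illustrative userspace sketch (not part of the kernel build): for a
 * connected datagram socket the writability check above also consults the
 * peer's receive queue, so POLLOUT is withheld while the peer is
 * backlogged. The helper name is made up for the example.
 *
 *	#include <poll.h>
 *
 *	static int wait_writable(int sock, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = sock, .events = POLLOUT };
 *
 *		return poll(&pfd, 1, timeout_ms);
 *	}
 */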
1da177e4
LT
2340
2341#ifdef CONFIG_PROC_FS
a53eb3fe 2342
7123aaa3
ED
2343#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2344
2345#define get_bucket(x) ((x) >> BUCKET_SPACE)
2346#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2347#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2348
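/*
 * The seq_file position packs a hash bucket and a 1-based in-bucket offset
 * into a single loff_t: the bucket occupies the top bits, the offset the
 * low BUCKET_SPACE bits. So set_bucket_offset(b, o) yields
 * (b << BUCKET_SPACE) | o, and get_bucket()/get_offset() recover b and o
 * again; on a 64-bit build BUCKET_SPACE leaves 64 - (UNIX_HASH_BITS + 1) - 1
 * bits for the offset.
 */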
7123aaa3 2349static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2350{
7123aaa3
ED
2351 unsigned long offset = get_offset(*pos);
2352 unsigned long bucket = get_bucket(*pos);
2353 struct sock *sk;
2354 unsigned long count = 0;
1da177e4 2355
7123aaa3
ED
2356 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2357 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2358 continue;
7123aaa3
ED
2359 if (++count == offset)
2360 break;
2361 }
2362
2363 return sk;
2364}
2365
2366static struct sock *unix_next_socket(struct seq_file *seq,
2367 struct sock *sk,
2368 loff_t *pos)
2369{
2370 unsigned long bucket;
2371
2372 while (sk > (struct sock *)SEQ_START_TOKEN) {
2373 sk = sk_next(sk);
2374 if (!sk)
2375 goto next_bucket;
2376 if (sock_net(sk) == seq_file_net(seq))
2377 return sk;
1da177e4 2378 }
7123aaa3
ED
2379
2380 do {
2381 sk = unix_from_bucket(seq, pos);
2382 if (sk)
2383 return sk;
2384
2385next_bucket:
2386 bucket = get_bucket(*pos) + 1;
2387 *pos = set_bucket_offset(bucket, 1);
2388 } while (bucket < ARRAY_SIZE(unix_socket_table));
2389
1da177e4
LT
2390 return NULL;
2391}
2392
1da177e4 2393static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2394 __acquires(unix_table_lock)
1da177e4 2395{
fbe9cc4a 2396 spin_lock(&unix_table_lock);
7123aaa3
ED
2397
2398 if (!*pos)
2399 return SEQ_START_TOKEN;
2400
2401 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2402 return NULL;
2403
2404 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2405}
2406
2407static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2408{
2409 ++*pos;
7123aaa3 2410 return unix_next_socket(seq, v, pos);
1da177e4
LT
2411}
2412
2413static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2414 __releases(unix_table_lock)
1da177e4 2415{
fbe9cc4a 2416 spin_unlock(&unix_table_lock);
1da177e4
LT
2417}
2418
2419static int unix_seq_show(struct seq_file *seq, void *v)
2420{
ac7bfa62 2421
b9f3124f 2422 if (v == SEQ_START_TOKEN)
1da177e4
LT
2423 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2424 "Inode Path\n");
2425 else {
2426 struct sock *s = v;
2427 struct unix_sock *u = unix_sk(s);
1c92b4e5 2428 unix_state_lock(s);
1da177e4 2429
71338aa7 2430 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4
LT
2431 s,
2432 atomic_read(&s->sk_refcnt),
2433 0,
2434 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2435 s->sk_type,
2436 s->sk_socket ?
2437 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2438 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2439 sock_i_ino(s));
2440
2441 if (u->addr) {
2442 int i, len;
2443 seq_putc(seq, ' ');
2444
2445 i = 0;
2446 len = u->addr->len - sizeof(short);
2447 if (!UNIX_ABSTRACT(s))
2448 len--;
2449 else {
2450 seq_putc(seq, '@');
2451 i++;
2452 }
2453 for ( ; i < len; i++)
2454 seq_putc(seq, u->addr->name->sun_path[i]);
2455 }
1c92b4e5 2456 unix_state_unlock(s);
1da177e4
LT
2457 seq_putc(seq, '\n');
2458 }
2459
2460 return 0;
2461}
2462
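/*
 * Illustrative /proc/net/unix output (all values made up):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff88003a0d8000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 *	ffff88003a0d9000: 00000002 00000000 00000000 0002 01 12346 @abstract-name
 *
 * The columns follow the seq_printf() above: socket address (%pK),
 * refcount, protocol (always 0), flags (__SO_ACCEPTCON for listeners),
 * socket type, socket state, inode and, when bound, the path; abstract
 * names are printed with a leading '@'.
 */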
56b3d975 2463static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2464 .start = unix_seq_start,
2465 .next = unix_seq_next,
2466 .stop = unix_seq_stop,
2467 .show = unix_seq_show,
2468};
2469
1da177e4
LT
2470static int unix_seq_open(struct inode *inode, struct file *file)
2471{
e372c414 2472 return seq_open_net(inode, file, &unix_seq_ops,
8b51b064 2473 sizeof(struct seq_net_private));
1da177e4
LT
2474}
2475
da7071d7 2476static const struct file_operations unix_seq_fops = {
1da177e4
LT
2477 .owner = THIS_MODULE,
2478 .open = unix_seq_open,
2479 .read = seq_read,
2480 .llseek = seq_lseek,
e372c414 2481 .release = seq_release_net,
1da177e4
LT
2482};
2483
2484#endif
2485
ec1b4cf7 2486static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2487 .family = PF_UNIX,
2488 .create = unix_create,
2489 .owner = THIS_MODULE,
2490};
2491
097e66c5 2492
2c8c1e72 2493static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2494{
2495 int error = -ENOMEM;
2496
a0a53c8b 2497 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2498 if (unix_sysctl_register(net))
2499 goto out;
d392e497 2500
097e66c5 2501#ifdef CONFIG_PROC_FS
d4beaa66 2502 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
1597fbc0 2503 unix_sysctl_unregister(net);
097e66c5 2504 goto out;
1597fbc0 2505 }
097e66c5
DL
2506#endif
2507 error = 0;
2508out:
48dcc33e 2509 return error;
097e66c5
DL
2510}
2511
2c8c1e72 2512static void __net_exit unix_net_exit(struct net *net)
097e66c5 2513{
1597fbc0 2514 unix_sysctl_unregister(net);
ece31ffd 2515 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2516}
2517
2518static struct pernet_operations unix_net_ops = {
2519 .init = unix_net_init,
2520 .exit = unix_net_exit,
2521};
2522
1da177e4
LT
2523static int __init af_unix_init(void)
2524{
2525 int rc = -1;
1da177e4 2526
b4fff5f8 2527 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
1da177e4
LT
2528
2529 rc = proto_register(&unix_proto, 1);
ac7bfa62 2530 if (rc != 0) {
5cc208be 2531 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
1da177e4
LT
2532 goto out;
2533 }
2534
2535 sock_register(&unix_family_ops);
097e66c5 2536 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2537out:
2538 return rc;
2539}
2540
2541static void __exit af_unix_exit(void)
2542{
2543 sock_unregister(PF_UNIX);
1da177e4 2544 proto_unregister(&unix_proto);
097e66c5 2545 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2546}
2547
3d366960
DW
2548/* Earlier than device_initcall() so that other drivers invoking
2549 request_module() don't end up in a loop when modprobe tries
2550 to use a UNIX socket. But later than subsys_initcall() because
2551 we depend on infrastructure initialised there. */
2552fs_initcall(af_unix_init);
1da177e4
LT
2553module_exit(af_unix_exit);
2554
2555MODULE_LICENSE("GPL");
2556MODULE_ALIAS_NETPROTO(PF_UNIX);